diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 1043 |
1 file changed, 713 insertions, 330 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d6b2f29..6686f10 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -58,10 +58,11 @@ ;; H -- print a memory address offset by 8; used for sse high-parts ;; K -- print HLE lock prefix ;; Y -- print condition for XOP pcom* instruction. +;; v -- print segment override prefix ;; + -- print a branch hint as 'cs' or 'ds' prefix ;; ; -- print a semicolon (after prefixes due to bug in older gas). ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise. -;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode +;; ^ -- print addr32 prefix if Pmode != word_mode ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required. (define_c_enum "unspec" [ @@ -79,6 +80,7 @@ UNSPEC_MACHOPIC_OFFSET UNSPEC_PCREL UNSPEC_SIZEOF + UNSPEC_SECREL32 ;; Prologue support UNSPEC_STACK_ALLOC @@ -579,12 +581,11 @@ (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64, sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, - avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512, - noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq, - noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni, - avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert, - avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl, - vaes_avx512vl,noapx_nf,avx10_2" + avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, + avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl, + avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma, + avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl, + avx_noavx512f,avx_noavx512vl,vaes_avx512vl,noapx_nf,avx10_2" (const_string "base")) ;; The (bounding maximum) length of an instruction immediate. 
@@ -954,12 +955,8 @@ (eq_attr "isa" "fma_or_avx512vl") (symbol_ref "TARGET_FMA || TARGET_AVX512VL") (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F") - (eq_attr "isa" "avx512f_512") - (symbol_ref "TARGET_AVX512F && TARGET_EVEX512") (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F") (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW") - (eq_attr "isa" "avx512bw_512") - (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512") (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW") (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ") (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ") @@ -1495,7 +1492,7 @@ [(reg:CC FLAGS_REG) (const_int 0)]) (label_ref (match_operand 3)) (pc)))] - "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256" + "TARGET_AVX512F && !TARGET_PREFER_AVX256" { ix86_expand_branch (GET_CODE (operands[0]), operands[1], operands[2], operands[3]); @@ -1602,15 +1599,27 @@ [(set_attr "type" "icmp") (set_attr "mode" "<MODE>")]) +(define_insn "*cmp<mode>_plus_1" + [(set (reg FLAGS_REG) + (compare + (plus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m") + (match_operand:SWI 1 "x86_64_neg_const_int_operand" "n")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" +{ + operands[1] = gen_int_mode (-INTVAL (operands[1]), <MODE>mode); + return "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "icmp") + (set_attr "mode" "<MODE>")]) + (define_insn "*cmpqi_ext<mode>_1" [(set (reg FLAGS_REG) (compare (match_operand:QI 0 "nonimmediate_operand" "QBn") (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)))] + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0)))] "ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%h1, %0|%0, %h1}" [(set_attr "addr" "gpr8") @@ -1621,10 +1630,8 @@ [(set (reg FLAGS_REG) (compare (subreg:QI - (match_operator:SWI248 2 "extract_operator" - 
[(match_operand 0 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 0 "int248_register_operand" "Q")]) 0) (match_operand:QI 1 "const0_operand")))] "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t%h0, %h0" @@ -1646,10 +1653,8 @@ [(set (reg FLAGS_REG) (compare (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 0 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 0 "int248_register_operand" "Q")]) 0) (match_operand:QI 1 "general_operand" "QnBn")))] "ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%1, %h0|%h0, %1}" @@ -1661,15 +1666,11 @@ [(set (reg FLAGS_REG) (compare (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 0 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 0 "int248_register_operand" "Q")]) 0) (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)))] + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0)))] "ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%h1, %h0|%h0, %h1}" [(set_attr "type" "icmp") @@ -2374,7 +2375,7 @@ (define_expand "movxi" [(set (match_operand:XI 0 "nonimmediate_operand") (match_operand:XI 1 "general_operand"))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "ix86_expand_vector_move (XImode, operands); DONE;") (define_expand "movoi" @@ -2427,22 +2428,32 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) -(define_insn "*mov<mode>_and" +;; Generate shorter "and $0,mem" for -Oz. Split it to "mov $0,mem" +;; otherwise. 
+(define_insn_and_split "*mov<mode>_and" [(set (match_operand:SWI248 0 "memory_operand" "=m") (match_operand:SWI248 1 "const0_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "and{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& !(optimize_insn_for_size_p () && optimize_size > 1)" + [(set (match_dup 0) (match_dup 1))] + "" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) -(define_insn "*mov<mode>_or" +;; Generate shorter "or $-1,mem" for -Oz. Split it to "mov $-1,mem" +;; otherwise. +(define_insn_and_split "*mov<mode>_or" [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") (match_operand:SWI248 1 "constm1_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "or{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& !(optimize_insn_for_size_p () && optimize_size > 1)" + [(set (match_dup 0) (match_dup 1))] + "" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) @@ -2450,7 +2461,7 @@ (define_insn "*movxi_internal_avx512f" [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m") (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && (register_operand (operands[0], XImode) || register_operand (operands[1], XImode))" { @@ -2947,6 +2958,8 @@ (match_operand:SWI248 1 "const_int_operand"))] "optimize_insn_for_size_p () && optimize_size > 1 && operands[1] != const0_rtx + && (operands[1] != constm1_rtx + || (<MODE>mode == DImode && LEGACY_INT_REG_P (operands[0]))) && IN_RANGE (INTVAL (operands[1]), -128, 127) && !ix86_red_zone_used && REGNO (operands[0]) != SP_REG" @@ -3457,10 +3470,8 @@ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q")) (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0))] + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0))] 
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "mov{b}\t{%h1, %0|%0, %h1}" [(set_attr "type" "imov") @@ -3543,10 +3554,8 @@ (define_insn "*extzvqi" [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn,?R") (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)]) 0))] + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q,Q")]) 0))] "" { switch (get_attr_type (insn)) @@ -3667,10 +3676,8 @@ (match_operand 0 "int248_register_operand" "+Q") (const_int 8) (const_int 8)) - (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]))] + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]))] "" "mov{b}\t{%h1, %h0|%h0, %h1}" [(set_attr "type" "imov") @@ -4414,7 +4421,7 @@ (eq_attr "alternative" "11") (const_string "DI") (eq_attr "alternative" "5") - (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512") + (cond [(and (match_test "TARGET_AVX512F") (not (match_test "TARGET_PREFER_AVX256"))) (const_string "V16SF") (match_test "TARGET_AVX") @@ -5237,10 +5244,8 @@ [(set (match_operand:SWI24 0 "register_operand" "=R") (sign_extend:SWI24 (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)))] + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0)))] "" "movs{b<SWI24:imodesuffix>|x}\t{%h1, %0|%0, %h1}" [(set_attr "type" "imovx") @@ -5482,7 +5487,7 @@ (set_attr "memory" "none") (set (attr "enabled") (if_then_else (eq_attr "alternative" "2") - (symbol_ref "TARGET_AVX512F && TARGET_EVEX512 + (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL && !TARGET_PREFER_AVX256") (const_string "*")))]) @@ -5704,7 +5709,7 @@ /* vcvtneps2bf16 doesn't honor SNAN, and turn sNAN into qNAN 
quietly, and it always round to even. - flag_unsafte_math_optimization is needed for psrld. + flag_unsafe_math_optimization is needed for psrld. If we don't expect qNaNs nor sNaNs and can assume rounding to nearest, we can expand the conversion inline as (fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16. */ @@ -6986,10 +6991,8 @@ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q")) (plus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q,Q")]) 0) (match_operand:QI 1 "nonimmediate_operand" "0,!qm"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" @@ -7003,8 +7006,8 @@ [(set (strict_low_part (match_dup 0)) (plus:QI (subreg:QI - (match_op_dup 3 - [(match_dup 2) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])] "" @@ -7015,29 +7018,25 @@ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q")) (plus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0) (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0))) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "#" "&& reload_completed" [(set (strict_low_part (match_dup 0)) (subreg:QI - (match_op_dup 4 - [(match_dup 2) (const_int 8) (const_int 8)]) 0)) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) 
(const_int 8)) 0)) (parallel [(set (strict_low_part (match_dup 0)) (plus:QI (subreg:QI - (match_op_dup 3 - [(match_dup 1) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8)) 0) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])] "" @@ -7452,10 +7451,8 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") (plus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0) (match_operand:QI 1 "nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "" @@ -7468,29 +7465,25 @@ [(set (match_operand:QI 0 "register_operand" "=&Q") (plus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0) (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0))) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0))) (clobber (reg:CC FLAGS_REG))] "" "#" "&& reload_completed" [(set (match_dup 0) (subreg:QI - (match_op_dup 4 - [(match_dup 2) (const_int 8) (const_int 8)]) 0)) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0)) (parallel [(set (match_dup 0) (plus:QI (subreg:QI - (match_op_dup 3 - [(match_dup 1) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8)) 0) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])] "" @@ -7520,10 +7513,8 @@ (subreg:SWI248 (plus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0,!Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 
"extract_high_operator" + [(match_operand 1 "int248_register_operand" "0,!Q")]) 0) (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] "" @@ -7558,8 +7549,8 @@ (subreg:SWI248 (plus:QI (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (match_dup 2)) 0)) (clobber (reg:CC FLAGS_REG))])] "" @@ -7579,15 +7570,11 @@ (subreg:SWI248 (plusminus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "<comm>0,!Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "<comm>0,!Q")]) 0) (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)]) 0)) 0)) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "" "@ @@ -7606,11 +7593,11 @@ (subreg:SWI248 (plusminus:QI (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (subreg:QI - (match_op_dup 4 - [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0)) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0)) 0)) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") @@ -8207,10 +8194,8 @@ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,!qm") (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)]) 0))) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q,Q")]) 0))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "@ @@ -8224,8 +8209,8 @@ (minus:QI (match_dup 0) (subreg:QI - (match_op_dup 3 - 
[(match_dup 2) (const_int 8) (const_int 8)]) 0))) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") @@ -8235,30 +8220,26 @@ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q")) (minus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0) (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0))) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "#" "&& reload_completed" [(set (strict_low_part (match_dup 0)) (subreg:QI - (match_op_dup 3 - [(match_dup 1) (const_int 8) (const_int 8)]) 0)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8)) 0)) (parallel [(set (strict_low_part (match_dup 0)) (minus:QI (match_dup 0) (subreg:QI - (match_op_dup 4 - [(match_dup 2) (const_int 8) (const_int 8)]) 0))) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") @@ -8309,10 +8290,8 @@ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0") (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0))) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0))) (clobber (reg:CC FLAGS_REG))] "" "sub{b}\t{%h2, %0|%0, %h2}" @@ -8324,30 +8303,26 @@ [(set (match_operand:QI 0 "register_operand" "=&Q") (minus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 
8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0) (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0))) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0))) (clobber (reg:CC FLAGS_REG))] "" "#" "&& reload_completed" [(set (match_dup 0) (subreg:QI - (match_op_dup 3 - [(match_dup 1) (const_int 8) (const_int 8)]) 0)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8)) 0)) (parallel [(set (match_dup 0) (minus:QI (match_dup 0) (subreg:QI - (match_op_dup 4 - [(match_dup 2) (const_int 8) (const_int 8)]) 0))) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") @@ -8362,10 +8337,8 @@ (subreg:SWI248 (minus:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0,!Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "0,!Q")]) 0) (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] "" @@ -8384,8 +8357,8 @@ (subreg:SWI248 (minus:QI (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (match_dup 2)) 0)) (clobber (reg:CC FLAGS_REG))])] "" @@ -8708,6 +8681,34 @@ (set (match_dup 1) (minus:SWI (match_dup 1) (match_dup 0)))])]) +;; Under APX NDD, 'sub reg, mem, reg' is valid. 
+;; New format for +;; mov reg0, mem1 +;; sub reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; sub mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI 0 "general_reg_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SWI 2 "memory_operand") + (match_dup 0))) + (set (match_dup 0) + (minus:SWI (match_dup 2) (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) (match_dup 0))) + (set (match_dup 2) + (minus:SWI (match_dup 2) (match_dup 0)))])]) + ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into ;; subl $1, %eax; jnc .Lxx; (define_peephole2 @@ -9155,6 +9156,118 @@ (match_dup 1)) (match_dup 0)))])]) +;; Under APX NDD, 'adc reg, mem, reg' is valid. 
+;; +;; New format for +;; mov reg0, mem1 +;; adc reg0, mem2, reg0 +;; mov mem1, reg0 +;; to +;; mov reg0, mem2 +;; adc mem1, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") + (const_int 0)]) + (match_operand:SWI48 2 "memory_operand")) + (match_dup 0))) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 0) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))]) + (set (match_dup 1) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 0))) + (plus:<DWI> + (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 1) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 0)))])]) + +;; New format for +;; mov reg0, mem1 +;; adc reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; adc mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") + (const_int 0)]) + 
(match_operand:SWI48 2 "memory_operand")) + (match_dup 0))) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 0) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0))) + (plus:<DWI> + (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 2) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))])]) + (define_peephole2 [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC @@ -9635,6 +9748,52 @@ [(match_dup 3) (const_int 0)])) (match_dup 0)))])]) +;; Under APX NDD, 'sbb reg, mem, reg' is valid. 
+;; +;; New format for +;; mov reg0, mem1 +;; sbb reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; sbb mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> (match_operand:SWI48 2 "memory_operand")) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (zero_extend:<DWI> + (match_dup 0))))) + (set (match_dup 0) + (minus:SWI48 + (minus:SWI48 + (match_dup 2) + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])) + (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> (match_dup 2)) + (plus:<DWI> (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 2) + (minus:SWI48 (minus:SWI48 (match_dup 2) + (match_op_dup 5 + [(match_dup 3) (const_int 0)])) + (match_dup 0)))])]) + (define_peephole2 [(set (match_operand:SWI48 6 "general_reg_operand") (match_operand:SWI48 7 "memory_operand")) @@ -12147,10 +12306,8 @@ (compare (and:QI (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 0 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 0 "int248_register_operand" "Q")]) 0) (match_operand:QI 1 "general_operand" "QnBn")) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode)" @@ -12164,15 +12321,11 @@ (compare (and:QI (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 0 "int248_register_operand" "Q") 
- (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 0 "int248_register_operand" "Q")]) 0) (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0)) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t{%h1, %h0|%h0, %h1}" @@ -12602,8 +12755,8 @@ (zero_extend:DI (and:SI (match_dup 1) (match_dup 2)))) (clobber (reg:CC FLAGS_REG))])] { - if (GET_CODE (operands[2]) == SYMBOL_REF - || GET_CODE (operands[2]) == LABEL_REF) + if (SYMBOL_REF_P (operands[2]) + || LABEL_REF_P (operands[2])) { operands[2] = shallow_copy_rtx (operands[2]); PUT_MODE (operands[2], SImode); @@ -12761,10 +12914,8 @@ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q")) (any_logic:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q,Q")]) 0) (match_operand:QI 1 "nonimmediate_operand" "0,!qm"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" @@ -12778,8 +12929,8 @@ [(set (strict_low_part (match_dup 0)) (any_logic:QI (subreg:QI - (match_op_dup 3 - [(match_dup 2) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])] "" @@ -12790,29 +12941,25 @@ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q")) (any_logic:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0) (subreg:QI - 
(match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0))) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "#" "&& reload_completed" [(set (strict_low_part (match_dup 0)) (subreg:QI - (match_op_dup 4 - [(match_dup 2) (const_int 8) (const_int 8)]) 0)) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0)) (parallel [(set (strict_low_part (match_dup 0)) (any_logic:QI (subreg:QI - (match_op_dup 3 - [(match_dup 1) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8)) 0) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])] "" @@ -13015,10 +13162,8 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") (any_logic:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0) (match_operand:QI 1 "nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "" @@ -13031,29 +13176,25 @@ [(set (match_operand:QI 0 "register_operand" "=&Q") (any_logic:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "Q")]) 0) (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0))) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q")]) 0))) (clobber (reg:CC FLAGS_REG))] "" "#" "&& reload_completed" [(set (match_dup 0) (subreg:QI - (match_op_dup 4 - [(match_dup 2) (const_int 8) (const_int 
8)]) 0)) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0)) (parallel [(set (match_dup 0) (any_logic:QI (subreg:QI - (match_op_dup 3 - [(match_dup 1) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8)) 0) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])] "" @@ -13083,10 +13224,8 @@ (subreg:SWI248 (any_logic:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0,!Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "0,!Q")]) 0) (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] "" @@ -13105,8 +13244,8 @@ (subreg:SWI248 (any_logic:QI (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (match_dup 2)) 0)) (clobber (reg:CC FLAGS_REG))])] "" @@ -13120,10 +13259,8 @@ (match_operator 5 "compare_operator" [(any_logic:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0,!Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "0,!Q")]) 0) (match_operand:QI 2 "general_operand" "QnBn,QnBn")) (const_int 0)])) (set (zero_extract:SWI248 @@ -13133,8 +13270,8 @@ (subreg:SWI248 (any_logic:QI (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (match_dup 2)) 0))] "ix86_match_ccmode (insn, CCNOmode)" "@ @@ -13150,9 +13287,9 @@ [(set (match_dup 4) (match_op_dup 5 [(any_logic:QI - (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (subreg:QI + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (match_dup 2)) (const_int 0)])) (set (zero_extract:SWI248 @@ -13160,8 +13297,8 @@ 
(subreg:SWI248 (any_logic:QI (subreg:QI - (match_op_dup 3 - [(match_dup 1) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8)) 0) (match_dup 2)) 0))])] "" [(set_attr "addr" "gpr8") @@ -13177,15 +13314,11 @@ (subreg:SWI248 (any_logic:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0,!Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "%0,!Q")]) 0) (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)]) 0)) 0)) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "" "@ @@ -13204,11 +13337,11 @@ (subreg:SWI248 (any_logic:QI (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (subreg:QI - (match_op_dup 4 - [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0)) + (zero_extract:SWI248 + (match_dup 2) (const_int 8) (const_int 8)) 0)) 0)) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") @@ -13220,12 +13353,10 @@ (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) - (match_operator:SWI248 3 "extract_operator" + (match_operator:SWI248 3 "extract_high_operator" [(any_logic (match_operand 1 "int248_register_operand" "%0,!Q") - (match_operand 2 "int248_register_operand" "Q,Q")) - (const_int 8) - (const_int 8)])) + (match_operand 2 "int248_register_operand" "Q,Q"))])) (clobber (reg:CC FLAGS_REG))] "GET_MODE (operands[1]) == GET_MODE (operands[2])" "@ @@ -13241,9 +13372,9 @@ (parallel [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8)) - (match_op_dup 3 - [(any_logic (match_dup 4) (match_dup 2)) - (const_int 8) (const_int 8)])) + (zero_extract:SWI248 + 
(any_logic (match_dup 4) (match_dup 2)) + (const_int 8) (const_int 8))) (clobber (reg:CC FLAGS_REG))])] "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);" [(set_attr "type" "alu") @@ -14488,10 +14619,8 @@ (subreg:SWI248 (neg:QI (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "0,!Q") - (const_int 8) - (const_int 8)]) 0)) 0)) + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "" "@ @@ -14509,8 +14638,8 @@ (subreg:SWI248 (neg:QI (subreg:QI - (match_op_dup 2 - [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0)) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0)) 0)) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "negnot") @@ -14561,6 +14690,17 @@ (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0))) (set (match_dup 0) (neg:SWI (match_dup 0)))])]) +;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384) with APX_F +(define_peephole2 + [(parallel [(set (match_operand:SWI 0 "general_reg_operand") + (neg:SWI (match_operand:SWI 1 "general_reg_operand"))) + (clobber (reg:CC FLAGS_REG))]) + (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))] + "TARGET_APX_NDD" + [(parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:SWI (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:SWI (match_dup 1)))])]) + ;; Special expand pattern to handle integer mode abs (define_expand "abs<mode>2" @@ -15131,13 +15271,9 @@ (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) - (subreg:SWI248 - (not:QI - (subreg:QI - (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "0,!Q") - (const_int 8) - (const_int 8)]) 0)) 0))] + (not:SWI248 + (match_operator:SWI248 2 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "0,!Q")])))] "" "@ not{b}\t%h0 @@ -15150,11 +15286,8 @@ (match_dup 1) (const_int 8) 
(const_int 8))) (set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8)) - (subreg:SWI248 - (not:QI - (subreg:QI - (match_op_dup 2 - [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))] + (not:SWI248 + (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))))] "" [(set_attr "type" "negnot") (set_attr "mode" "QI")]) @@ -16501,10 +16634,8 @@ (subreg:SWI248 (ashift:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0,!Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "0,!Q")]) 0) (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0)) (clobber (reg:CC FLAGS_REG))] "" @@ -16538,8 +16669,8 @@ (subreg:SWI248 (ashift:QI (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (match_dup 2)) 0)) (clobber (reg:CC FLAGS_REG))])] "" @@ -17785,10 +17916,8 @@ (subreg:SWI248 (any_shiftrt:QI (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0,!Q") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 3 "extract_high_operator" + [(match_operand 1 "int248_register_operand" "0,!Q")]) 0) (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0)) (clobber (reg:CC FLAGS_REG))] "" @@ -17814,8 +17943,8 @@ (subreg:SWI248 (any_shiftrt:QI (subreg:QI - (match_op_dup 3 - [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) 0) (match_dup 2)) 0)) (clobber (reg:CC FLAGS_REG))])] "" @@ -18169,17 +18298,17 @@ (any_rotate:SWI (match_operand:SWI 1 "const_int_operand") (subreg:QI - (and - (match_operand 2 "int248_register_operand") - (match_operand 3 "const_int_operand")) 0)))] + (match_operator 4 "and_operator" + [(match_operand 2 "int248_register_operand") + (match_operand 3 "const_int_operand")]) 0)))] "(INTVAL (operands[3]) & 
(GET_MODE_BITSIZE (<MODE>mode) - 1)) == GET_MODE_BITSIZE (<MODE>mode) - 1" - [(set (match_dup 4) (match_dup 1)) + [(set (match_dup 5) (match_dup 1)) (set (match_dup 0) - (any_rotate:SWI (match_dup 4) + (any_rotate:SWI (match_dup 5) (subreg:QI - (and:SI (match_dup 2) (match_dup 3)) 0)))] - "operands[4] = gen_reg_rtx (<MODE>mode);") + (match_op_dup 4 [(match_dup 2) (match_dup 3)]) 0)))] + "operands[5] = gen_reg_rtx (<MODE>mode);") (define_insn_and_split "*<insn><mode>3_mask_1" [(set (match_operand:SWI 0 "nonimmediate_operand") @@ -19894,7 +20023,7 @@ /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ - if (TARGET_64BIT || TARGET_VXWORKS_RTP) + if (TARGET_64BIT || TARGET_VXWORKS_VAROFF) { code = PLUS; op0 = operands[0]; @@ -20762,7 +20891,7 @@ (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT" { - if (flag_pic && !TARGET_VXWORKS_RTP) + if (flag_pic && !TARGET_VXWORKS_GOTTPIC) ix86_pc_thunk_call_expanded = true; }) @@ -20783,7 +20912,7 @@ (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT" { - if (flag_pic && !TARGET_VXWORKS_RTP) + if (flag_pic && !TARGET_VXWORKS_GOTTPIC) ix86_pc_thunk_call_expanded = true; }) @@ -21315,11 +21444,12 @@ (set_attr "mode" "SI")]) ; As bsr is undefined behavior on zero and for other input -; values it is in range 0 to 63, we can optimize away sign-extends. -(define_insn_and_split "*bsr_rex64_2" +; values it is in range 0 to 63, we can optimize away sign-extends +; or zero-extends. 
+(define_insn_and_split "*bsr_rex64<u>_2" [(set (match_operand:DI 0 "register_operand") (xor:DI - (sign_extend:DI + (any_extend:DI (minus:SI (const_int 63) (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand")) @@ -21341,9 +21471,9 @@ operands[3] = lowpart_subreg (SImode, operands[2], DImode); }) -(define_insn_and_split "*bsr_2" +(define_insn_and_split "*bsr<u>_2" [(set (match_operand:DI 0 "register_operand") - (sign_extend:DI + (any_extend:DI (xor:SI (minus:SI (const_int 31) @@ -21420,7 +21550,7 @@ (minus:DI (match_operand:DI 2 "const_int_operand") (xor:DI - (sign_extend:DI + (any_extend:DI (minus:SI (const_int 63) (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand")) @@ -21450,7 +21580,7 @@ [(set (match_operand:DI 0 "register_operand") (minus:DI (match_operand:DI 2 "const_int_operand") - (sign_extend:DI + (any_extend:DI (xor:SI (minus:SI (const_int 31) (clz:SI (match_operand:SI 1 "nonimmediate_operand"))) @@ -22992,7 +23122,8 @@ (match_operand 3))) (unspec:P [(match_operand 1 "tls_symbolic_operand") (reg:P SP_REG)] - UNSPEC_TLS_GD)] + UNSPEC_TLS_GD) + (clobber (match_operand:P 4 "register_operand" "=D"))] "TARGET_64BIT" { if (!TARGET_X32) @@ -23009,7 +23140,7 @@ Use data16 prefix instead, which doesn't have this problem. 
*/ fputs ("\tdata16", asm_out_file); output_asm_insn - ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + ("lea{q}\t{%E1@tlsgd(%%rip), %q4|%q4, %E1@tlsgd[rip]}", operands); if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) fputs (ASM_SHORT "0x6666\n", asm_out_file); else @@ -23033,14 +23164,15 @@ (match_operand 4))) (unspec:DI [(match_operand 1 "tls_symbolic_operand") (reg:DI SP_REG)] - UNSPEC_TLS_GD)] + UNSPEC_TLS_GD) + (clobber (match_operand:DI 5 "register_operand" "=D"))] "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF && GET_CODE (operands[3]) == CONST && GET_CODE (XEXP (operands[3], 0)) == UNSPEC && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF" { output_asm_insn - ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + ("lea{q}\t{%E1@tlsgd(%%rip), %5|%5, %E1@tlsgd[rip]}", operands); output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands); output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands); return "call\t{*%%rax|rax}"; @@ -23056,7 +23188,8 @@ (const_int 0))) (unspec:P [(match_operand 1 "tls_symbolic_operand") (reg:P SP_REG)] - UNSPEC_TLS_GD)])] + UNSPEC_TLS_GD) + (clobber (match_operand:P 3 "register_operand"))])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") @@ -23107,11 +23240,12 @@ (call:P (mem:QI (match_operand 1 "constant_call_address_operand" "Bz")) (match_operand 2))) - (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)] + (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:P 3 "register_operand" "=D"))] "TARGET_64BIT" { output_asm_insn - ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + ("lea{q}\t{%&@tlsld(%%rip), %q3|%q3, %&@tlsld[rip]}", operands); if (TARGET_SUN_TLS) return "call\t%p1@plt"; if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) @@ -23127,14 +23261,15 @@ (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") (match_operand:DI 2 "immediate_operand" "i"))) (match_operand 3))) - (unspec:DI [(reg:DI 
SP_REG)] UNSPEC_TLS_LD_BASE)] + (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:DI 4 "register_operand" "=D"))] "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF && GET_CODE (operands[2]) == CONST && GET_CODE (XEXP (operands[2], 0)) == UNSPEC && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF" { output_asm_insn - ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + ("lea{q}\t{%&@tlsld(%%rip), %4|%4, %&@tlsld[rip]}", operands); output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands); output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands); return "call\t{*%%rax|rax}"; @@ -23148,7 +23283,8 @@ (call:P (mem:QI (match_operand 1)) (const_int 0))) - (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])] + (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:P 2 "register_operand"))])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") @@ -25587,10 +25723,6 @@ (clobber (reg:CC FLAGS_REG))])] "" { - /* Can't use this for non-default address spaces. */ - if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3]))) - FAIL; - int piece_size = GET_MODE_SIZE (GET_MODE (operands[1])); /* If .md ever supports :P for Pmode, these can be directly @@ -25598,9 +25730,14 @@ operands[5] = plus_constant (Pmode, operands[0], piece_size); operands[6] = plus_constant (Pmode, operands[2], piece_size); - /* Can't use this if the user has appropriated esi or edi. */ + /* Can't use this if the user has appropriated esi or edi, + * or if we have the destination in the non-default address space, + * since string insns cannot override the destination segment. 
*/ if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) - && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) + && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])) + && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])) + || Pmode == word_mode)) { emit_insn (gen_strmov_singleop (operands[0], operands[1], operands[2], operands[3], @@ -25635,8 +25772,15 @@ (const_int 8)))] "TARGET_64BIT && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsq" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsq"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "DI")]) @@ -25651,8 +25795,15 @@ (plus:P (match_dup 3) (const_int 4)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movs{l|d}" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movs{l|d}"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "SI")]) @@ -25667,8 +25818,15 @@ (plus:P (match_dup 3) (const_int 2)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsw" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsw"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "HI")]) @@ -25683,8 +25841,15 @@ (plus:P (match_dup 3) (const_int 1)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsb" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsb"; +} [(set_attr "type" "str") (set_attr "memory" "both") 
(set (attr "prefix_rex") @@ -25723,8 +25888,15 @@ (use (match_dup 5))] "TARGET_64BIT && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movsq" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movsq"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25743,8 +25915,15 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movs{l|d}" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movs{l|d}"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25761,8 +25940,15 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movsb" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movsb"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25844,7 +26030,8 @@ (unspec [(const_int 0)] UNSPEC_STOS)] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosq" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25858,7 +26045,8 @@ (const_int 4))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stos{l|d}" [(set_attr "type" "str") 
(set_attr "memory" "store") @@ -25872,7 +26060,8 @@ (const_int 2))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosw" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25886,7 +26075,8 @@ (const_int 1))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosb" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25922,7 +26112,8 @@ (use (match_dup 4))] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stosq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -25940,7 +26131,8 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -25957,7 +26149,8 @@ (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -26224,8 +26417,8 @@ (define_expand "mov<mode>cc" [(set (match_operand:SWIM 0 "register_operand") (if_then_else:SWIM (match_operand 1 "comparison_operator") - (match_operand:SWIM 2 "<general_operand>") - (match_operand:SWIM 3 "<general_operand>")))] + (match_operand:SWIM 2 
"general_operand") + (match_operand:SWIM 3 "general_operand")))] "" "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") @@ -26592,8 +26785,8 @@ [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF (match_operand 1 "comparison_operator") - (match_operand:X87MODEF 2 "register_operand") - (match_operand:X87MODEF 3 "register_operand")))] + (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand") + (match_operand:X87MODEF 3 "nonimm_or_0_operand")))] "(TARGET_80387 && TARGET_CMOVE) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") @@ -27183,7 +27376,7 @@ (cond [(and (eq_attr "alternative" "0") (not (match_test "TARGET_OPT_AGU"))) (const_string "alu") - (match_operand:<MODE> 2 "const0_operand") + (match_operand 2 "const0_operand") (const_string "imov") ] (const_string "lea"))) @@ -27197,6 +27390,46 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) +(define_insn "@pro_epilogue_adjust_stack_add_nocc<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (plus:P (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "<nonmemory_operand>" "l<i>"))) + (clobber (mem:BLK (scratch)))] + "" +{ + if (get_attr_type (insn) == TYPE_IMOV) + return "mov{<imodesuffix>}\t{%1, %0|%0, %1}"; + else + { + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}"; + } +} + [(set (attr "type") + (cond [(match_operand 2 "const0_operand") + (const_string "imov") + ] + (const_string "lea"))) + (set (attr "length_immediate") + (cond [(eq_attr "type" "imov") + (const_string "0") + ] + (const_string "*"))) + (set_attr "mode" "<MODE>")]) + +(define_peephole2 + [(parallel + [(set (match_operand:P 0 "register_operand") + (plus:P (match_dup 0) + (match_operand:P 1 "<nonmemory_operand>"))) + (clobber (mem:BLK (scratch)))])] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel + [(set (match_dup 0) + (plus:P (match_dup 0) (match_dup 1))) + (clobber (reg:CC 
FLAGS_REG)) + (clobber (mem:BLK (scratch)))])]) + (define_insn "@pro_epilogue_adjust_stack_sub_<mode>" [(set (match_operand:P 0 "register_operand" "=r") (minus:P (match_operand:P 1 "register_operand" "0") @@ -27928,10 +28161,8 @@ (match_operator 1 "compare_operator" [(and:QI (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand") - (const_int 8) - (const_int 8)]) 0) + (match_operator:SWI248 4 "extract_high_operator" + [(match_operand 2 "int248_register_operand")]) 0) (match_operand 3 "const_int_operand")) (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL @@ -27943,9 +28174,9 @@ (match_op_dup 1 [(and:QI (subreg:QI - (match_op_dup 4 [(match_dup 2) - (const_int 8) - (const_int 8)]) 0) + (zero_extract:SWI248 (match_dup 2) + (const_int 8) + (const_int 8)) 0) (match_dup 3)) (const_int 0)])) (set (zero_extract:SWI248 (match_dup 2) @@ -27954,9 +28185,9 @@ (subreg:SWI248 (and:QI (subreg:QI - (match_op_dup 4 [(match_dup 2) - (const_int 8) - (const_int 8)]) 0) + (zero_extract:SWI248 (match_dup 2) + (const_int 8) + (const_int 8)) 0) (match_dup 3)) 0))])]) ;; Don't do logical operations with memory inputs. @@ -28144,6 +28375,41 @@ const0_rtx); }) +;; For APX NDD PLUS/MINUS/LOGIC +;; Like cmpelim optimized pattern. +;; Reduce an extra mov instruction like +;; decl (%rdi), %eax +;; mov %eax, (%rdi) +;; to +;; decl (%rdi) +(define_peephole2 + [(parallel [(set (reg FLAGS_REG) + (compare (match_operator:SWI 2 "plusminuslogic_operator" + [(match_operand:SWI 0 "memory_operand") + (match_operand:SWI 1 "<nonmemory_operand>")]) + (const_int 0))) + (set (match_operand:SWI 3 "register_operand") (match_dup 2))]) + (set (match_dup 0) (match_dup 3))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (2, operands[3]) + && !reg_overlap_mentioned_p (operands[3], operands[0]) + && ix86_match_ccmode (peep2_next_insn (0), + (GET_CODE (operands[2]) == PLUS + || GET_CODE (operands[2]) == MINUS) + ? 
CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 4) (match_dup 6)) + (set (match_dup 0) (match_dup 5))])] +{ + operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0)); + operands[5] + = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + copy_rtx (operands[0]), operands[1]); + operands[6] + = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]), + const0_rtx); +}) + ;; Likewise for instances where we have a lea pattern. (define_peephole2 [(set (match_operand:SWI 0 "register_operand") @@ -28237,6 +28503,54 @@ const0_rtx); }) +;; For APX NDD XOR +;; Reduce 2 mov and 1 cmp instruction. +;; from +;; movq (%rdi), %rax +;; xorq %rsi, %rax, %rdx +;; movb %rdx, (%rdi) +;; cmpb %rsi, %rax +;; jne +;; to +;; xorb %rsi, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (match_operand:SWI 4 "register_operand") + (xor:SWI (match_operand:SWI 3 "register_operand") + (match_operand:SWI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_dup 4)) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI 5 "register_operand") + (match_operand:SWI 6 "<nonmemory_operand>")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && (rtx_equal_p (operands[0], operands[5]) + ? 
rtx_equal_p (operands[2], operands[6]) + : rtx_equal_p (operands[2], operands[5]) + && rtx_equal_p (operands[0], operands[6])) + && peep2_reg_dead_p (3, operands[4]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], QImode) + || any_QIreg_operand (operands[2], QImode))" + [(parallel [(set (match_dup 7) (match_dup 9)) + (set (match_dup 1) (match_dup 8))])] +{ + operands[7] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]), + operands[2]); + operands[9] + = gen_rtx_COMPARE (GET_MODE (operands[7]), + copy_rtx (operands[8]), + const0_rtx); +}) + (define_peephole2 [(set (match_operand:SWI12 0 "register_operand") (match_operand:SWI12 1 "memory_operand")) @@ -28480,6 +28794,58 @@ const0_rtx); }) +;; For APX NDD XOR +;; Reduce 2 mov and 1 cmp instruction. +;; from +;; movb (%rdi), %al +;; xorl %esi, %eax, %edx +;; movb %dl, (%rdi) +;; cmpb %sil, %al +;; jne +;; to +;; xorl %sil, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI12 0 "register_operand") + (match_operand:SWI12 1 "memory_operand")) + (parallel [(set (match_operand:SI 4 "register_operand") + (xor:SI (match_operand:SI 3 "register_operand") + (match_operand:SI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_operand:SWI12 5 "register_operand")) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI12 6 "register_operand") + (match_operand:SWI12 7 "<nonmemory_operand>")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && REGNO (operands[5]) == REGNO (operands[4]) + && (rtx_equal_p (operands[0], operands[6]) + ? (REG_P (operands[2]) + ? 
REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7]) + : rtx_equal_p (operands[2], operands[7])) + : (rtx_equal_p (operands[0], operands[7]) + && REG_P (operands[2]) + && REGNO (operands[2]) == REGNO (operands[6]))) + && peep2_reg_dead_p (3, operands[5]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], SImode) + || any_QIreg_operand (operands[2], SImode))" + [(parallel [(set (match_dup 8) (match_dup 10)) + (set (match_dup 1) (match_dup 9))])] +{ + operands[8] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]), + gen_lowpart (<MODE>mode, operands[2])); + operands[10] + = gen_rtx_COMPARE (GET_MODE (operands[8]), + copy_rtx (operands[9]), + const0_rtx); +}) + ;; Attempt to optimize away memory stores of values the memory already ;; has. See PR79593. (define_peephole2 @@ -29082,6 +29448,23 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) +(define_expand "crc_rev<SWI124:mode>si4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SWI124 2 "nonimmediate_operand") + (match_operand:SI 3)] + "TARGET_CRC32" +{ + /* crc32 uses iSCSI polynomial */ + if (INTVAL (operands[3]) == 0x1EDC6F41) + emit_insn (gen_sse4_2_crc32<mode> (operands[0], operands[1], operands[2])); + else + expand_reversed_crc_table_based (operands[0], operands[1], operands[2], + operands[3], <SWI124:MODE>mode, + generate_reflecting_code_standard); + DONE; +}) + (define_insn "rdpmc" [(set (match_operand:DI 0 "register_operand" "=A") (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")] |