diff options
author | Ian Lance Taylor <iant@golang.org> | 2023-06-26 09:57:21 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2023-06-26 09:57:21 -0700 |
commit | aa1e672b5d99102b03eb5fb9c51609c45f62bff7 (patch) | |
tree | 886212591b1c9d127eaaf234a4a2e22452ea384a /gcc/config/i386 | |
parent | 97e31a0a2a2d2273687fcdb4e5416aab1a2186e1 (diff) | |
parent | 3a39a31b8ae9c6465434aefa657f7fcc86f905c0 (diff) | |
download | gcc-devel/gccgo.zip gcc-devel/gccgo.tar.gz gcc-devel/gccgo.tar.bz2 |
Merge from trunk revision 3a39a31b8ae9c6465434aefa657f7fcc86f905c0.devel/gccgo
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 12 | ||||
-rw-r--r-- | gcc/config/i386/i386-options.cc | 6 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 23 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 56 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 96 |
5 files changed, 170 insertions, 23 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 7bb4d39..9a8d244 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -10234,6 +10234,18 @@ ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, machine_mode mode1 = insn_data[d->icode].operand[1].mode; enum rtx_code comparison = d->comparison; + /* ptest reg, reg sets the carry flag. */ + if (comparison == LTU + && (d->code == IX86_BUILTIN_PTESTC + || d->code == IX86_BUILTIN_PTESTC256) + && rtx_equal_p (op0, op1)) + { + if (!target) + target = gen_reg_rtx (SImode); + emit_move_insn (target, const1_rtx); + return target; + } + if (VECTOR_MODE_P (mode0)) op0 = safe_vector_operand (op0, mode0); if (VECTOR_MODE_P (mode1)) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 2cb0bdd..7f593ce 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1400,7 +1400,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) opts->x_ix86_tune_string = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); - else if (orig_tune_defaulted) + /* If we have explicit arch string and no tune string specified, set + tune_string to NULL and later it will be overriden by arch_string + so target clones can get proper optimization. */ + else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH] + || orig_tune_defaulted) opts->x_ix86_tune_string = NULL; /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 32851a5..0761965 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -21423,16 +21423,23 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else if (XINT (x, 1) == UNSPEC_PTEST) { *total = cost->sse_op; - if (XVECLEN (x, 0) == 2 - && GET_CODE (XVECEXP (x, 0, 0)) == AND) + rtx test_op0 = XVECEXP (x, 0, 0); + if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1))) + return false; + if (GET_CODE (test_op0) == AND) { - rtx andop = XVECEXP (x, 0, 0); - *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop), - AND, opno, speed) - + rtx_cost (XEXP (andop, 1), GET_MODE (andop), - AND, opno, speed); - return true; + rtx and_op0 = XEXP (test_op0, 0); + if (GET_CODE (and_op0) == NOT) + and_op0 = XEXP (and_op0, 0); + *total += rtx_cost (and_op0, GET_MODE (and_op0), + AND, 0, speed) + + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0), + AND, 1, speed); } + else + *total = rtx_cost (test_op0, GET_MODE (test_op0), + UNSPEC, 0, speed); + return true; } return false; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 95a6653c..15c0310 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -11380,6 +11380,8 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +;; *andqi_ext<mode>_3 is defined via *<code>qi_ext<mode>_3 below. + ;; Convert wide AND instructions with immediate operand to shorter QImode ;; equivalents when possible. ;; Don't do the splitting with memory operands, since it introduces risk @@ -12092,6 +12094,26 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*<code>qi_ext<mode>_3" + [(set (zero_extract:SWI248 + (match_operand 0 "int248_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (zero_extract:SWI248 + (any_logic:SWI248 + (match_operand 1 "int248_register_operand" "%0") + (match_operand 2 "int248_register_operand" "Q")) + (const_int 8) + (const_int 8))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + /* FIXME: without this LRA can't reload this pattern, see PR82524. */ + && (rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" + "<logic>{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + ;; Convert wide OR instructions with immediate operand to shorter QImode ;; equivalents when possible. ;; Don't do the splitting with memory operands, since it introduces risk @@ -12206,6 +12228,18 @@ (set_attr "type" "alu") (set_attr "mode" "QI")]) +;; Peephole2 rega = 0; rega op= regb into rega = regb. +(define_peephole2 + [(parallel [(set (match_operand:SWI 0 "general_reg_operand") + (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (any_or_plus:SWI (match_dup 0) + (match_operand:SWI 1 "<general_operand>"))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set (match_dup 0) (match_dup 1))]) + ;; Split DST = (HI<<32)|LO early to minimize register usage. (define_insn_and_split "*concat<mode><dwi>3_1" [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r") @@ -13365,6 +13399,28 @@ [(const_int 0)] "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;") +(define_insn_and_split "*ashl<dwi>3_doubleword_highpart" + [(set (match_operand:<DWI> 0 "register_operand" "=r") + (ashift:<DWI> + (any_extend:<DWI> (match_operand:DWIH 1 "nonimmediate_operand" "rm")) + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC FLAGS_REG))] + "INTVAL (operands[2]) >= <MODE_SIZE> * BITS_PER_UNIT + && INTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT * 2" + "#" + "&& reload_completed" + [(const_int 0)] +{ + split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]); + int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT); + if (!rtx_equal_p (operands[3], operands[1])) + emit_move_insn (operands[3], operands[1]); + if (bits > 0) + emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits))); + ix86_expand_clear (operands[0]); + DONE; +}) + (define_insn "x86_64_shld" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f793258..3b50c71 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1465,12 +1465,12 @@ }) (define_insn "*<avx512>_load<mode>_mask" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI12_AVX512VL - (unspec:VI12_AVX512VL - [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")] + [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v") + (vec_merge:VI12HFBF_AVX512VL + (unspec:VI12HFBF_AVX512VL + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")] UNSPEC_MASKLOAD) - (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C") + (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] "TARGET_AVX512BW" "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" @@ -1479,9 +1479,9 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn_and_split "*<avx512>_load<mode>" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") - (unspec:VI12_AVX512VL - [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")] + [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v") + (unspec:VI12HFBF_AVX512VL + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")] UNSPEC_MASKLOAD))] "TARGET_AVX512BW" "#" @@ -23490,6 +23490,70 @@ [(set (reg:CCZ FLAGS_REG) (unspec:CCZ [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))]) +;; ptest reg,reg sets the carry flag. +(define_split + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(match_operand:V_AVX 0 "register_operand") + (match_operand:V_AVX 1 "register_operand")] + UNSPEC_PTEST))] + "TARGET_SSE4_1 + && rtx_equal_p (operands[0], operands[1])" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(const_int 0)] UNSPEC_STC))]) + +;; Changing the CCmode of FLAGS_REG requires updating both def and use. +;; pandn/ptestz/set{n?}e -> ptestc/set{n?}c +(define_split + [(set (match_operand:SWI 0 "register_operand") + (match_operator:SWI 3 "bt_comparison_operator" + [(unspec:CCZ [ + (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand")) + (match_operand:V_AVX 2 "register_operand")) + (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))] + UNSPEC_PTEST) + (const_int 0)]))] + "TARGET_SSE4_1" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST)) + (set (match_dup 0) + (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))]) + +(define_split + [(set (strict_low_part (match_operand:QI 0 "register_operand")) + (match_operator:QI 3 "bt_comparison_operator" + [(unspec:CCZ [ + (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand")) + (match_operand:V_AVX 2 "register_operand")) + (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))] + UNSPEC_PTEST) + (const_int 0)]))] + "TARGET_SSE4_1" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST)) + (set (strict_low_part (match_dup 0)) + (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))]) + +;; pandn/ptestz/j{n?}e -> ptestc/j{n?}c +(define_split + [(set (pc) + (if_then_else + (match_operator 3 "bt_comparison_operator" + [(unspec:CCZ [ + (and:V_AVX + (not:V_AVX (match_operand:V_AVX 1 "register_operand")) + (match_operand:V_AVX 2 "register_operand")) + (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))] + UNSPEC_PTEST) + (const_int 0)]) + (match_operand 0) + (pc)))] + "TARGET_SSE4_1" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST)) + (set (pc) (if_then_else (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]) + (match_dup 0) + (pc)))]) + (define_expand "nearbyint<mode>2" [(set (match_operand:VFH 0 "register_operand") (unspec:VFH @@ -26915,17 +26979,21 @@ "TARGET_AVX") (define_expand "maskload<mode><avx512fmaskmodelower>" - [(set (match_operand:V48H_AVX512VL 0 "register_operand") - (vec_merge:V48H_AVX512VL - (match_operand:V48H_AVX512VL 1 "memory_operand") + [(set (match_operand:V48_AVX512VL 0 "register_operand") + (vec_merge:V48_AVX512VL + (unspec:V48_AVX512VL + [(match_operand:V48_AVX512VL 1 "memory_operand")] + UNSPEC_MASKLOAD) (match_dup 0) (match_operand:<avx512fmaskmode> 2 "register_operand")))] "TARGET_AVX512F") (define_expand "maskload<mode><avx512fmaskmodelower>" - [(set (match_operand:VI12_AVX512VL 0 "register_operand") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "memory_operand") + [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand") + (vec_merge:VI12HFBF_AVX512VL + (unspec:VI12HFBF_AVX512VL + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")] + UNSPEC_MASKLOAD) (match_dup 0) (match_operand:<avx512fmaskmode> 2 "register_operand")))] "TARGET_AVX512BW") |