diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2021-07-01 10:56:32 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2021-07-01 11:19:28 +0200 |
commit | 9f6aeb85ee87c6b4e580b6b71e26cbe99e1dab70 (patch) | |
tree | eb44f095de9f8f4ad52c769852078ea2fe1a089e /gcc | |
parent | 7d8211603a3d04384812b481b0ae01205a287a72 (diff) | |
download | gcc-9f6aeb85ee87c6b4e580b6b71e26cbe99e1dab70.zip gcc-9f6aeb85ee87c6b4e580b6b71e26cbe99e1dab70.tar.gz gcc-9f6aeb85ee87c6b4e580b6b71e26cbe99e1dab70.tar.bz2 |
i386: Add integer nabs instructions [PR101044]
The patch adds integer nabs "(NEG (ABS (...)))" instructions, adds STV
conversion and adjusts STV cost calculations accordingly. When the CMOV
instruction is used to implement abs, the sign is determined from the
preceding operand negation, and CMOVS is used to select between the
negated and non-negated values.
To implement nabs, just reverse the condition and emit CMOVNS instead.
The STV costs are adjusted for the inherent NEG of the nabs insn. V2DI NEG
is a somewhat costly operation, since it is implemented as a load of zero,
followed by a SUB insn. OTOH, integer nabs with inherent NEG is relatively
cheap, so some STV chains became less profitable for conversion.
The patch rewrites operand scanner in compute_convert_gain to a switch
and reorders case instances in general_scalar_to_vector_candidate_p
to benefit from fallthroughs, and to remove special processing of
andnot in the later case.
gcc/
2021-07-01 Uroš Bizjak <ubizjak@gmail.com>
PR target/101044
* config/i386/i386.md (*nabs<dwi>2_doubleword):
New insn_and_split pattern.
(*nabs<dwi>2_1): Ditto.
* config/i386/i386-features.c
(general_scalar_chain::compute_convert_gain):
Handle (NEG (ABS (...))) RTX. Rewrite src code
scanner as switch statement.
(general_scalar_chain::convert_insn):
Handle (NEG (ABS (...))) RTX.
(general_scalar_to_vector_candidate_p):
Detect (NEG (ABS (...))) RTX. Reorder case statements
for (AND (NOT (...) ...)) fallthrough.
gcc/testsuite/
2021-07-01 Uroš Bizjak <ubizjak@gmail.com>
PR target/101044
* gcc.target/i386/pr101044.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386-features.c | 195 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 72 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr101044.c | 9 |
3 files changed, 196 insertions, 80 deletions
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index a25769a..cbd430a 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -544,71 +544,83 @@ general_scalar_chain::compute_convert_gain () += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]; else if (MEM_P (src) && REG_P (dst)) igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx]; - else if (GET_CODE (src) == ASHIFT - || GET_CODE (src) == ASHIFTRT - || GET_CODE (src) == LSHIFTRT) - { - if (m == 2) - { - if (INTVAL (XEXP (src, 1)) >= 32) - igain += ix86_cost->add; - else - igain += ix86_cost->shift_const; - } + else + switch (GET_CODE (src)) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (m == 2) + { + if (INTVAL (XEXP (src, 1)) >= 32) + igain += ix86_cost->add; + else + igain += ix86_cost->shift_const; + } - igain += ix86_cost->shift_const - ix86_cost->sse_op; + igain += ix86_cost->shift_const - ix86_cost->sse_op; - if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); - } - else if (GET_CODE (src) == PLUS - || GET_CODE (src) == MINUS - || GET_CODE (src) == IOR - || GET_CODE (src) == XOR - || GET_CODE (src) == AND) - { - igain += m * ix86_cost->add - ix86_cost->sse_op; - /* Additional gain for andnot for targets without BMI. */ - if (GET_CODE (XEXP (src, 0)) == NOT - && !TARGET_BMI) - igain += m * ix86_cost->add; - - if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); - if (CONST_INT_P (XEXP (src, 1))) - igain -= vector_const_cost (XEXP (src, 1)); - } - else if (GET_CODE (src) == NEG - || GET_CODE (src) == NOT) - igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1); - else if (GET_CODE (src) == ABS - || GET_CODE (src) == SMAX - || GET_CODE (src) == SMIN - || GET_CODE (src) == UMAX - || GET_CODE (src) == UMIN) - { - /* We do not have any conditional move cost, estimate it as a - reg-reg move. Comparisons are costed as adds. 
*/ - igain += m * (COSTS_N_INSNS (2) + ix86_cost->add); - /* Integer SSE ops are all costed the same. */ - igain -= ix86_cost->sse_op; - } - else if (GET_CODE (src) == COMPARE) - { - /* Assume comparison cost is the same. */ - } - else if (CONST_INT_P (src)) - { - if (REG_P (dst)) - /* DImode can be immediate for TARGET_64BIT and SImode always. */ - igain += m * COSTS_N_INSNS (1); - else if (MEM_P (dst)) - igain += (m * ix86_cost->int_store[2] - - ix86_cost->sse_store[sse_cost_idx]); - igain -= vector_const_cost (src); - } - else - gcc_unreachable (); + if (CONST_INT_P (XEXP (src, 0))) + igain -= vector_const_cost (XEXP (src, 0)); + break; + + case AND: + case IOR: + case XOR: + case PLUS: + case MINUS: + igain += m * ix86_cost->add - ix86_cost->sse_op; + /* Additional gain for andnot for targets without BMI. */ + if (GET_CODE (XEXP (src, 0)) == NOT + && !TARGET_BMI) + igain += m * ix86_cost->add; + + if (CONST_INT_P (XEXP (src, 0))) + igain -= vector_const_cost (XEXP (src, 0)); + if (CONST_INT_P (XEXP (src, 1))) + igain -= vector_const_cost (XEXP (src, 1)); + break; + + case NEG: + case NOT: + igain -= ix86_cost->sse_op + COSTS_N_INSNS (1); + + if (GET_CODE (XEXP (src, 0)) != ABS) + { + igain += m * ix86_cost->add; + break; + } + /* FALLTHRU */ + + case ABS: + case SMAX: + case SMIN: + case UMAX: + case UMIN: + /* We do not have any conditional move cost, estimate it as a + reg-reg move. Comparisons are costed as adds. */ + igain += m * (COSTS_N_INSNS (2) + ix86_cost->add); + /* Integer SSE ops are all costed the same. */ + igain -= ix86_cost->sse_op; + break; + + case COMPARE: + /* Assume comparison cost is the same. */ + break; + + case CONST_INT: + if (REG_P (dst)) + /* DImode can be immediate for TARGET_64BIT and SImode always. 
*/ + igain += m * COSTS_N_INSNS (1); + else if (MEM_P (dst)) + igain += (m * ix86_cost->int_store[2] + - ix86_cost->sse_store[sse_cost_idx]); + igain -= vector_const_cost (src); + break; + + default: + gcc_unreachable (); + } if (igain != 0 && dump_file) { @@ -1009,7 +1021,19 @@ general_scalar_chain::convert_insn (rtx_insn *insn) case NEG: src = XEXP (src, 0); - convert_op (&src, insn); + + if (GET_CODE (src) == ABS) + { + src = XEXP (src, 0); + convert_op (&src, insn); + subreg = gen_reg_rtx (vmode); + emit_insn_before (gen_rtx_SET (subreg, + gen_rtx_ABS (vmode, src)), insn); + src = subreg; + } + else + convert_op (&src, insn); + subreg = gen_reg_rtx (vmode); emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn); src = gen_rtx_MINUS (vmode, subreg, src); @@ -1042,9 +1066,10 @@ general_scalar_chain::convert_insn (rtx_insn *insn) gcc_assert (REG_P (src) && GET_MODE (src) == DImode); subreg = gen_rtx_SUBREG (V2DImode, src, 0); - emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg), - copy_rtx_if_shared (subreg), - copy_rtx_if_shared (subreg)), + emit_insn_before (gen_vec_interleave_lowv2di + (copy_rtx_if_shared (subreg), + copy_rtx_if_shared (subreg), + copy_rtx_if_shared (subreg)), insn); dst = gen_rtx_REG (CCmode, FLAGS_REG); src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg), @@ -1400,11 +1425,11 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) return false; /* Fallthru. */ - case PLUS: - case MINUS: + case AND: case IOR: case XOR: - case AND: + case PLUS: + case MINUS: if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)) && !CONST_INT_P (XEXP (src, 1))) @@ -1413,18 +1438,32 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) if (GET_MODE (XEXP (src, 1)) != mode && !CONST_INT_P (XEXP (src, 1))) return false; + + /* Check for andnot case. 
*/ + if (GET_CODE (src) != AND + || GET_CODE (XEXP (src, 0)) != NOT) + break; + + src = XEXP (src, 0); + /* FALLTHRU */ + + case NOT: break; + case NEG: + /* Check for nabs case. */ + if (GET_CODE (XEXP (src, 0)) != ABS) + break; + + src = XEXP (src, 0); + /* FALLTHRU */ + case ABS: if ((mode == DImode && !TARGET_AVX512VL) || (mode == SImode && !TARGET_SSSE3)) return false; break; - case NEG: - case NOT: - break; - case REG: return true; @@ -1438,12 +1477,8 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode) if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)) - && !CONST_INT_P (XEXP (src, 0)) - /* Check for andnot case. */ - && (GET_CODE (src) != AND - || GET_CODE (XEXP (src, 0)) != NOT - || !REG_P (XEXP (XEXP (src, 0), 0)))) - return false; + && !CONST_INT_P (XEXP (src, 0))) + return false; if (GET_MODE (XEXP (src, 0)) != mode && !CONST_INT_P (XEXP (src, 0))) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9b619e2..156c6a9 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -10305,6 +10305,50 @@ split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); }) +(define_insn_and_split "*nabs<dwi>2_doubleword" + [(set (match_operand:<DWI> 0 "register_operand") + (neg:<DWI> + (abs:<DWI> + (match_operand:<DWI> 1 "general_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (reg:CCC FLAGS_REG) + (ne:CCC (match_dup 1) (const_int 0))) + (set (match_dup 2) (neg:DWIH (match_dup 1)))]) + (parallel + [(set (match_dup 5) + (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CCGOC FLAGS_REG) + (compare:CCGOC + (neg:DWIH (match_dup 5)) + (const_int 0))) + (set (match_dup 5) + (neg:DWIH (match_dup 5)))]) + (set (match_dup 0) + (if_then_else:DWIH + (lt (reg:CCGOC FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 
1))) + (set (match_dup 3) + (if_then_else:DWIH + (lt (reg:CCGOC FLAGS_REG) (const_int 0)) + (match_dup 5) + (match_dup 4)))] +{ + operands[1] = force_reg (<DWI>mode, operands[1]); + operands[2] = gen_reg_rtx (<DWI>mode); + + split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]); +}) + (define_insn_and_split "*abs<mode>2_1" [(set (match_operand:SWI 0 "register_operand") (abs:SWI @@ -10332,6 +10376,34 @@ operands[2] = gen_reg_rtx (<MODE>mode); }) +(define_insn_and_split "*nabs<mode>2_1" + [(set (match_operand:SWI 0 "register_operand") + (neg:SWI + (abs:SWI + (match_operand:SWI 1 "general_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE + && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (reg:CCGOC FLAGS_REG) + (compare:CCGOC + (neg:SWI (match_dup 1)) + (const_int 0))) + (set (match_dup 2) + (neg:SWI (match_dup 1)))]) + (set (match_dup 0) + (if_then_else:SWI + (lt (reg:CCGOC FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 1)))] +{ + operands[1] = force_reg (<MODE>mode, operands[1]); + operands[2] = gen_reg_rtx (<MODE>mode); +}) + (define_expand "<code>tf2" [(set (match_operand:TF 0 "register_operand") (absneg:TF (match_operand:TF 1 "register_operand")))] diff --git a/gcc/testsuite/gcc.target/i386/pr101044.c b/gcc/testsuite/gcc.target/i386/pr101044.c new file mode 100644 index 0000000..03df86d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101044.c @@ -0,0 +1,9 @@ +/* PR target/101044 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=generic" } */ +/* { dg-final { scan-assembler-times "neg" 1 } } */ + +int foo (int x) +{ + return (x < 0) ? x : -x; +} |