diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2022-02-15 18:09:35 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2022-02-15 18:09:35 +0000 |
commit | 8e84b2b37a541b27feea69769fc314d534464ebd (patch) | |
tree | 9a14883947209d80f42453ac8c91ccfdce5090be /gcc/config | |
parent | 65df3aefaa6c0735bd46ffdc7b5018a8b4020ab8 (diff) | |
download | gcc-8e84b2b37a541b27feea69769fc314d534464ebd.zip gcc-8e84b2b37a541b27feea69769fc314d534464ebd.tar.gz gcc-8e84b2b37a541b27feea69769fc314d534464ebd.tar.bz2 |
aarch64: Fix subs_compare_2.c regression [PR100874]
subs_compare_2.c tests that we can use a SUBS+CSEL sequence for:
    unsigned int
    foo (unsigned int a, unsigned int b)
    {
      unsigned int x = a - 4;
      if (a < 4)
        return x;
      else
        return 0;
    }
As Andrew notes in the PR, this is effectively MIN (a, 4) - 4,
and it is now recognised as such by phiopt. Previously it was
if-converted in RTL instead.
I tried to look for ways to generalise this to other situations
and to other ?:-style operations, not just max and min. However,
for general ?: we tend to push an outer “- CST” into the arms of
the ?: -- at least if one of them simplifies -- so I didn't find
any useful abstraction.
This patch therefore adds a pattern specifically for
max/min(a,cst)-cst. I'm not thrilled at having to do this,
but it seems like the least worst fix in the circumstances.
Also, max(a,cst)-cst for unsigned a is a useful saturating
subtraction idiom and so is arguably worth its own code
for that reason.
gcc/
PR target/100874
* config/aarch64/aarch64-protos.h (aarch64_maxmin_plus_const):
Declare.
* config/aarch64/aarch64.cc (aarch64_maxmin_plus_const): New function.
* config/aarch64/aarch64.md (*aarch64_minmax_plus): New pattern.
gcc/testsuite/
* gcc.target/aarch64/max_plus_1.c: New test.
* gcc.target/aarch64/max_plus_2.c: Likewise.
* gcc.target/aarch64/max_plus_3.c: Likewise.
* gcc.target/aarch64/max_plus_4.c: Likewise.
* gcc.target/aarch64/max_plus_5.c: Likewise.
* gcc.target/aarch64/max_plus_6.c: Likewise.
* gcc.target/aarch64/max_plus_7.c: Likewise.
* gcc.target/aarch64/min_plus_1.c: Likewise.
* gcc.target/aarch64/min_plus_2.c: Likewise.
* gcc.target/aarch64/min_plus_3.c: Likewise.
* gcc.target/aarch64/min_plus_4.c: Likewise.
* gcc.target/aarch64/min_plus_5.c: Likewise.
* gcc.target/aarch64/min_plus_6.c: Likewise.
* gcc.target/aarch64/min_plus_7.c: Likewise.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 104 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 27 |
3 files changed, 132 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 392efa0..d0e78d6 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -939,6 +939,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool, aarch64_addr_query_type = ADDR_QUERY_M); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); +bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool); rtx aarch64_load_tp (rtx); void aarch64_expand_compare_and_swap (rtx op[]); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 1a460d4..37ed22bc 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -3781,6 +3781,110 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y, return aarch64_gen_compare_reg (code, x, y); } +/* Consider the operation: + + OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3] + + where: + + - CODE is [SU]MAX or [SU]MIN + - OPERANDS[2] and OPERANDS[3] are constant integers + - OPERANDS[3] is a positive or negative shifted 12-bit immediate + - all operands have mode MODE + + Decide whether it is possible to implement the operation using: + + SUBS <tmp>, OPERANDS[1], -OPERANDS[3] + or + ADDS <tmp>, OPERANDS[1], OPERANDS[3] + + followed by: + + <insn> OPERANDS[0], <tmp>, [wx]zr, <cond> + + where <insn> is one of CSEL, CSINV or CSINC. Return true if so. + If GENERATE_P is true, also update OPERANDS as follows: + + OPERANDS[4] = -OPERANDS[3] + OPERANDS[5] = the rtl condition representing <cond> + OPERANDS[6] = <tmp> + OPERANDS[7] = 0 for CSEL, -1 for CSINV or 1 for CSINC. */ +bool +aarch64_maxmin_plus_const (rtx_code code, rtx *operands, bool generate_p) +{ + signop sgn = (code == UMAX || code == UMIN ? UNSIGNED : SIGNED); + rtx dst = operands[0]; + rtx maxmin_op = operands[2]; + rtx add_op = operands[3]; + machine_mode mode = GET_MODE (dst); + + /* max (x, y) - z == (x >= y + 1 ? 
x : y) - z + == (x >= y ? x : y) - z + == (x > y ? x : y) - z + == (x > y - 1 ? x : y) - z + + min (x, y) - z == (x <= y - 1 ? x : y) - z + == (x <= y ? x : y) - z + == (x < y ? x : y) - z + == (x < y + 1 ? x : y) - z + + Check whether z is in { y - 1, y, y + 1 } and pick the form(s) for + which x is compared with z. Set DIFF to y - z. Thus the supported + combinations are as follows, with DIFF being the value after the ":": + + max (x, y) - z == x >= y + 1 ? x - (y + 1) : -1 [z == y + 1] + == x >= y ? x - y : 0 [z == y] + == x > y ? x - y : 0 [z == y] + == x > y - 1 ? x - (y - 1) : 1 [z == y - 1] + + min (x, y) - z == x <= y - 1 ? x - (y - 1) : 1 [z == y - 1] + == x <= y ? x - y : 0 [z == y] + == x < y ? x - y : 0 [z == y] + == x < y + 1 ? x - (y + 1) : -1 [z == y + 1]. */ + auto maxmin_val = rtx_mode_t (maxmin_op, mode); + auto add_val = rtx_mode_t (add_op, mode); + auto sub_val = wi::neg (add_val); + auto diff = wi::sub (maxmin_val, sub_val); + if (!(diff == 0 + || (diff == 1 && wi::gt_p (maxmin_val, sub_val, sgn)) + || (diff == -1 && wi::lt_p (maxmin_val, sub_val, sgn)))) + return false; + + if (!generate_p) + return true; + + rtx_code cmp; + switch (code) + { + case SMAX: + cmp = diff == 1 ? GT : GE; + break; + case UMAX: + cmp = diff == 1 ? GTU : GEU; + break; + case SMIN: + cmp = diff == -1 ? LT : LE; + break; + case UMIN: + cmp = diff == -1 ? LTU : LEU; + break; + default: + gcc_unreachable (); + } + rtx cc = gen_rtx_REG (CCmode, CC_REGNUM); + + operands[4] = immed_wide_int_const (sub_val, mode); + operands[5] = gen_rtx_fmt_ee (cmp, VOIDmode, cc, const0_rtx); + if (can_create_pseudo_p ()) + operands[6] = gen_reg_rtx (mode); + else + operands[6] = dst; + operands[7] = immed_wide_int_const (diff, mode); + + return true; +} + + /* Build the SYMBOL_REF for __tls_get_addr. 
*/ static GTY(()) rtx tls_get_addr_libfunc; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 3c72bda..64cc21d 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4405,6 +4405,33 @@ } ) +;; Implement MAX/MIN (A, B) - C using SUBS/ADDS followed by CSEL/CSINV/CSINC. +;; See aarch64_maxmin_plus_const for details about the supported cases. +(define_insn_and_split "*aarch64_minmax_plus" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (MAXMIN:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "const_int_operand")) + (match_operand:GPI 3 "aarch64_plus_immediate"))) + (clobber (reg:CC CC_REGNUM))] + "aarch64_maxmin_plus_const (<CODE>, operands, false)" + "#" + "&& 1" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 4))) + (set (match_dup 6) + (plus:GPI (match_dup 1) (match_dup 3)))]) + (set (match_dup 0) + (if_then_else:GPI (match_dup 5) (match_dup 6) (match_dup 7)))] + { + if (!aarch64_maxmin_plus_const (<CODE>, operands, true)) + gcc_unreachable (); + } + [(set_attr "length" "8")] +) + ;; ------------------------------------------------------------------- ;; Logical operations ;; ------------------------------------------------------------------- |