diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-04-21 19:06:37 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-04-21 19:07:08 +0100 |
commit | 88195141aea9561b26a72023c446ce9c1151340e (patch) | |
tree | 187e27819e6f4e049f4c7d8978737b3dbaba07b0 | |
parent | 573624ec90c80d1a024ab405e2575785b869a833 (diff) | |
download | gcc-88195141aea9561b26a72023c446ce9c1151340e.zip gcc-88195141aea9561b26a72023c446ce9c1151340e.tar.gz gcc-88195141aea9561b26a72023c446ce9c1151340e.tar.bz2 |
aarch64: Emit single-instruction for smin (x, 0) and smax (x, 0)
Motivated by https://reviews.llvm.org/D148249, we can expand the SMIN (x, 0) and SMAX (x, 0) cases
to a single instruction by combining AND (for SMIN) or BIC (for SMAX) with an ASR-shifted copy of x
(shift amount: bit width minus one).
Given that we already have well-fitting TARGET_CSSC patterns and expanders for the min/max codes
in the backend, this patch does some minor refactoring to ensure we emit the right SMAX/SMIN RTL codes
for TARGET_CSSC. For !TARGET_CSSC we either fall back to the generic expanders or, for the
comparison-with-zero case, emit a simple SMIN/SMAX with 0 RTX that is now matched by a separate pattern.
Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/ChangeLog:
* config/aarch64/aarch64.md (aarch64_umax<mode>3_insn): Delete.
(umax<mode>3): Emit raw UMAX RTL instead of going through gen_ function
for umax.
(<optab><mode>3): New define_expand for MAXMIN_NOUMAX codes.
(*aarch64_<optab><mode>3_zero): Define.
(*aarch64_<optab><mode>3_cssc): Likewise.
* config/aarch64/iterators.md (maxminand): New code attribute.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sminmax-asr_1.c: New test.
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 50 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sminmax-asr_1.c | 60 |
3 files changed, 97 insertions, 15 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index f46d738..eaa87bf 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4412,17 +4412,6 @@ [(set_attr "type" "csel")] ) -(define_insn "aarch64_umax<mode>3_insn" - [(set (match_operand:GPI 0 "register_operand" "=r,r") - (umax:GPI (match_operand:GPI 1 "register_operand" "r,r") - (match_operand:GPI 2 "aarch64_uminmax_operand" "r,Uum")))] - "TARGET_CSSC" - "@ - umax\\t%<w>0, %<w>1, %<w>2 - umax\\t%<w>0, %<w>1, %2" - [(set_attr "type" "alu_sreg,alu_imm")] -) - ;; If X can be loaded by a single CNT[BHWD] instruction, ;; ;; A = UMAX (B, X) @@ -4466,8 +4455,8 @@ operands[1] = force_reg (<MODE>mode, operands[1]); if (!aarch64_uminmax_operand (operands[2], <MODE>mode)) operands[2] = force_reg (<MODE>mode, operands[2]); - emit_insn (gen_aarch64_umax<mode>3_insn (operands[0], operands[1], - operands[2])); + emit_move_insn (operands[0], gen_rtx_UMAX (<MODE>mode, operands[1], + operands[2])); DONE; } else @@ -6759,9 +6748,30 @@ [(set_attr "type" "ffarith<stype>")] ) -(define_insn "<optab><mode>3" +;; Expander for integer smin, smax, umin. Mainly used to generate +;; straightforward RTL for TARGET_CSSC. When that is not available +;; FAIL and let the generic expanders generate the CMP + CSEL sequences, +;; except for the SMIN and SMAX with zero cases, for which we have a +;; single instruction even for the base architecture. 
+(define_expand "<optab><mode>3" + [(set (match_operand:GPI 0 "register_operand") + (MAXMIN_NOUMAX:GPI + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "aarch64_<su>minmax_operand")))] + "" + { + if (!TARGET_CSSC) + { + if (operands[2] != CONST0_RTX (<MODE>mode) + || !(<CODE> == SMAX || <CODE> == SMIN)) + FAIL; + } + } +) + +(define_insn "*aarch64_<optab><mode>3_cssc" [(set (match_operand:GPI 0 "register_operand" "=r,r") - (MAXMIN_NOUMAX:GPI (match_operand:GPI 1 "register_operand" "r,r") + (MAXMIN:GPI (match_operand:GPI 1 "register_operand" "r,r") (match_operand:GPI 2 "aarch64_<su>minmax_operand" "r,U<su>m")))] "TARGET_CSSC" "@ @@ -6770,6 +6780,16 @@ [(set_attr "type" "alu_sreg,alu_imm")] ) +(define_insn "*aarch64_<optab><mode>3_zero" + [(set (match_operand:GPI 0 "register_operand" "=r") + (FMAXMIN:GPI + (match_operand:GPI 1 "register_operand" "r") + (const_int 0)))] + "" + "<maxminand>\\t%<w>0, %<w>1, %<w>1, asr <sizem1>"; + [(set_attr "type" "logic_shift_imm")] +) + ;; Given that smax/smin do not specify the result when either input is NaN, ;; we could use either FMAXNM or FMAX for smax, and either FMINNM or FMIN ;; for smin. diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d3c43a2..d0184c8 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2435,6 +2435,8 @@ (umax "max") (umin "min")]) +(define_code_attr maxminand [(smax "bic") (smin "and")]) + ;; MLA/MLS attributes. (define_code_attr as [(ss_plus "a") (ss_minus "s")]) diff --git a/gcc/testsuite/gcc.target/aarch64/sminmax-asr_1.c b/gcc/testsuite/gcc.target/aarch64/sminmax-asr_1.c new file mode 100644 index 0000000..63396ec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sminmax-asr_1.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-options "-O --save-temps" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <stdint.h> + +#pragma GCC target "+nocssc" + +#define MAX(X, Y) ((X) > (Y) ? 
(X) : (Y)) +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) + + +/* +** minzero: +** and w0, w0, w0, asr #31 +** ret +*/ + +int32_t +minzero (int32_t a) +{ + return MIN (a, 0); +} + +/* +** maxzero: +** bic w0, w0, w0, asr #31 +** ret +*/ + +int32_t +maxzero (int32_t a) +{ + return MAX (a, 0); +} + +/* +** minzerol: +** and x0, x0, x0, asr #63 +** ret +*/ + +int64_t +minzerol (int64_t a) +{ + return MIN (a, 0); +} + +/* +** maxzerol: +** bic x0, x0, x0, asr #63 +** ret +*/ + +int64_t +maxzerol (int64_t a) +{ + return MAX (a, 0); +} + |