diff options
author | Kyrylo Tkachov <ktkachov@nvidia.com> | 2025-09-04 02:46:39 -0700 |
---|---|---|
committer | Kyrylo Tkachov <ktkachov@nvidia.com> | 2025-09-05 10:49:22 +0200 |
commit | ed9612bad7866ad66df63e123175d8b3aaba08de (patch) | |
tree | d3e6211cc4935e00565e884011b55252da4d7d6e /gcc | |
parent | 6b6a2d461bfd3c81cc35c9989b225845681357cb (diff) | |
download | gcc-ed9612bad7866ad66df63e123175d8b3aaba08de.zip gcc-ed9612bad7866ad66df63e123175d8b3aaba08de.tar.gz gcc-ed9612bad7866ad66df63e123175d8b3aaba08de.tar.bz2 |
aarch64: Use SVE for V2DImode integer min/max operations
Unlike Advanced SIMD, SVE has instructions to perform smin, smax, umin, umax
on 64-bit elements. Thus, we can use them with the fixed-width V2DImode
expander. Most of the machinery is already there on the define_insn side,
supporting V2DImode operands of the SVE pattern. We just need to wire up
the RTL emission to the v2di standard names for the TARGET_SVE case.
So for the smin case we now generate:
min_di:
        ldr     q30, [x0]
        ptrue   p7.b, all
        ldr     q31, [x1]
        smin    z30.d, p7/m, z30.d, z31.d
        str     q30, [x2]
        ret
min_imm_di:
        ldr     q31, [x0]
        smin    z31.d, z31.d, #5
        str     q31, [x2]
        ret
instead of the previous:
min_di:
        ldr     q30, [x0]
        ldr     q31, [x1]
        cmgt    v29.2d, v30.2d, v31.2d
        bsl     v29.16b, v31.16b, v30.16b
        str     q29, [x2]
        ret
min_imm_di:
        ldr     q31, [x0]
        mov     z30.d, #5
        cmgt    v29.2d, v30.2d, v31.2d
        bsl     v29.16b, v31.16b, v30.16b
        str     q29, [x2]
        ret
The register operand case is the same length, though the new ptrue can be
shared with other predicated instructions and hoisted out of loops. The
immediate operand case is clearly better, as the SVE immediate form doesn't
require a predicate operand.
Bootstrapped and tested on aarch64-none-linux-gnu.
Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>
gcc/
* config/aarch64/iterators.md (sve_di_suf): New mode attribute.
* config/aarch64/aarch64-sve.md (<optab><mode>3 SVE_INT_BINARY_MULTI):
Rename to...
(<optab><mode>3<sve_di_suf>): ... This. Use SVE_I_SIMD_DI mode
iterator.
* config/aarch64/aarch64-simd.md (<su><maxmin>v2di3): Use the above
for TARGET_SVE.
gcc/testsuite/
* gcc.target/aarch64/sve/usminmax_di.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 12 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c | 44 |
4 files changed, 63 insertions, 6 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c111dc2..14b9d5c 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1788,6 +1788,14 @@ enum rtx_code cmp_operator; rtx cmp_fmt; + /* SVE has native D-forms of the MIN/MAX instructions. */ + if (TARGET_SVE) + { + emit_insn (gen_<su><maxmin>v2di3_as_sve (operands[0], operands[1], + operands[2])); + DONE; + } + switch (<CODE>) { case UMIN: diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 51e2d7d..1ebcffe 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -4052,13 +4052,13 @@ ;; ------------------------------------------------------------------------- ;; Unpredicated integer binary operations that have an immediate form. -(define_expand "<optab><mode>3" - [(set (match_operand:SVE_I 0 "register_operand") - (unspec:SVE_I +(define_expand "<optab><mode>3<sve_di_suf>" + [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand") + (unspec:SVE_I_SIMD_DI [(match_dup 3) - (SVE_INT_BINARY_MULTI:SVE_I - (match_operand:SVE_I 1 "register_operand") - (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))] + (SVE_INT_BINARY_MULTI:SVE_I_SIMD_DI + (match_operand:SVE_I_SIMD_DI 1 "register_operand") + (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_operand"))] UNSPEC_PRED_X))] "TARGET_SVE" { diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7a6ea0d..451b00f 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1931,6 +1931,11 @@ (VNx8SI "VNx8HI") (VNx16SI "VNx16QI") (VNx8DI "VNx8HI")]) +;; Suffix mapping Advanced SIMD modes to be expanded as SVE instructions. +(define_mode_attr sve_di_suf [(VNx16QI "") (VNx8HI "") (VNx4SI "") (VNx2DI "") + (VNx8QI "") (VNx4QI "") (VNx2QI "") (VNx4HI "") + (VNx2HI "") (VNx2SI "") (V2DI "_as_sve")]) + ;; Register suffix narrowed modes for VQN. 
(define_mode_attr Vntype [(V8HI "8b") (V4SI "4h") (V2DI "2s")]) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c b/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c new file mode 100644 index 0000000..5405308 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 --param aarch64-autovec-preference=asimd-only" } */ + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define FUNC(T, N, S) \ +void min_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = MIN (a[i], b[i]); \ +} \ +void max_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = MAX (a[i], b[i]); \ +} \ +void min_imm_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = MIN (a[i], 5); \ +} \ +void max_imm_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = MAX (a[i], 8); \ +} + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +FUNC (long long, 2, di) + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +FUNC (unsigned long long, 2, udi) + |