From ed9612bad7866ad66df63e123175d8b3aaba08de Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Thu, 4 Sep 2025 02:46:39 -0700 Subject: aarch64: Use SVE for V2DImode integer min/max operations Unlike Advanced SIMD, SVE has instructions to perform smin, smax, umin, umax on 64-bit elements. Thus, we can use them with the fixed-width V2DImode expander. Most of the machinery is already there on the define_insn side, supporting V2DImode operands of the SVE pattern. We just need to wire up the RTL emission to the v2di standard names for the TARGET_SVE case. So for the smin case we now generate: min_di: ldr q30, [x0] ptrue p7.b, all ldr q31, [x1] smin z30.d, p7/m, z30.d, z31.d str q30, [x2] ret min_imm_di: ldr q31, [x0] smin z31.d, z31.d, #5 str q31, [x2] ret instead of the previous: min_di: ldr q30, [x0] ldr q31, [x1] cmgt v29.2d, v30.2d, v31.2d bsl v29.16b, v31.16b, v30.16b str q29, [x2] ret min_imm_di: ldr q31, [x0] mov z30.d, #5 cmgt v29.2d, v30.2d, v31.2d bsl v29.16b, v31.16b, v30.16b str q29, [x2] ret The register operand case is the same length, though the new ptrue can now be shared and moved away. But the immediate operand case is obviously better as the SVE immediate form doesn't require a predicate operand. Bootstrapped and tested on aarch64-none-linux-gnu. Signed-off-by: Kyrylo Tkachov gcc/ * config/aarch64/iterators.md (sve_di_suf): New mode attribute. * config/aarch64/aarch64-sve.md (<optab><mode>3 SVE_INT_BINARY_MULTI): Rename to... (<optab><mode>3<sve_di_suf>): ... This. Use SVE_I_SIMD_DI mode iterator. * config/aarch64/aarch64-simd.md (<su><maxmin>v2di3): Use the above for TARGET_SVE. gcc/testsuite/ * gcc.target/aarch64/sve/usminmax_di.c: New test. 
--- gcc/config/aarch64/aarch64-simd.md | 8 ++++ gcc/config/aarch64/aarch64-sve.md | 12 +++--- gcc/config/aarch64/iterators.md | 5 +++ gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c | 44 ++++++++++++++++++++++ 4 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c111dc2..14b9d5c 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1788,6 +1788,14 @@ enum rtx_code cmp_operator; rtx cmp_fmt; + /* SVE has native D-forms of the MIN/MAX instructions. */ + if (TARGET_SVE) + { + emit_insn (gen_<su><maxmin>v2di3_as_sve (operands[0], operands[1], + operands[2])); + DONE; + } + switch (<CODE>) { case UMIN: diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 51e2d7d..1ebcffe 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -4052,13 +4052,13 @@ ;; ------------------------------------------------------------------------- ;; Unpredicated integer binary operations that have an immediate form. 
-(define_expand "<optab><mode>3" - [(set (match_operand:SVE_I 0 "register_operand") - (unspec:SVE_I +(define_expand "<optab><mode>3<sve_di_suf>" + [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand") + (unspec:SVE_I_SIMD_DI [(match_dup 3) - (SVE_INT_BINARY_MULTI:SVE_I - (match_operand:SVE_I 1 "register_operand") - (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))] + (SVE_INT_BINARY_MULTI:SVE_I_SIMD_DI + (match_operand:SVE_I_SIMD_DI 1 "register_operand") + (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_operand"))] UNSPEC_PRED_X))] "TARGET_SVE" { diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7a6ea0d..451b00f 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1931,6 +1931,11 @@ (VNx8SI "VNx8HI") (VNx16SI "VNx16QI") (VNx8DI "VNx8HI")]) +;; Suffix mapping Advanced SIMD modes to be expanded as SVE instructions. +(define_mode_attr sve_di_suf [(VNx16QI "") (VNx8HI "") (VNx4SI "") (VNx2DI "") + (VNx8QI "") (VNx4QI "") (VNx2QI "") (VNx4HI "") + (VNx2HI "") (VNx2SI "") (V2DI "_as_sve")]) + ;; Register suffix narrowed modes for VQN. (define_mode_attr Vntype [(V8HI "8b") (V4SI "4h") (V2DI "2s")]) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c b/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c new file mode 100644 index 0000000..5405308 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 --param aarch64-autovec-preference=asimd-only" } */ + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) + +#define FUNC(T, N, S) \ +void min_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = MIN (a[i], b[i]); \ +} \ +void max_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = MAX (a[i], b[i]); \ +} \ +void min_imm_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = MIN (a[i], 5); \ +} \ +void max_imm_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] = MAX (a[i], 8); \ +} + +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +FUNC (long long, 2, di) + +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */ +FUNC (unsigned long long, 2, udi) + -- cgit v1.1