aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2024-11-07 20:34:48 +0000
committer: Richard Sandiford <richard.sandiford@arm.com> 2024-11-07 20:34:48 +0000
commit: f5962839d6e0c3115931e68d938d9a0cd7a383b1 (patch)
tree: 1361904635e6021c7a94f6e1f07facf7fa6b924b
parent: 0e1382034246a594f1da8dbaee97c4a06743f31a (diff)
download: gcc-f5962839d6e0c3115931e68d938d9a0cd7a383b1.zip
gcc-f5962839d6e0c3115931e68d938d9a0cd7a383b1.tar.gz
gcc-f5962839d6e0c3115931e68d938d9a0cd7a383b1.tar.bz2
aarch64: Restrict FCLAMP to SME2
There are two sets of patterns for FCLAMP: one set for single registers
and one set for multiple registers.  The multiple-register set was
correctly gated on SME2, but the single-register set only required SME.

This doesn't matter for ACLE usage, since the intrinsic definitions
are correctly gated.  But it does matter for automatic generation of
FCLAMP from separate minimum and maximum operations (either ACLE
intrinsics or autovectorised code).

gcc/
	* config/aarch64/aarch64-sve2.md (@aarch64_sve_fclamp<mode>)
	(*aarch64_sve_fclamp<mode>_x): Require TARGET_STREAMING_SME2
	rather than TARGET_STREAMING_SME.

gcc/testsuite/
	* gcc.target/aarch64/sme/clamp_3.c: Force sme2.
	* gcc.target/aarch64/sme/clamp_4.c: Likewise.
	* gcc.target/aarch64/sme/clamp_5.c: New test.
-rw-r--r--  gcc/config/aarch64/aarch64-sve2.md  4
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c  24
4 files changed, 30 insertions, 2 deletions
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 8047f40..08f83fc 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1117,7 +1117,7 @@
UNSPEC_FMAXNM)
(match_operand:SVE_FULL_F 3 "register_operand")]
UNSPEC_FMINNM))]
- "TARGET_STREAMING_SME"
+ "TARGET_STREAMING_SME2"
{@ [cons: =0, 1, 2, 3; attrs: movprfx]
[ w, %0, w, w; * ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
[ ?&w, w, w, w; yes ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
@@ -1137,7 +1137,7 @@
UNSPEC_COND_FMAXNM)
(match_operand:SVE_FULL_F 3 "register_operand")]
UNSPEC_COND_FMINNM))]
- "TARGET_STREAMING_SME"
+ "TARGET_STREAMING_SME2"
{@ [cons: =0, 1, 2, 3; attrs: movprfx]
[ w, %0, w, w; * ] #
[ ?&w, w, w, w; yes ] #
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
index 44959f7..162de62 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_3.c
@@ -2,6 +2,8 @@
#include <arm_sme.h>
+#pragma GCC target "+sme2"
+
#define TEST(TYPE) \
TYPE \
tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
index 643b263..453c82c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_4.c
@@ -2,6 +2,8 @@
#include <arm_sme.h>
+#pragma GCC target "+sme2"
+
#define TEST(TYPE) \
TYPE \
untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) __arm_streaming \
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
new file mode 100644
index 0000000..7c5464b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/clamp_5.c
@@ -0,0 +1,24 @@
+// { dg-options "-O" }
+
+#include <arm_sme.h>
+
+#pragma GCC target "+nosme2"
+
+#define TEST(TYPE) \
+ TYPE \
+ tied1_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c); \
+ } \
+ \
+ TYPE \
+ tied2_##TYPE(TYPE a, TYPE b, TYPE c) __arm_streaming \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c); \
+ }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-not {\tfclamp\t} } } */