aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2025-08-04 11:45:28 +0100
committer: Richard Sandiford <richard.sandiford@arm.com> 2025-08-04 11:45:28 +0100
commit: fcfbe83d88c1bfae49e654b5095ebe46cbe361d8 (patch)
tree: 43067d105b44a31922d6a8bf48aecc02287d8c69
parent: 0d276cd378e7a41b9004577a30b9a8ca16ec6b4c (diff)
downloadgcc-fcfbe83d88c1bfae49e654b5095ebe46cbe361d8.zip
gcc-fcfbe83d88c1bfae49e654b5095ebe46cbe361d8.tar.gz
gcc-fcfbe83d88c1bfae49e654b5095ebe46cbe361d8.tar.bz2
aarch64: Improve svdupq_lane expansion for big-endian [PR121293]
If the index to svdupq_lane is variable, or is outside the range of the .Q form of DUP, the fallback expansion is to convert to VNx2DI and use TBL. The problem in this PR was that the conversion used subregs, and on big-endian targets, a bitcast from VNx2DI to another element size requires a REV[BHW] in the best case or a spill and reload in the worst case. (See the comment at the head of aarch64-sve.md for details.)

Here we want the conversion to act like svreinterpret, so it should use aarch64_sve_reinterpret instead of subregs.

gcc/
	PR target/121293
	* config/aarch64/aarch64-sve-builtins-base.cc (svdupq_lane::expand): Use aarch64_sve_reinterpret instead of subregs. Explicitly reinterpret the result back to the required mode, rather than leaving the caller to take a subreg.

gcc/testsuite/
	PR target/121293
	* gcc.target/aarch64/sve/acle/general/dupq_lane_9.c: New test.
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins-base.cc | 5
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_9.c | 8
2 files changed, 11 insertions, 2 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index b439683..32cce97 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1259,9 +1259,10 @@ public:
index = target;
}
- e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
+ e.args[0] = aarch64_sve_reinterpret (VNx2DImode, e.args[0]);
e.args[1] = index;
- return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
+ rtx res = e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
+ return aarch64_sve_reinterpret (mode, res);
}
};
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_9.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_9.c
new file mode 100644
index 0000000..e3f352b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_9.c
@@ -0,0 +1,8 @@
+/* { dg-options "-O2 -mbig-endian" } */
+
+#pragma GCC aarch64 "arm_sve.h"
+
+svint32_t f(svint32_t x) { return svdupq_lane (x, 17); }
+void g(svint32_t *a, svint32_t *b) { *a = svdupq_lane (*b, 17); }
+
+/* { dg-final { scan-assembler-not {\trevw\t} } } */