aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorKyrylo Tkachov <ktkachov@nvidia.com>2024-11-05 05:10:22 -0800
committerKyrylo Tkachov <ktkachov@nvidia.com>2024-11-05 17:58:00 +0100
commit161e246cf32f1298400aa3c1d86110490a3cd0ce (patch)
tree8f3bfe5a21604b500f6a914f9ff8c1eac391f870 /gcc
parentf185a89fc4b6e6f5ae5475cd7c723b3acf39976b (diff)
downloadgcc-161e246cf32f1298400aa3c1d86110490a3cd0ce.zip
gcc-161e246cf32f1298400aa3c1d86110490a3cd0ce.tar.gz
gcc-161e246cf32f1298400aa3c1d86110490a3cd0ce.tar.bz2
PR target/117449: Restrict vector rotate match and split to pre-reload
The vector rotate splitter has some logic to deal with post-reload splitting but not all cases in aarch64_emit_opt_vec_rotate are post-reload-safe. In particular the ROTATE+XOR expansion for TARGET_SHA3 can create RTL that can later be simplified to a simple ROTATE post-reload, which would then match the insn again and try to split it. So do a clean split pre-reload and avoid going down this path post-reload by restricting the insn_and_split to can_create_pseudo_p (). Bootstrapped and tested on aarch64-none-linux. Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com> gcc/ PR target/117449 * config/aarch64/aarch64-simd.md (*aarch64_simd_rotate_imm<mode>): Match only when can_create_pseudo_p (). * config/aarch64/aarch64.cc (aarch64_emit_opt_vec_rotate): Assume can_create_pseudo_p (). gcc/testsuite/ PR target/117449 * gcc.c-torture/compile/pr117449.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-simd.md6
-rw-r--r--gcc/config/aarch64/aarch64.cc11
-rw-r--r--gcc/testsuite/gcc.c-torture/compile/pr117449.c8
3 files changed, 18 insertions, 7 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a91222b..cfe95bd 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1296,11 +1296,13 @@
;; After all the combinations and propagations of ROTATE have been
;; attempted split any remaining vector rotates into SHL + USRA sequences.
+;; Don't match this after reload as the various possible sequences for this
+;; require temporary registers.
(define_insn_and_split "*aarch64_simd_rotate_imm<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=&w")
(rotate:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "aarch64_simd_lshift_imm")))]
- "TARGET_SIMD"
+ "TARGET_SIMD && can_create_pseudo_p ()"
"#"
"&& 1"
[(set (match_dup 3)
@@ -1316,7 +1318,7 @@
if (aarch64_emit_opt_vec_rotate (operands[0], operands[1], operands[2]))
DONE;
- operands[3] = reload_completed ? operands[0] : gen_reg_rtx (<MODE>mode);
+ operands[3] = gen_reg_rtx (<MODE>mode);
rtx shft_amnt = unwrap_const_vec_duplicate (operands[2]);
int bitwidth = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
operands[4]
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9347e06..f2b5347 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16030,6 +16030,8 @@ aarch64_emit_opt_vec_rotate (rtx dst, rtx reg, rtx amnt_vec)
gcc_assert (CONST_INT_P (amnt));
HOST_WIDE_INT rotamnt = UINTVAL (amnt);
machine_mode mode = GET_MODE (reg);
+ /* Don't end up here after reload. */
+ gcc_assert (can_create_pseudo_p ());
/* Rotates by half the element width map down to REV* instructions and should
always be preferred when possible. */
if (rotamnt == GET_MODE_UNIT_BITSIZE (mode) / 2
@@ -16037,11 +16039,10 @@ aarch64_emit_opt_vec_rotate (rtx dst, rtx reg, rtx amnt_vec)
return true;
/* 64 and 128-bit vector modes can use the XAR instruction
when available. */
- else if (can_create_pseudo_p ()
- && ((TARGET_SHA3 && mode == V2DImode)
- || (TARGET_SVE2
- && (known_eq (GET_MODE_SIZE (mode), 8)
- || known_eq (GET_MODE_SIZE (mode), 16)))))
+ else if ((TARGET_SHA3 && mode == V2DImode)
+ || (TARGET_SVE2
+ && (known_eq (GET_MODE_SIZE (mode), 8)
+ || known_eq (GET_MODE_SIZE (mode), 16))))
{
rtx zeroes = aarch64_gen_shareable_zero (mode);
rtx xar_op
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr117449.c b/gcc/testsuite/gcc.c-torture/compile/pr117449.c
new file mode 100644
index 0000000..8ae0071
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr117449.c
@@ -0,0 +1,8 @@
+/* { dg-additional-options "-march=armv8.2-a+sha3" { target aarch64*-*-* } } */
+
+unsigned long *a;
+int i;
+void f() {
+ for (i = 0; i < 80; i++)
+ a[i] = (a[i] >> 8 | a[i] << 64 - 8) ^ a[i];
+}