aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2021-01-04 11:59:07 +0000
committer: Richard Sandiford <richard.sandiford@arm.com> 2021-01-04 11:59:07 +0000
commit: b41e6dd50f329b0291457e939d4c0dacd81c82c1 (patch)
tree: 65dc2ac48b43e224d17ae38019837a277ca57bc6 /gcc
parent: ba15b0fa0df773a90374f6b06775534ecd9f7b43 (diff)
download: gcc-b41e6dd50f329b0291457e939d4c0dacd81c82c1.zip
gcc-b41e6dd50f329b0291457e939d4c0dacd81c82c1.tar.gz
gcc-b41e6dd50f329b0291457e939d4c0dacd81c82c1.tar.bz2
aarch64: Improve vcombine codegen [PR89057]
This patch fixes a codegen regression in the handling of things like:

  __temp.val[0] \
    = vcombine_##funcsuffix (__b.val[0], \
                             vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \

in the 64-bit vst[234] functions.  The zero was forced into a
register at expand time, and we relied on combine to fuse the
zero and combine back together into a single combinez pattern.
The problem is that the zero could be hoisted before combine
gets a chance to do its thing.

gcc/
	PR target/89057
	* config/aarch64/aarch64-simd.md (aarch64_combine<mode>): Accept
	aarch64_simd_reg_or_zero for operand 2.  Use the combinez patterns
	to handle zero operands.

gcc/testsuite/
	PR target/89057
	* gcc.target/aarch64/pr89057.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md | 15
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/pr89057.c | 16
2 files changed, 28 insertions, 3 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c4e3b89..85770c8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3340,11 +3340,20 @@
(define_expand "aarch64_combine<mode>"
[(match_operand:<VDBL> 0 "register_operand")
(match_operand:VDC 1 "register_operand")
- (match_operand:VDC 2 "register_operand")]
+ (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
"TARGET_SIMD"
{
- aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
-
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ {
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
+ operands[2]));
+ }
+ else
+ aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
DONE;
}
)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr89057.c b/gcc/testsuite/gcc.target/aarch64/pr89057.c
new file mode 100644
index 0000000..1e20024
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr89057.c
@@ -0,0 +1,16 @@
+/* { dg-options "-O3" } */
+
+#include <arm_neon.h>
+
+void
+f (int32_t *dst, int32_t *src, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ int32x2x3_t a = vld3_s32 (src + i * 6);
+ int32x2x3_t b = { a.val[2], a.val[1], a.val[0] };
+ vst3_s32 (dst + i * 6, b);
+ }
+}
+
+/* { dg-final { scan-assembler-not {\tins\t} } } */