aboutsummaryrefslogtreecommitdiff
path: root/gcc/testsuite
diff options
context:
space:
mode:
author: Kyrylo Tkachov <ktkachov@nvidia.com> 2025-07-03 08:37:33 -0700
committer: Kyrylo Tkachov <ktkachov@nvidia.com> 2025-07-11 16:09:14 +0200
commit: 5300e2bda9c74ca458f01c7e8fc3ea035687b900 (patch)
tree: 125ee0838aed35db398faf2dc77f580a8bdf939a /gcc/testsuite
parent: f451ef41bdcbf6d86397f734e1227c94b01cae17 (diff)
download: gcc-5300e2bda9c74ca458f01c7e8fc3ea035687b900.zip
download: gcc-5300e2bda9c74ca458f01c7e8fc3ea035687b900.tar.gz
download: gcc-5300e2bda9c74ca458f01c7e8fc3ea035687b900.tar.bz2
aarch64: Allow 64-bit vector modes in pattern for BCAX instruction
The BCAX instruction from TARGET_SHA3 only operates on the full .16b form of
the inputs, but as it is a pure bitwise operation we can use it for the 64-bit
modes as well, since there we don't care about the upper 64 bits.
This patch extends the relevant pattern in aarch64-simd.md to accept the
64-bit vector modes.

Thus, for the input:

uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return BCAX (a, b, c); }

we can now generate:

bcax_s:
        bcax    v0.16b, v0.16b, v1.16b, v2.16b
        ret

instead of the current:

bcax_s:
        bic     v1.8b, v1.8b, v2.8b
        eor     v0.8b, v1.8b, v0.8b
        ret

This patch doesn't cover the DI/V1DI modes, as that would require extending the
bcaxqdi4 pattern with =r,r alternatives and adding splitting logic to handle
the cases where the operands arrive in GP regs. It is doable, but can be a
separate patch. As is, this patch should always be a straightforward
improvement.

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>

gcc/
        * config/aarch64/aarch64-simd.md (bcaxq<mode>4): Use VDQ_I mode iterator.

gcc/testsuite/
        * gcc.target/aarch64/simd/bcax_d.c: New test.
Diffstat (limited to 'gcc/testsuite')
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c 15
1 files changed, 15 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c b/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c
new file mode 100644
index 0000000..d68f0e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/bcax_d.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+#pragma GCC target "+sha3"
+/* SHA3 (enabled above) provides BCAX; the macro below spells out the same operation, x ^ (y & ~z), in plain C.  */
+#define BCAX(x,y,z) ((x) ^ ((y) & ~(z)))
+/* One wrapper per 64-bit integer vector type (DI/V1DI not covered); dg-final expects three bcax matches, one per wrapper.  */
+uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return BCAX (a, b, c); }
+uint16x4_t bcax_h (uint16x4_t a, uint16x4_t b, uint16x4_t c) { return BCAX (a, b, c); }
+uint8x8_t bcax_b (uint8x8_t a, uint8x8_t b, uint8x8_t c) { return BCAX (a, b, c); }
+
+/* { dg-final { scan-assembler-times {bcax\tv0.16b, v0.16b, v1.16b, v2.16b} 3 } } */
+