aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Coplan <alex.coplan@arm.com> 2020-11-12 10:03:21 +0000
committer Alex Coplan <alex.coplan@arm.com> 2020-11-12 10:03:21 +0000
commit 7f445b5d6116000f1a6527f2164836cbc7c01dee (patch)
tree ee704c64d0e9d354585837dc07517422054bf1d2
parent 64326bb428ac750ec3b0f56c06dfb98277cd41b5 (diff)
download gcc-7f445b5d6116000f1a6527f2164836cbc7c01dee.zip
gcc-7f445b5d6116000f1a6527f2164836cbc7c01dee.tar.gz
gcc-7f445b5d6116000f1a6527f2164836cbc7c01dee.tar.bz2
aarch64: Fix SVE2 BCAX pattern [PR97730]

This patch adds a missing not to the SVE2 BCAX (Bitwise clear and
exclusive or) pattern, fixing the PR. Since SVE doesn't have an
unpredicated not instruction, we need to use a (vacuously) predicated
not here.

To ensure that the predicate is instantiated correctly (to all 1s) for
the intrinsics, we pull out a separate expander from the define_insn.

From the ISA reference [1]:
> Bitwise AND elements of the second source vector with the
> corresponding inverted elements of the third source vector, then
> exclusive OR the results with corresponding elements of the first
> source vector.

[1] : https://developer.arm.com/docs/ddi0602/g/a64-sve-instructions-alphabetic-order/bcax-bitwise-clear-and-exclusive-or

gcc/ChangeLog:

	PR target/97730
	* config/aarch64/aarch64-sve2.md (@aarch64_sve2_bcax<mode>): Change to
	define_expand, add missing (trivially-predicated) not rtx to fix wrong
	code bug.
	(*aarch64_sve2_bcax<mode>): New.

gcc/testsuite/ChangeLog:

	PR target/97730
	* gcc.target/aarch64/sve2/bcax_1.c (OP): Add missing bitwise not to
	match correct bcax semantics.
	* gcc.dg/vect/pr97730.c: New test.

-rw-r--r-- gcc/config/aarch64/aarch64-sve2.md 31
-rw-r--r-- gcc/testsuite/gcc.dg/vect/pr97730.c 12
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c 2
3 files changed, 41 insertions, 4 deletions
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 0cafd0b..12dc9aa 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -786,17 +786,42 @@
;; -------------------------------------------------------------------------
;; Unpredicated exclusive OR of AND.
-(define_insn "@aarch64_sve2_bcax<mode>"
+(define_expand "@aarch64_sve2_bcax<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (xor:SVE_FULL_I
+ (and:SVE_FULL_I
+ (unspec:SVE_FULL_I
+ [(match_dup 4)
+ (not:SVE_FULL_I
+ (match_operand:SVE_FULL_I 3 "register_operand"))]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 2 "register_operand"))
+ (match_operand:SVE_FULL_I 1 "register_operand")))]
+ "TARGET_SVE2"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
(and:SVE_FULL_I
- (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
- (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
+ (unspec:SVE_FULL_I
+ [(match_operand 4)
+ (not:SVE_FULL_I
+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
+ UNSPEC_PRED_X)
+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
bcax\t%0.d, %0.d, %2.d, %3.d
movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
+ "&& !CONSTANT_P (operands[4])"
+ {
+ operands[4] = CONSTM1_RTX (<VPRED>mode);
+ }
[(set_attr "movprfx" "*,yes")]
)
diff --git a/gcc/testsuite/gcc.dg/vect/pr97730.c b/gcc/testsuite/gcc.dg/vect/pr97730.c
new file mode 100644
index 0000000..af4bca4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr97730.c
@@ -0,0 +1,12 @@
+/* { dg-additional-options "-O1" } */
+unsigned b = 0xce8e5a48, c = 0xb849691a;
+unsigned a[8080];
+int main() {
+ a[0] = b;
+ c = c;
+ unsigned f = 0xb1e8;
+ for (int h = 0; h < 5; h++)
+ a[h] = (b & c) ^ f;
+ if (a[0] != 0x8808f9e0)
+ __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c
index 4b0d5a9..7c31afc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
-#define OP(x,y,z) ((x) ^ ((y) & (z)))
+#define OP(x,y,z) ((x) ^ (~(y) & (z)))
#include "bitsel_1.c"