aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKewen Lin <linkw@linux.ibm.com>2023-04-03 21:47:44 -0500
committerKewen Lin <linkw@linux.ibm.com>2023-04-04 00:11:54 -0500
commitcdd2d6643f7fef40e335a7027edfea7276cde608 (patch)
treeae236e9936e15c6b0ace63f6bab9566849635066
parent0dfbb28a9549c2503204b0338bf550f1bff9c681 (diff)
downloadgcc-cdd2d6643f7fef40e335a7027edfea7276cde608.zip
gcc-cdd2d6643f7fef40e335a7027edfea7276cde608.tar.gz
gcc-cdd2d6643f7fef40e335a7027edfea7276cde608.tar.bz2
rs6000: Fix vector parity support [PR108699]
The failures on the originally failing case builtin-bitops-1.c and the associated test case pr108699.c here show that the current support for vector-mode parity is wrong on Power. The hardware insns vprtyb[wdq] operate only on the least significant bit of each byte per element, so they do not match what the RTL opcode parity requires, yet the current implementation wrongly expands parity with them alone. This patch fixes the handling by emitting one more insn, vpopcntb, first: after the per-byte population count, the least significant bit of each byte holds that byte's parity, which vprtyb[wdq] then combines into the parity of the whole element. PR target/108699 gcc/ChangeLog: * config/rs6000/altivec.md (*p9v_parity<mode>2): Rename to ... (rs6000_vprtyb<mode>2): ... this. * config/rs6000/rs6000-builtins.def (VPRTYBD): Replace parityv2di2 with rs6000_vprtybv2di2. (VPRTYBW): Replace parityv4si2 with rs6000_vprtybv4si2. (VPRTYBQ): Replace parityv1ti2 with rs6000_vprtybv1ti2. * config/rs6000/vector.md (parity<mode>2 with VEC_IP): Expand with popcountv16qi2 and the corresponding rs6000_vprtyb<mode>2. gcc/testsuite/ChangeLog: * gcc.target/powerpc/p9-vparity.c: Add scan-assembler-not for vpopcntb to distinguish parity byte from parity. * gcc.target/powerpc/pr108699.c: New test.
-rw-r--r--gcc/config/rs6000/altivec.md8
-rw-r--r--gcc/config/rs6000/rs6000-builtins.def6
-rw-r--r--gcc/config/rs6000/vector.md11
-rw-r--r--gcc/testsuite/gcc.target/powerpc/p9-vparity.c1
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr108699.c42
5 files changed, 61 insertions, 7 deletions
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 30606b8..49b0c96 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4195,9 +4195,11 @@
[(set_attr "type" "vecsimple")])
;; Vector parity
-(define_insn "*p9v_parity<mode>2"
- [(set (match_operand:VParity 0 "register_operand" "=v")
- (parity:VParity (match_operand:VParity 1 "register_operand" "v")))]
+(define_insn "rs6000_vprtyb<mode>2"
+ [(set (match_operand:VEC_IP 0 "register_operand" "=v")
+ (unspec:VEC_IP
+ [(match_operand:VEC_IP 1 "register_operand" "v")]
+ UNSPEC_PARITY))]
"TARGET_P9_VECTOR"
"vprtyb<wd> %0,%1"
[(set_attr "type" "vecsimple")])
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index e0d9f5a..03fb194 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2666,13 +2666,13 @@
VMSUMUDM altivec_vmsumudm {}
const vsll __builtin_altivec_vprtybd (vsll);
- VPRTYBD parityv2di2 {}
+ VPRTYBD rs6000_vprtybv2di2 {}
const vsq __builtin_altivec_vprtybq (vsq);
- VPRTYBQ parityv1ti2 {}
+ VPRTYBQ rs6000_vprtybv1ti2 {}
const vsi __builtin_altivec_vprtybw (vsi);
- VPRTYBW parityv4si2 {}
+ VPRTYBW rs6000_vprtybv4si2 {}
const vsll __builtin_altivec_vrldmi (vsll, vsll, vsll);
VRLDMI altivec_vrldmi {}
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 12fd5f9..1ae04c8 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -1226,7 +1226,16 @@
(define_expand "parity<mode>2"
[(set (match_operand:VEC_IP 0 "register_operand")
(parity:VEC_IP (match_operand:VEC_IP 1 "register_operand")))]
- "TARGET_P9_VECTOR")
+ "TARGET_P9_VECTOR"
+{
+ rtx op1 = gen_lowpart (V16QImode, operands[1]);
+ rtx res = gen_reg_rtx (V16QImode);
+ emit_insn (gen_popcountv16qi2 (res, op1));
+ emit_insn (gen_rs6000_vprtyb<mode>2 (operands[0],
+ gen_lowpart (<MODE>mode, res)));
+
+ DONE;
+})
;; Same size conversions
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vparity.c b/gcc/testsuite/gcc.target/powerpc/p9-vparity.c
index f4aba15..8f6f123 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vparity.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vparity.c
@@ -105,3 +105,4 @@ parity_ti_4u (__uint128_t a)
/* { dg-final { scan-assembler "vprtybd" } } */
/* { dg-final { scan-assembler "vprtybq" } } */
/* { dg-final { scan-assembler "vprtybw" } } */
+/* { dg-final { scan-assembler-not "vpopcntb" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108699.c b/gcc/testsuite/gcc.target/powerpc/pr108699.c
new file mode 100644
index 0000000..f02bac1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108699.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#define N 16
+
+unsigned long long vals[N];
+unsigned int res[N];
+unsigned int expects[N] = {0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+unsigned long long inputs[N]
+ = {0x0000000000000000ULL, 0x0000000000000001ULL, 0x8000000000000000ULL,
+ 0x0000000000000002ULL, 0x4000000000000000ULL, 0x0000000100000000ULL,
+ 0x0000000080000000ULL, 0xa5a5a5a5a5a5a5a5ULL, 0x5a5a5a5a5a5a5a5aULL,
+ 0xcafecafe00000000ULL, 0x0000cafecafe0000ULL, 0x00000000cafecafeULL,
+ 0x8070600000000000ULL, 0xffffffffffffffffULL};
+
+__attribute__ ((noipa)) void
+init ()
+{
+ for (int i = 0; i < N; i++)
+ vals[i] = inputs[i];
+}
+
+__attribute__ ((noipa)) void
+do_parity ()
+{
+ for (int i = 0; i < N; i++)
+ res[i] = __builtin_parityll (vals[i]);
+}
+
+int
+main (void)
+{
+ init ();
+ do_parity ();
+ for (int i = 0; i < N; i++)
+ if (res[i] != expects[i])
+ __builtin_abort();
+
+ return 0;
+}
+