about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Andrew Pinski <quic_apinski@quicinc.com> 2024-06-10 00:39:54 +0000
committer: Andrew Pinski <quic_apinski@quicinc.com> 2024-08-15 23:55:55 -0700
commit: fcc3af9949880476c4ed01a98bd7f5d7f29b7b16 (patch)
tree: e7adab9130bc2f65d47994a4b379002813e405f6 /gcc
parent: 1b8b53ef75c143cddc114705c97c74d9c8f7a64b (diff)
download: gcc-trunk.zip
gcc-trunk.tar.gz
gcc-trunk.tar.bz2
aarch64: Improve popcount for bytes [PR113042] (HEAD, trunk, master)
For popcount for bytes, we don't need the reduction addition after the
vector cnt instruction, as we are only counting one byte's popcount.
This changes the popcount expander to cover all ALLI modes rather than
only GPI.

Changes since v1:
* v2 - Use ALLI iterator and combine all into one pattern.
       Add new testcases popcnt[6-8].c.
* v3 - Simplify TARGET_CSSC path.
       Use convert_to_mode instead of gen_zero_extend* directly.
       Some other small cleanups.

Bootstrapped and tested on aarch64-linux-gnu with no regressions.

	PR target/113042

gcc/ChangeLog:

	* config/aarch64/aarch64.md (popcount<mode>2): Update pattern
	to support ALLI modes.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/popcnt5.c: New test.
	* gcc.target/aarch64/popcnt6.c: New test.
	* gcc.target/aarch64/popcnt7.c: New test.
	* gcc.target/aarch64/popcnt8.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/aarch64/aarch64.md               37
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/popcnt5.c  19
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/popcnt6.c  19
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/popcnt7.c  18
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/popcnt8.c  18
5 files changed, 98 insertions, 13 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 665a333..12dcc16 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5341,9 +5341,9 @@
;; MOV w0, v2.b[0]
(define_expand "popcount<mode>2"
- [(set (match_operand:GPI 0 "register_operand")
- (popcount:GPI (match_operand:GPI 1 "register_operand")))]
- "TARGET_CSSC || TARGET_SIMD"
+ [(set (match_operand:ALLI 0 "register_operand")
+ (popcount:ALLI (match_operand:ALLI 1 "register_operand")))]
+ "TARGET_CSSC ? GET_MODE_BITSIZE (<MODE>mode) >= 32 : TARGET_SIMD"
{
if (!TARGET_CSSC)
{
@@ -5351,18 +5351,29 @@
rtx v1 = gen_reg_rtx (V8QImode);
rtx in = operands[1];
rtx out = operands[0];
- if(<MODE>mode == SImode)
- {
- rtx tmp;
- tmp = gen_reg_rtx (DImode);
- /* If we have SImode, zero extend to DImode, pop count does
- not change if we have extra zeros. */
- emit_insn (gen_zero_extendsidi2 (tmp, in));
- in = tmp;
- }
+ /* SImode and HImode should be zero extended to DImode.
+ popcount does not change if we have extra zeros. */
+ if (<MODE>mode == SImode || <MODE>mode == HImode)
+ in = convert_to_mode (DImode, in, true);
+
emit_move_insn (v, gen_lowpart (V8QImode, in));
emit_insn (gen_popcountv8qi2 (v1, v));
- emit_insn (gen_aarch64_zero_extend<mode>_reduc_plus_v8qi (out, v1));
+ /* QImode, just extract from the v8qi vector. */
+ if (<MODE>mode == QImode)
+ emit_move_insn (out, gen_lowpart (QImode, v1));
+ /* HI and SI, reduction is zero extended to SImode. */
+ else if (<MODE>mode == SImode || <MODE>mode == HImode)
+ {
+ rtx out1 = gen_reg_rtx (SImode);
+ emit_insn (gen_aarch64_zero_extendsi_reduc_plus_v8qi (out1, v1));
+ emit_move_insn (out, gen_lowpart (<MODE>mode, out1));
+ }
+ /* DImode, reduction is zero extended to DImode. */
+ else
+ {
+ gcc_assert (<MODE>mode == DImode);
+ emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v8qi (out, v1));
+ }
DONE;
}
})
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt5.c b/gcc/testsuite/gcc.target/aarch64/popcnt5.c
new file mode 100644
index 0000000..406369d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** h8:
+** ldr b[0-9]+, \[x0\]
+** cnt v[0-9]+.8b, v[0-9]+.8b
+** smov w0, v[0-9]+.b\[0\]
+** ret
+*/
+/* We should not need the addv here since we only need a byte popcount. */
+
+unsigned h8 (const unsigned char *a) {
+ return __builtin_popcountg (a[0]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt6.c b/gcc/testsuite/gcc.target/aarch64/popcnt6.c
new file mode 100644
index 0000000..e882cb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt6.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** h8:
+** ldr h[0-9]+, \[x0\]
+** cnt v[0-9]+.8b, v[0-9]+.8b
+** addv b[0-9]+, v[0-9]+.8b
+** fmov w0, s[0-9]+
+** ret
+*/
+
+unsigned h8 (const unsigned short *a) {
+ return __builtin_popcountg (a[0]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt7.c b/gcc/testsuite/gcc.target/aarch64/popcnt7.c
new file mode 100644
index 0000000..8dfff21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt7.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+cssc"
+
+/*
+** h8:
+** ldrb w[0-9]+, \[x0\]
+** cnt w[0-9]+, w[0-9]+
+** ret
+*/
+/* We should not produce any extra zero extend for this code */
+
+unsigned h8 (const unsigned char *a) {
+ return __builtin_popcountg (a[0]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt8.c b/gcc/testsuite/gcc.target/aarch64/popcnt8.c
new file mode 100644
index 0000000..66a88b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt8.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+cssc"
+
+/*
+** h8:
+** ldrh w[0-9]+, \[x0\]
+** cnt w[0-9]+, w[0-9]+
+** ret
+*/
+/* We should not produce any extra zero extend for this code */
+
+unsigned h8 (const unsigned short *a) {
+ return __builtin_popcountg (a[0]);
+}