aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAndrew Pinski <quic_apinski@quicinc.com>2024-08-16 11:18:31 -0700
committerAndrew Pinski <quic_apinski@quicinc.com>2024-08-21 08:10:57 -0700
commit4a5d6118c10dfa4f1ed28ab4875bb80f2b2dca9d (patch)
tree8149b4689fb7d58aa1cc78fa5d6acd8862e84ab8 /gcc
parent893cef363c46fa879b47d2b042c19c98bcd2b5f8 (diff)
downloadgcc-4a5d6118c10dfa4f1ed28ab4875bb80f2b2dca9d.zip
gcc-4a5d6118c10dfa4f1ed28ab4875bb80f2b2dca9d.tar.gz
gcc-4a5d6118c10dfa4f1ed28ab4875bb80f2b2dca9d.tar.bz2
aarch64: Implement popcountti2 pattern [PR113042]
When CSSC is not enabled, a 128-bit popcount can be implemented with just the vector (v16qi) cnt instruction followed by a reduction, the same way the 64-bit one is currently implemented, instead of being split into two 64-bit popcounts. Changes since v1: * v2: Make operand 0 be DImode instead of TImode and simplify. Built and tested for aarch64-linux-gnu. PR target/113042 gcc/ChangeLog: * config/aarch64/aarch64.md (popcountti2): New define_expand. gcc/testsuite/ChangeLog: * gcc.target/aarch64/popcnt10.c: New test. * gcc.target/aarch64/popcnt9.c: New test. Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64.md13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/popcnt10.c25
-rw-r--r--gcc/testsuite/gcc.target/aarch64/popcnt9.c25
3 files changed, 63 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 12dcc16..c54b29c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5378,6 +5378,19 @@
}
})
+(define_expand "popcountti2" ;; 128-bit popcount via a V16QI popcount plus a reduction.
+ [(match_operand:DI 0 "register_operand") ;; Result, in DImode.
+ (match_operand:TI 1 "register_operand")] ;; 128-bit (TImode) input.
+ "TARGET_SIMD && !TARGET_CSSC" ;; Enabled only with SIMD and without CSSC.
+{
+ rtx v = gen_reg_rtx (V16QImode); /* Operand 1 viewed as a V16QI vector.  */
+ rtx v1 = gen_reg_rtx (V16QImode); /* Receives the popcountv16qi2 result.  */
+ emit_move_insn (v, gen_lowpart (V16QImode, operands[1]));
+ emit_insn (gen_popcountv16qi2 (v1, v));
+ emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v16qi (operands[0], v1)); /* Reduce v1 into the DImode result.  */
+ DONE;
+})
+
(define_insn "clrsb<mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
(clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt10.c b/gcc/testsuite/gcc.target/aarch64/popcnt10.c
new file mode 100644
index 0000000..4d01fc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt10.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+cssc"
+
+/*
+** h128:
+** ldp x([0-9]+), x([0-9]+), \[x0\]
+** cnt x([0-9]+), x([0-9]+)
+** cnt x([0-9]+), x([0-9]+)
+** add w0, w([0-9]+), w([0-9]+)
+** ret
+*/
+
+
+unsigned h128 (const unsigned __int128 *a) {
+ return __builtin_popcountg (a[0]); /* Popcount of the 128-bit value at *a; built with +cssc here.  */
+}
+
+/* popcount with CSSC should be split into 2 sections. */
+/* { dg-final { scan-tree-dump-not "POPCOUNT " "optimized" } } */
+/* { dg-final { scan-tree-dump-times " __builtin_popcount" 2 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt9.c b/gcc/testsuite/gcc.target/aarch64/popcnt9.c
new file mode 100644
index 0000000..c778fc7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt9.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/113042 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** h128:
+** ldr q([0-9]+), \[x0\]
+** cnt v([0-9]+).16b, v\1.16b
+** addv b([0-9]+), v\2.16b
+** fmov w0, s\3
+** ret
+*/
+
+
+unsigned h128 (const unsigned __int128 *a) {
+ return __builtin_popcountg (a[0]); /* Popcount of the 128-bit value at *a; built with +nocssc here.  */
+}
+
+/* There should be only one POPCOUNT. */
+/* { dg-final { scan-tree-dump-times "POPCOUNT " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not " __builtin_popcount" "optimized" } } */
+