diff options
author | Andrew Pinski <quic_apinski@quicinc.com> | 2024-08-16 11:18:31 -0700 |
---|---|---|
committer | Andrew Pinski <quic_apinski@quicinc.com> | 2024-08-21 08:10:57 -0700 |
commit | 4a5d6118c10dfa4f1ed28ab4875bb80f2b2dca9d (patch) | |
tree | 8149b4689fb7d58aa1cc78fa5d6acd8862e84ab8 /gcc | |
parent | 893cef363c46fa879b47d2b042c19c98bcd2b5f8 (diff) | |
download | gcc-4a5d6118c10dfa4f1ed28ab4875bb80f2b2dca9d.zip gcc-4a5d6118c10dfa4f1ed28ab4875bb80f2b2dca9d.tar.gz gcc-4a5d6118c10dfa4f1ed28ab4875bb80f2b2dca9d.tar.bz2 |
aarch64: Implement popcountti2 pattern [PR113042]
When CSSC is not enabled, a 128-bit popcount can be implemented
with just the vector (v16qi) cnt instruction followed by a reduction,
the same way the 64-bit one is currently implemented, instead of
splitting it into two 64-bit popcounts.
Changes since v1:
* v2: Make operand 0 be DImode instead of TImode and simplify.
Built and tested on aarch64-linux-gnu.
PR target/113042
gcc/ChangeLog:
* config/aarch64/aarch64.md (popcountti2): New define_expand.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/popcnt10.c: New test.
* gcc.target/aarch64/popcnt9.c: New test.
Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/popcnt10.c | 25 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/popcnt9.c | 25 |
3 files changed, 63 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 12dcc16..c54b29c 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -5378,6 +5378,19 @@ } }) +(define_expand "popcountti2" + [(match_operand:DI 0 "register_operand") + (match_operand:TI 1 "register_operand")] + "TARGET_SIMD && !TARGET_CSSC" +{ + rtx v = gen_reg_rtx (V16QImode); + rtx v1 = gen_reg_rtx (V16QImode); + emit_move_insn (v, gen_lowpart (V16QImode, operands[1])); + emit_insn (gen_popcountv16qi2 (v1, v)); + emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v16qi (operands[0], v1)); + DONE; +}) + (define_insn "clrsb<mode>2" [(set (match_operand:GPI 0 "register_operand" "=r") (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))] diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt10.c b/gcc/testsuite/gcc.target/aarch64/popcnt10.c new file mode 100644 index 0000000..4d01fc6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/popcnt10.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +/* PR target/113042 */ + +#pragma GCC target "+cssc" + +/* +** h128: +** ldp x([0-9]+), x([0-9]+), \[x0\] +** cnt x([0-9]+), x([0-9]+) +** cnt x([0-9]+), x([0-9]+) +** add w0, w([0-9]+), w([0-9]+) +** ret +*/ + + +unsigned h128 (const unsigned __int128 *a) { + return __builtin_popcountg (a[0]); +} + +/* popcount with CSSC should be split into 2 sections. 
*/ +/* { dg-final { scan-tree-dump-not "POPCOUNT " "optimized" } } */ +/* { dg-final { scan-tree-dump-times " __builtin_popcount" 2 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt9.c b/gcc/testsuite/gcc.target/aarch64/popcnt9.c new file mode 100644 index 0000000..c778fc7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/popcnt9.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +/* PR target/113042 */ + +#pragma GCC target "+nocssc" + +/* +** h128: +** ldr q([0-9]+), \[x0\] +** cnt v([0-9]+).16b, v\1.16b +** addv b([0-9]+), v\2.16b +** fmov w0, s\3 +** ret +*/ + + +unsigned h128 (const unsigned __int128 *a) { + return __builtin_popcountg (a[0]); +} + +/* There should be only one POPCOUNT. */ +/* { dg-final { scan-tree-dump-times "POPCOUNT " 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-not " __builtin_popcount" "optimized" } } */ + |