diff options
author | Andrew Pinski <quic_apinski@quicinc.com> | 2024-06-10 00:39:54 +0000 |
---|---|---|
committer | Andrew Pinski <quic_apinski@quicinc.com> | 2024-08-15 23:55:55 -0700 |
commit | fcc3af9949880476c4ed01a98bd7f5d7f29b7b16 (patch) | |
tree | e7adab9130bc2f65d47994a4b379002813e405f6 /gcc/config/aarch64 | |
parent | 1b8b53ef75c143cddc114705c97c74d9c8f7a64b (diff) | |
download | gcc-fcc3af9949880476c4ed01a98bd7f5d7f29b7b16.zip gcc-fcc3af9949880476c4ed01a98bd7f5d7f29b7b16.tar.gz gcc-fcc3af9949880476c4ed01a98bd7f5d7f29b7b16.tar.bz2 |
aarch64: Improve popcount for bytes [PR113042]
For popcount for bytes, we don't need the reduction addition
after the vector cnt instruction as we are only counting one
byte's popcount.
This changes the popcount expander to cover all ALLI modes rather than only GPI.
Changes since v1:
* v2 - Use ALLI iterator and combine all into one pattern.
Add new testcases popcnt[6-8].c.
* v3 - Simplify TARGET_CSSC path.
Use convert_to_mode instead of gen_zero_extend* directly.
Some other small cleanups.
Bootstrapped and tested on aarch64-linux-gnu with no regressions.
PR target/113042
gcc/ChangeLog:
* config/aarch64/aarch64.md (popcount<mode>2): Update pattern
to support ALLI modes.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/popcnt5.c: New test.
* gcc.target/aarch64/popcnt6.c: New test.
* gcc.target/aarch64/popcnt7.c: New test.
* gcc.target/aarch64/popcnt8.c: New test.
Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 37 |
1 files changed, 24 insertions, 13 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 665a333..12dcc16 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5341,9 +5341,9 @@
 ;; MOV w0, v2.b[0]
 
 (define_expand "popcount<mode>2"
-  [(set (match_operand:GPI 0 "register_operand")
-        (popcount:GPI (match_operand:GPI 1 "register_operand")))]
-  "TARGET_CSSC || TARGET_SIMD"
+  [(set (match_operand:ALLI 0 "register_operand")
+        (popcount:ALLI (match_operand:ALLI 1 "register_operand")))]
+  "TARGET_CSSC ? GET_MODE_BITSIZE (<MODE>mode) >= 32 : TARGET_SIMD"
 {
   if (!TARGET_CSSC)
     {
@@ -5351,18 +5351,29 @@
       rtx v1 = gen_reg_rtx (V8QImode);
       rtx in = operands[1];
       rtx out = operands[0];
-      if(<MODE>mode == SImode)
-        {
-          rtx tmp;
-          tmp = gen_reg_rtx (DImode);
-          /* If we have SImode, zero extend to DImode, pop count does
-             not change if we have extra zeros. */
-          emit_insn (gen_zero_extendsidi2 (tmp, in));
-          in = tmp;
-        }
+      /* SImode and HImode should be zero extended to DImode.
+         popcount does not change if we have extra zeros. */
+      if (<MODE>mode == SImode || <MODE>mode == HImode)
+        in = convert_to_mode (DImode, in, true);
+
       emit_move_insn (v, gen_lowpart (V8QImode, in));
       emit_insn (gen_popcountv8qi2 (v1, v));
-      emit_insn (gen_aarch64_zero_extend<mode>_reduc_plus_v8qi (out, v1));
+      /* QImode, just extract from the v8qi vector. */
+      if (<MODE>mode == QImode)
+        emit_move_insn (out, gen_lowpart (QImode, v1));
+      /* HI and SI, reduction is zero extended to SImode. */
+      else if (<MODE>mode == SImode || <MODE>mode == HImode)
+        {
+          rtx out1 = gen_reg_rtx (SImode);
+          emit_insn (gen_aarch64_zero_extendsi_reduc_plus_v8qi (out1, v1));
+          emit_move_insn (out, gen_lowpart (<MODE>mode, out1));
+        }
+      /* DImode, reduction is zero extended to DImode. */
+      else
+        {
+          gcc_assert (<MODE>mode == DImode);
+          emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v8qi (out, v1));
+        }
       DONE;
     }
 })