diff options
author | Kyrylo Tkachov <ktkachov@nvidia.com> | 2025-09-02 00:43:14 -0700 |
---|---|---|
committer | Kyrylo Tkachov <ktkachov@nvidia.com> | 2025-09-03 17:21:35 +0200 |
commit | cb508e54140687a50790059fac548d87515df6be (patch) | |
tree | 47a793590898f44a6b2a9b257158f7e10ffa62bc | |
parent | 3e2077d8c7a0acba2d54bd0666ae578fe114cd72 (diff) | |
download | gcc-cb508e54140687a50790059fac548d87515df6be.zip gcc-cb508e54140687a50790059fac548d87515df6be.tar.gz gcc-cb508e54140687a50790059fac548d87515df6be.tar.bz2 |
aarch64: PR target/121749: Use correct predicate for narrowing shift amounts
With g:d20b2ad845876eec0ee80a3933ad49f9f6c4ee30 the narrowing shift instructions
are now represented with standard RTL and more merging optimisations occur.
This exposed a wrong predicate for the shift amount operand.
The valid shift amount is bounded by the number of bits of the narrow
destination, not of the input source.
Correct this by using the vn_mode attribute, which exists for this purpose,
when specifying the predicate.
I've spotted a few more narrowing shift patterns that need the restriction, so
they are updated as well.
Bootstrapped and tested on aarch64-none-linux-gnu.
Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>
gcc/
PR target/121749
* config/aarch64/aarch64-simd.md (aarch64_<shrn_op>shrn_n<mode>):
Use aarch64_simd_shift_imm_offset_<vn_mode> instead of
aarch64_simd_shift_imm_offset_<ve_mode> predicate.
(aarch64_<shrn_op>shrn_n<mode> VQN define_expand): Likewise.
(*aarch64_<shrn_op>rshrn_n<mode>_insn): Likewise.
(aarch64_<shrn_op>rshrn_n<mode>): Likewise.
(aarch64_<shrn_op>rshrn_n<mode> VQN define_expand): Likewise.
(aarch64_sqshrun_n<mode>_insn): Likewise.
(aarch64_sqshrun_n<mode>): Likewise.
(aarch64_sqshrun_n<mode> VQN define_expand): Likewise.
(aarch64_sqrshrun_n<mode>_insn): Likewise.
(aarch64_sqrshrun_n<mode>): Likewise.
(aarch64_sqrshrun_n<mode> VQN pattern): Likewise.
* config/aarch64/iterators.md (vn_mode): Handle DI, SI, HI modes.
gcc/testsuite/
PR target/121749
* gcc.target/aarch64/simd/pr121749.c: New test.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 22 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr121749.c | 11 |
3 files changed, 24 insertions, 12 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8b75c3d..c111dc2 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -6731,7 +6731,7 @@ (SAT_TRUNC:<VNARROWQ> (<TRUNC_SHIFT>:SD_HSDI (match_operand:SD_HSDI 1 "register_operand" "w") - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))] + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))] "TARGET_SIMD" "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2" [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -6753,7 +6753,7 @@ (ALL_TRUNC:<VNARROWQ> (<TRUNC_SHIFT>:VQN (match_operand:VQN 1 "register_operand") - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))] + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))] "TARGET_SIMD" { operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode, @@ -6784,7 +6784,7 @@ (<TRUNCEXTEND>:<DWI> (match_operand:SD_HSDI 1 "register_operand" "w")) (match_operand:<DWI> 3 "aarch64_int_rnd_operand")) - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))] + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))] "TARGET_SIMD && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])" "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2" @@ -6799,7 +6799,7 @@ (<TRUNCEXTEND>:<V2XWIDE> (match_operand:SD_HSDI 1 "register_operand")) (match_dup 3)) - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))] + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))] "TARGET_SIMD" { /* Use this expander to create the rounding constant vector, which is @@ -6819,7 +6819,7 @@ (<TRUNCEXTEND>:<V2XWIDE> (match_operand:VQN 1 "register_operand")) (match_dup 3)) - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))] + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))] "TARGET_SIMD" { if (<CODE> == TRUNCATE @@ -6861,7 +6861,7 @@ (smax:SD_HSDI (ashiftrt:SD_HSDI (match_operand:SD_HSDI 1 
"register_operand" "w") - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")) + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")) (const_int 0)) (const_int <half_mask>)))] "TARGET_SIMD" @@ -6872,7 +6872,7 @@ (define_expand "aarch64_sqshrun_n<mode>" [(match_operand:<VNARROWQ> 0 "register_operand") (match_operand:SD_HSDI 1 "register_operand") - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")] + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")] "TARGET_SIMD" { rtx dst = gen_reg_rtx (<MODE>mode); @@ -6890,7 +6890,7 @@ (smax:VQN (ashiftrt:VQN (match_operand:VQN 1 "register_operand") - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")) + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")) (match_dup 3)) (match_dup 4))))] "TARGET_SIMD" @@ -6932,7 +6932,7 @@ (sign_extend:<DWI> (match_operand:SD_HSDI 1 "register_operand" "w")) (match_operand:<DWI> 3 "aarch64_int_rnd_operand")) - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")) + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")) (const_int 0)) (const_int <half_mask>)))] "TARGET_SIMD @@ -6944,7 +6944,7 @@ (define_expand "aarch64_sqrshrun_n<mode>" [(match_operand:<VNARROWQ> 0 "register_operand") (match_operand:SD_HSDI 1 "register_operand") - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")] + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")] "TARGET_SIMD" { int prec = GET_MODE_UNIT_PRECISION (<DWI>mode); @@ -6967,7 +6967,7 @@ (sign_extend:<V2XWIDE> (match_operand:VQN 1 "register_operand")) (match_dup 3)) - (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")) + (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")) (match_dup 4)) (match_dup 5))))] "TARGET_SIMD" diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index b15e578..7a6ea0d 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2205,7 +2205,8 @@ (SI 
"si")]) ;; Like ve_mode but for the half-width modes. -(define_mode_attr vn_mode [(V8HI "qi") (V4SI "hi") (V2DI "si")]) +(define_mode_attr vn_mode [(V8HI "qi") (V4SI "hi") (V2DI "si") (DI "si") + (SI "hi") (HI "qi")]) ;; Vm for lane instructions is restricted to FP_LO_REGS. (define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x") diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr121749.c b/gcc/testsuite/gcc.target/aarch64/simd/pr121749.c new file mode 100644 index 0000000..c4e1a2d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr121749.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include <arm_neon.h> + +uint32_t +foo (uint64_t v0) +{ + return vqshrnd_n_u64 (vshrd_n_u64 (v0, 26), 7); +} + |