aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyrylo Tkachov <ktkachov@nvidia.com>2025-09-02 00:43:14 -0700
committerKyrylo Tkachov <ktkachov@nvidia.com>2025-09-03 17:21:35 +0200
commitcb508e54140687a50790059fac548d87515df6be (patch)
tree47a793590898f44a6b2a9b257158f7e10ffa62bc
parent3e2077d8c7a0acba2d54bd0666ae578fe114cd72 (diff)
downloadgcc-cb508e54140687a50790059fac548d87515df6be.zip
gcc-cb508e54140687a50790059fac548d87515df6be.tar.gz
gcc-cb508e54140687a50790059fac548d87515df6be.tar.bz2
aarch64: PR target/121749: Use correct predicate for narrowing shift amounts
With g:d20b2ad845876eec0ee80a3933ad49f9f6c4ee30 the narrowing shift instructions
are now represented with standard RTL and more merging optimisations occur.
This exposed a wrong predicate for the shift amount operand.
The shift amount is the number of bits of the narrow destination, not the
input sources.  Correct this by using the vn_mode attribute when specifying
the predicate, which exists for this purpose.

I've spotted a few more narrowing shift patterns that need the restriction,
so they are updated as well.

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>

gcc/

	PR target/121749
	* config/aarch64/aarch64-simd.md (aarch64_<shrn_op>shrn_n<mode>):
	Use aarch64_simd_shift_imm_offset_<vn_mode> instead of
	aarch64_simd_shift_imm_offset_<ve_mode> predicate.
	(aarch64_<shrn_op>shrn_n<mode> VQN define_expand): Likewise.
	(*aarch64_<shrn_op>rshrn_n<mode>_insn): Likewise.
	(aarch64_<shrn_op>rshrn_n<mode>): Likewise.
	(aarch64_<shrn_op>rshrn_n<mode> VQN define_expand): Likewise.
	(aarch64_sqshrun_n<mode>_insn): Likewise.
	(aarch64_sqshrun_n<mode>): Likewise.
	(aarch64_sqshrun_n<mode> VQN define_expand): Likewise.
	(aarch64_sqrshrun_n<mode>_insn): Likewise.
	(aarch64_sqrshrun_n<mode>): Likewise.
	(aarch64_sqrshrun_n<mode>): Likewise.
	* config/aarch64/iterators.md (vn_mode): Handle DI, SI, HI modes.

gcc/testsuite/

	PR target/121749
	* gcc.target/aarch64/simd/pr121749.c: New test.
-rw-r--r--gcc/config/aarch64/aarch64-simd.md22
-rw-r--r--gcc/config/aarch64/iterators.md3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/pr121749.c11
3 files changed, 24 insertions, 12 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b75c3d..c111dc2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6731,7 +6731,7 @@
(SAT_TRUNC:<VNARROWQ>
(<TRUNC_SHIFT>:SD_HSDI
(match_operand:SD_HSDI 1 "register_operand" "w")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
"<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
@@ -6753,7 +6753,7 @@
(ALL_TRUNC:<VNARROWQ>
(<TRUNC_SHIFT>:VQN
(match_operand:VQN 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
{
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
@@ -6784,7 +6784,7 @@
(<TRUNCEXTEND>:<DWI>
(match_operand:SD_HSDI 1 "register_operand" "w"))
(match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD
&& aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
"<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
@@ -6799,7 +6799,7 @@
(<TRUNCEXTEND>:<V2XWIDE>
(match_operand:SD_HSDI 1 "register_operand"))
(match_dup 3))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
{
/* Use this expander to create the rounding constant vector, which is
@@ -6819,7 +6819,7 @@
(<TRUNCEXTEND>:<V2XWIDE>
(match_operand:VQN 1 "register_operand"))
(match_dup 3))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
{
if (<CODE> == TRUNCATE
@@ -6861,7 +6861,7 @@
(smax:SD_HSDI
(ashiftrt:SD_HSDI
(match_operand:SD_HSDI 1 "register_operand" "w")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))
(const_int 0))
(const_int <half_mask>)))]
"TARGET_SIMD"
@@ -6872,7 +6872,7 @@
(define_expand "aarch64_sqshrun_n<mode>"
[(match_operand:<VNARROWQ> 0 "register_operand")
(match_operand:SD_HSDI 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
"TARGET_SIMD"
{
rtx dst = gen_reg_rtx (<MODE>mode);
@@ -6890,7 +6890,7 @@
(smax:VQN
(ashiftrt:VQN
(match_operand:VQN 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))
(match_dup 3))
(match_dup 4))))]
"TARGET_SIMD"
@@ -6932,7 +6932,7 @@
(sign_extend:<DWI>
(match_operand:SD_HSDI 1 "register_operand" "w"))
(match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))
(const_int 0))
(const_int <half_mask>)))]
"TARGET_SIMD
@@ -6944,7 +6944,7 @@
(define_expand "aarch64_sqrshrun_n<mode>"
[(match_operand:<VNARROWQ> 0 "register_operand")
(match_operand:SD_HSDI 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
"TARGET_SIMD"
{
int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
@@ -6967,7 +6967,7 @@
(sign_extend:<V2XWIDE>
(match_operand:VQN 1 "register_operand"))
(match_dup 3))
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))
(match_dup 4))
(match_dup 5))))]
"TARGET_SIMD"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b15e578..7a6ea0d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2205,7 +2205,8 @@
(SI "si")])
;; Like ve_mode but for the half-width modes.
-(define_mode_attr vn_mode [(V8HI "qi") (V4SI "hi") (V2DI "si")])
+(define_mode_attr vn_mode [(V8HI "qi") (V4SI "hi") (V2DI "si") (DI "si")
+ (SI "hi") (HI "qi")])
;; Vm for lane instructions is restricted to FP_LO_REGS.
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr121749.c b/gcc/testsuite/gcc.target/aarch64/simd/pr121749.c
new file mode 100644
index 0000000..c4e1a2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr121749.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+uint32_t
+foo (uint64_t v0)
+{
+ return vqshrnd_n_u64 (vshrd_n_u64 (v0, 26), 7);
+}
+