aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2023-04-26 15:10:18 +0100
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2023-04-26 15:10:18 +0100
commit80afac3121778e509e1847be90a1999236cff03a (patch)
treef68eb8add978190c840e4c503b7bfe233020467b
parentbd0791e899ea542deb6b3821b0a1d435d9754d5c (diff)
downloadgcc-80afac3121778e509e1847be90a1999236cff03a.zip
gcc-80afac3121778e509e1847be90a1999236cff03a.tar.gz
gcc-80afac3121778e509e1847be90a1999236cff03a.tar.bz2
aarch64: Reimplement RSHRN intrinsic patterns with standard RTL codes
This patch reimplements the backend patterns for the rshrn intrinsics using standard RTL codes rather than UNSPECs. We already represent shrn as a truncate of a shift. rshrn can be represented as truncate ((src + (1 << (shft - 1))) >> shft), similar to how LLVM treats it. I have a follow-up patch to do the same for the rshrn2 pattern, which will allow us to remove the UNSPEC_RSHRN entirely.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (aarch64_rshrn<mode>_insn_le):
	Reimplement with standard RTL codes instead of an UNSPEC.
	(aarch64_rshrn<mode>_insn_be): Likewise.
	(aarch64_rshrn<mode>): Adjust for the above.
	* config/aarch64/predicates.md (aarch64_simd_rshrn_imm_vec):
	Define.
-rw-r--r--gcc/config/aarch64/aarch64-simd.md36
-rw-r--r--gcc/config/aarch64/predicates.md6
2 files changed, 30 insertions, 12 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cb2223d..f891310 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1990,11 +1990,15 @@
(define_insn "aarch64_rshrn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
- (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2
- "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
- (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ (truncate:<VNARROWQ>
+ (lshiftrt:VQN
+ (plus:VQN (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
+ (match_operand:<VNARROWQ> 4 "aarch64_simd_or_scalar_imm_zero")))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN
+ && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
+ == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
"rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
@@ -2002,11 +2006,15 @@
(define_insn "aarch64_rshrn<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
- (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
- (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
- UNSPEC_RSHRN)))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ (match_operand:<VNARROWQ> 4 "aarch64_simd_or_scalar_imm_zero")
+ (truncate:<VNARROWQ>
+ (lshiftrt:VQN
+ (plus:VQN (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN
+ && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
+ == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
"rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
@@ -2024,18 +2032,22 @@
}
else
{
+ rtx shft
+ = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ HOST_WIDE_INT_1U
+ << (INTVAL (operands[2]) - 1));
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[2]));
if (BYTES_BIG_ENDIAN)
emit_insn (
gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
- operands[2],
+ operands[2], shft,
CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (
gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
- operands[2],
+ operands[2], shft,
CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 3f5f4df..242f10a 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -587,6 +587,12 @@
(and (match_code "const_vector")
(match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 64)")))
+(define_predicate "aarch64_simd_rshrn_imm_vec"
+ (and (match_code "const_vector")
+ (match_test "aarch64_const_vec_all_same_in_range_p (op, 1,
+ HOST_WIDE_INT_1U
+ << (GET_MODE_UNIT_BITSIZE (mode) - 1))")))
+
(define_predicate "aarch64_simd_shift_imm_bitsize_qi"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 8)")))