author	Jonathan Wright <jonathan.wright@arm.com>	2021-05-14 17:18:34 +0100
committer	Jonathan Wright <jonathan.wright@arm.com>	2021-05-19 14:44:39 +0100
commit	ddbdb9a384f53419d0e6fbcca2a4534a2668e5f8 (patch)
tree	5d00891d47d407c5c7e88a8ea3e92c49a86e2ad1
parent	778ac63fe244b63380bd3b2dee4d20ff27332bce (diff)
aarch64: Refactor aarch64_<sur>q<r>shr<u>n_n<mode> RTL pattern
Split the aarch64_<sur>q<r>shr<u>n_n<mode> pattern into separate scalar
and vector variants. Further split the vector pattern into big/little
endian variants that model the zero-high-half semantics of the
underlying instruction - allowing for more combinations with the
write-to-high-half variant (aarch64_<sur>q<r>shr<u>n2_n<mode>).

gcc/ChangeLog:

2021-05-14  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd-builtins.def: Split builtin
	generation for aarch64_<sur>q<r>shr<u>n_n<mode> pattern into
	separate scalar and vector generators.
	* config/aarch64/aarch64-simd.md
	(aarch64_<sur>q<r>shr<u>n_n<mode>): Define as an expander and
	split into...
	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le): This and...
	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be): This.
	* config/aarch64/iterators.md: Define SD_HSDI iterator.
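For illustration only (this example is not part of the patch; the
function name and shift amounts are arbitrary), the zero-high-half
modelling matters when a vector narrowing shift feeds the
write-to-high-half intrinsic:

  #include <arm_neon.h>

  /* Illustrative sketch: narrow two 32-bit vectors into one 16-bit
     vector.  Because the new _insn_le/_insn_be patterns express that
     SQRSHRN zeroes the high half of its destination, combine can pair
     it with SQRSHRN2 without an intervening move.  */
  int16x8_t
  narrow_pair (int32x4_t lo, int32x4_t hi)
  {
    int16x4_t n = vqrshrn_n_s32 (lo, 4);   /* sqrshrn  v0.4h, v0.4s, #4 */
    return vqrshrn_high_n_s32 (n, hi, 4);  /* sqrshrn2 v0.8h, v1.4s, #4 */
  }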
-rw-r--r--	gcc/config/aarch64/aarch64-simd-builtins.def	18
-rw-r--r--	gcc/config/aarch64/aarch64-simd.md	54
-rw-r--r--	gcc/config/aarch64/iterators.md	3
3 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 1e81bb5..18baa67 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -421,12 +421,18 @@
BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, NONE)
BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, NONE)
/* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
- BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
- BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
+ BUILTIN_VQN (SHIFTIMM, sqshrun_n, 0, NONE)
+ BUILTIN_VQN (SHIFTIMM, sqrshrun_n, 0, NONE)
+ BUILTIN_VQN (SHIFTIMM, sqshrn_n, 0, NONE)
+ BUILTIN_VQN (USHIFTIMM, uqshrn_n, 0, NONE)
+ BUILTIN_VQN (SHIFTIMM, sqrshrn_n, 0, NONE)
+ BUILTIN_VQN (USHIFTIMM, uqrshrn_n, 0, NONE)
+ BUILTIN_SD_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
+ BUILTIN_SD_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
+ BUILTIN_SD_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
+ BUILTIN_SD_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
+ BUILTIN_SD_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
+ BUILTIN_SD_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
/* Implemented by aarch64_<sur>q<r>shr<u>n2_n<mode>. */
BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE)
BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7952309..c67fa3f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6045,7 +6045,7 @@
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
+ (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
(match_operand:SI 2
"aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
@@ -6054,6 +6054,58 @@
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (unspec:<VNARROWQ>
+ [(match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ VQSHRN_N)
+ (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
+ (unspec:<VNARROWQ>
+ [(match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ VQSHRN_N)))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
+ (match_operand:SI 2
+ "aarch64_simd_shift_imm_offset_<ve_mode>")]
+ VQSHRN_N))]
+ "TARGET_SIMD"
+ {
+ operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ INTVAL (operands[2]));
+ rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
+ operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
+ else
+ emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
+ operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
+
+ /* The intrinsic expects a narrow result, so emit a subreg that will get
+ optimized away as appropriate. */
+ emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+ <VNARROWQ2>mode));
+ DONE;
+ }
+)
+
(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0ec93b0..e9047d0 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -282,6 +282,9 @@
;; Scalar 64-bit container: 16, 32-bit integer modes
(define_mode_iterator SD_HSI [HI SI])

+;; Scalar 64-bit container: 16-bit, 32-bit and 64-bit integer modes.
+(define_mode_iterator SD_HSDI [HI SI DI])
+
;; Advanced SIMD 64-bit container: 16, 32-bit integer modes.
(define_mode_iterator VQ_HSI [V8HI V4SI])
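
For reference (again illustrative, not part of the patch), the scalar
builtins split out through the new SD_HSDI iterator back the ACLE
scalar narrowing intrinsics for HI, SI and DI inputs:

  #include <arm_neon.h>

  /* Illustrative sketch: scalar saturating rounding narrowing shifts,
     one per SD_HSDI mode.  */
  int8_t  narrow_h (int16_t a) { return vqrshrnh_n_s16 (a, 2); }  /* sqrshrn b0, h0, #2 */
  int16_t narrow_s (int32_t a) { return vqrshrns_n_s32 (a, 2); }  /* sqrshrn h0, s0, #2 */
  int32_t narrow_d (int64_t a) { return vqrshrnd_n_s64 (a, 2); }  /* sqrshrn s0, d0, #2 */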