diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2021-05-14 17:18:34 +0100 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2021-05-19 14:44:39 +0100 |
commit | ddbdb9a384f53419d0e6fbcca2a4534a2668e5f8 (patch) | |
tree | 5d00891d47d407c5c7e88a8ea3e92c49a86e2ad1 | |
parent | 778ac63fe244b63380bd3b2dee4d20ff27332bce (diff) | |
download | gcc-ddbdb9a384f53419d0e6fbcca2a4534a2668e5f8.zip gcc-ddbdb9a384f53419d0e6fbcca2a4534a2668e5f8.tar.gz gcc-ddbdb9a384f53419d0e6fbcca2a4534a2668e5f8.tar.bz2 |
aarch64: Refactor aarch64_<sur>q<r>shr<u>n_n<mode> RTL pattern
Split the aarch64_<sur>q<r>shr<u>n_n<mode> pattern into separate
scalar and vector variants. Further split the vector pattern into
big- and little-endian variants that model the zero-high-half semantics
of the underlying instruction, allowing for more combinations with
the write-to-high-half variant (aarch64_<sur>q<r>shr<u>n2_n<mode>).
gcc/ChangeLog:
2021-05-14 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-simd-builtins.def: Split builtin
generation for aarch64_<sur>q<r>shr<u>n_n<mode> pattern into
separate scalar and vector generators.
* config/aarch64/aarch64-simd.md
(aarch64_<sur>q<r>shr<u>n_n<mode>): Define as an expander and
split into...
(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le): This and...
(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be): This.
* config/aarch64/iterators.md: Define SD_HSDI iterator.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 18 |
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 54 |
-rw-r--r-- | gcc/config/aarch64/iterators.md | 3 |
3 files changed, 68 insertions, 7 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 1e81bb5..18baa67 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -421,12 +421,18 @@
   BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, NONE)
   BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, NONE)
   /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */
-  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
-  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
-  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
-  BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
-  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
-  BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
+  BUILTIN_VQN (SHIFTIMM, sqshrun_n, 0, NONE)
+  BUILTIN_VQN (SHIFTIMM, sqrshrun_n, 0, NONE)
+  BUILTIN_VQN (SHIFTIMM, sqshrn_n, 0, NONE)
+  BUILTIN_VQN (USHIFTIMM, uqshrn_n, 0, NONE)
+  BUILTIN_VQN (SHIFTIMM, sqrshrn_n, 0, NONE)
+  BUILTIN_VQN (USHIFTIMM, uqrshrn_n, 0, NONE)
+  BUILTIN_SD_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
+  BUILTIN_SD_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
+  BUILTIN_SD_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
+  BUILTIN_SD_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
+  BUILTIN_SD_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
+  BUILTIN_SD_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
   /* Implemented by aarch64_<sur>q<r>shr<u>n2_n<mode>. */
   BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE)
   BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7952309..c67fa3f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6045,7 +6045,7 @@
 
 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
+        (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
                             (match_operand:SI 2
                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                            VQSHRN_N))]
@@ -6054,6 +6054,58 @@
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+        (vec_concat:<VNARROWQ2>
+          (unspec:<VNARROWQ>
+            [(match_operand:VQN 1 "register_operand" "w")
+             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+            VQSHRN_N)
+          (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+        (vec_concat:<VNARROWQ2>
+          (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
+          (unspec:<VNARROWQ>
+            [(match_operand:VQN 1 "register_operand" "w")
+             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+            VQSHRN_N)))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
+                            (match_operand:SI 2
+                              "aarch64_simd_shift_imm_offset_<ve_mode>")]
+                           VQSHRN_N))]
+  "TARGET_SIMD"
+  {
+    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                                                 INTVAL (operands[2]));
+    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
+                    operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
+    else
+      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
+                    operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
+
+    /* The intrinsic expects a narrow result, so emit a subreg that will get
+       optimized away as appropriate. */
+    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+                                                 <VNARROWQ2>mode));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0ec93b0..e9047d0 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -282,6 +282,9 @@
 ;; Scalar 64-bit container: 16, 32-bit integer modes
 (define_mode_iterator SD_HSI [HI SI])
 
+;; Scalar 64-bit container: 16-bit, 32-bit and 64-bit integer modes.
+(define_mode_iterator SD_HSDI [HI SI DI])
+
 ;; Advanced SIMD 64-bit container: 16, 32-bit integer modes.
 (define_mode_iterator VQ_HSI [V8HI V4SI])
 