author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>   2023-06-07 11:20:01 +0100
committer  Kyrylo Tkachov <kyrylo.tkachov@arm.com>   2023-06-16 13:52:24 +0100
commit     c8e9a5ced0dbe4fef6c1cefee773895a662ba868 (patch)
tree       cead4de7848a02a3cefb3716284f5bb134926029
parent     207db5d92f9cc533627c6bd5b3ebae9128b49741 (diff)
aarch64: [US]Q(R)SHR(U)N2 refactoring
This patch is large in lines of code, but it is a fairly regular
extension of the first patch: it converts the high-half patterns to
standard RTL codes in the same fashion as the first patch did for the
low-half ones.
This allows us to remove the unspec codes for these instructions, as
no uses of them remain.
Bootstrapped and tested on aarch64-none-linux-gnu and
aarch64_be-none-elf.
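As context for the conversion, the semantics the refactored patterns
express can be modelled in scalar C. The sketch below is illustrative
only — it is not code from the patch and the helper names are made up;
it shows one 16-bit lane narrowing to 8 bits:

    #include <stdint.h>

    /* Plain narrowing shift (one SHRN2 lane): truncate (x >> n).  */
    static uint8_t
    model_shrn (int16_t x, int n)
    {
      return (uint8_t) (x >> n);
    }

    /* Rounding narrowing shift (one RSHRN2 lane): add the rounding
       constant 1 << (n - 1) before shifting.  The addition happens in
       a wider type (here via C's promotion to int) for the same reason
       the new patterns compute it in <V2XWIDE> mode: the addend must
       not overflow the source element.  */
    static uint8_t
    model_rshrn (int16_t x, int n)
    {
      return (uint8_t) ((x + (1 << (n - 1))) >> n);
    }

    /* Saturating unsigned narrowing shift (one SQSHRUN2 lane): clamp
       the arithmetically shifted value to [0, UINT8_MAX], matching the
       smin (smax (ashiftrt ...)) form used by the new patterns.  */
    static uint8_t
    model_sqshrun (int16_t x, int n)
    {
      int v = x >> n;
      v = v < 0 ? 0 : v;                  /* smax against zero.  */
      v = v > UINT8_MAX ? UINT8_MAX : v;  /* smin against narrow max.  */
      return (uint8_t) v;
    }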
gcc/ChangeLog:
* config/aarch64/aarch64-simd-builtins.def (shrn2): Rename builtins to...
(shrn2_n): ... This.
(rshrn2): Rename builtins to...
(rshrn2_n): ... This.
* config/aarch64/arm_neon.h (vrshrn_high_n_s16): Adjust for the above.
(vrshrn_high_n_s32): Likewise.
(vrshrn_high_n_s64): Likewise.
(vrshrn_high_n_u16): Likewise.
(vrshrn_high_n_u32): Likewise.
(vrshrn_high_n_u64): Likewise.
(vshrn_high_n_s16): Likewise.
(vshrn_high_n_s32): Likewise.
(vshrn_high_n_s64): Likewise.
(vshrn_high_n_u16): Likewise.
(vshrn_high_n_u32): Likewise.
(vshrn_high_n_u64): Likewise.
* config/aarch64/aarch64-simd.md (*aarch64_<srn_op>shrn<mode>2_vect_le):
Delete.
(*aarch64_<srn_op>shrn<mode>2_vect_be): Likewise.
(aarch64_shrn2<mode>_insn_le): Likewise.
(aarch64_shrn2<mode>_insn_be): Likewise.
(aarch64_shrn2<mode>): Likewise.
(aarch64_rshrn2<mode>_insn_le): Likewise.
(aarch64_rshrn2<mode>_insn_be): Likewise.
(aarch64_rshrn2<mode>): Likewise.
(aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le): Likewise.
(aarch64_<shrn_op>shrn2_n<mode>_insn_le): New define_insn.
(aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be): Delete.
(aarch64_<shrn_op>shrn2_n<mode>_insn_be): New define_insn.
(aarch64_<sur>q<r>shr<u>n2_n<mode>): Delete.
(aarch64_<shrn_op>shrn2_n<mode>): New define_expand.
(aarch64_<shrn_op>rshrn2_n<mode>_insn_le): New define_insn.
(aarch64_<shrn_op>rshrn2_n<mode>_insn_be): New define_insn.
(aarch64_<shrn_op>rshrn2_n<mode>): New define_expand.
(aarch64_sqshrun2_n<mode>_insn_le): New define_insn.
(aarch64_sqshrun2_n<mode>_insn_be): New define_insn.
(aarch64_sqshrun2_n<mode>): New define_expand.
(aarch64_sqrshrun2_n<mode>_insn_le): New define_insn.
(aarch64_sqrshrun2_n<mode>_insn_be): New define_insn.
(aarch64_sqrshrun2_n<mode>): New define_expand.
* config/aarch64/iterators.md (UNSPEC_SQSHRUN, UNSPEC_SQRSHRUN,
UNSPEC_SQSHRN, UNSPEC_UQSHRN, UNSPEC_SQRSHRN, UNSPEC_UQRSHRN):
Delete unspec values.
(VQSHRN_N): Delete int iterator.
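The arm_neon.h changes below only repoint the intrinsics at the renamed
builtins; their user-visible behaviour is unchanged. As a usage
reference (my example, not part of the patch), one affected intrinsic:

    #include <arm_neon.h>

    /* With this patch, vshrn_high_n_s16 expands through the renamed
       __builtin_aarch64_shrn2_nv8hi builtin and should still compile
       to a single SHRN2 instruction.  */
    int8x16_t
    narrow_high (int8x8_t lo, int16x8_t hi)
    {
      /* Keep 'lo' as the low half; fill the high half with each lane
         of 'hi' shifted right by 3 and truncated to 8 bits.  */
      return vshrn_high_n_s16 (lo, hi, 3);
    }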
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def |  11
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md           | 381
-rw-r--r--  gcc/config/aarch64/arm_neon.h                |  24
-rw-r--r--  gcc/config/aarch64/iterators.md              |  19
4 files changed, 237 insertions, 198 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 87af8f3..01cd85d 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -263,16 +263,14 @@
   BUILTIN_VQN (SHIFTIMM, shrn_n, 0, NONE)
   BUILTIN_VQN (USHIFTIMM, shrn_n, 0, NONE)
 
-  /* Implemented by aarch64_shrn2<mode>.  */
-  BUILTIN_VQN (SHIFT2IMM, shrn2, 0, NONE)
-  BUILTIN_VQN (USHIFT2IMM, shrn2, 0, NONE)
+  BUILTIN_VQN (SHIFT2IMM, shrn2_n, 0, NONE)
+  BUILTIN_VQN (USHIFT2IMM, shrn2_n, 0, NONE)
 
   BUILTIN_VQN (SHIFTIMM, rshrn_n, 0, NONE)
   BUILTIN_VQN (USHIFTIMM, rshrn_n, 0, NONE)
 
-  /* Implemented by aarch64_rshrn2<mode>.  */
-  BUILTIN_VQN (SHIFT2IMM, rshrn2, 0, NONE)
-  BUILTIN_VQN (USHIFT2IMM, rshrn2, 0, NONE)
+  BUILTIN_VQN (SHIFT2IMM, rshrn2_n, 0, NONE)
+  BUILTIN_VQN (USHIFT2IMM, rshrn2_n, 0, NONE)
 
   /* Implemented by aarch64_<su>mlsl<mode>.  */
   BUILTIN_VD_BHSI (TERNOP, smlsl, 0, NONE)
@@ -480,7 +478,6 @@
   BUILTIN_SD_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
   BUILTIN_SD_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
   BUILTIN_SD_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
-  /* Implemented by aarch64_<sur>q<r>shr<u>n2_n<mode>.  */
   BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE)
   BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE)
   BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ce5885e..b31c713 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1896,30 +1896,6 @@
 }
 )
 
-(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 1 "register_operand" "0")
-          (truncate:<VNARROWQ>
-            (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
-              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (truncate:<VNARROWQ>
-            (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
-              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
-          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -1948,121 +1924,6 @@
   [(set_attr "type" "neon_permute<q>")]
 )
 
-(define_insn "aarch64_shrn2<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 1 "register_operand" "0")
-          (truncate:<VNARROWQ>
-            (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
-              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_shrn2<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (truncate:<VNARROWQ>
-            (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
-              (match_operand:VQN 3
-                "aarch64_simd_shift_imm_vec_<vn_mode>")))
-          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_shrn2<mode>"
-  [(match_operand:<VNARROWQ2> 0 "register_operand")
-   (match_operand:<VNARROWQ> 1 "register_operand")
-   (match_operand:VQN 2 "register_operand")
-   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
-  "TARGET_SIMD"
-  {
-    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                                     INTVAL (operands[3]));
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
-                                                  operands[2], operands[3]));
-    else
-      emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
-                                                  operands[2], operands[3]));
-    DONE;
-  }
-)
-
-(define_insn "aarch64_rshrn2<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 1 "register_operand" "0")
-          (truncate:<VNARROWQ>
-            (lshiftrt:VQN
-              (plus:VQN (match_operand:VQN 2 "register_operand" "w")
-                        (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
-              (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN
-   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
-      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[4], 0)) - 1))"
-  "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %4"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_rshrn2<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (truncate:<VNARROWQ>
-            (lshiftrt:VQN
-              (plus:VQN (match_operand:VQN 2 "register_operand" "w")
-                        (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
-              (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_<vn_mode>")))
-          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN
-   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
-      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[4], 0)) - 1))"
-  "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %4"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_rshrn2<mode>"
-  [(match_operand:<VNARROWQ2> 0 "register_operand")
-   (match_operand:<VNARROWQ> 1 "register_operand")
-   (match_operand:VQN 2 "register_operand")
-   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
-  "TARGET_SIMD"
-  {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
-      {
-        rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
-        emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
-                                              operands[2], tmp));
-      }
-    else
-      {
-        rtx shft
-          = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                               HOST_WIDE_INT_1U
-                                                << (INTVAL (operands[3]) - 1));
-
-        operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                                         INTVAL (operands[3]));
-        if (BYTES_BIG_ENDIAN)
-          emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
-                                                       operands[1],
-                                                       operands[2],
-                                                       shft,
-                                                       operands[3]));
-        else
-          emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
-                                                       operands[1],
-                                                       operands[2],
-                                                       shft,
-                                                       operands[3]));
-      }
-    DONE;
-  }
-)
-
 ;; Widening operations.
 
 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
@@ -6912,54 +6773,254 @@
 }
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
+(define_insn "aarch64_<shrn_op>shrn2_n<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
           (match_operand:<VNARROWQ> 1 "register_operand" "0")
-          (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
-                              (match_operand:VQN 3
-                                "aarch64_simd_shift_imm_vec_<vn_mode>")]
-                             VQSHRN_N)))]
+          (ALL_TRUNC:<VNARROWQ>
+            (<TRUNC_SHIFT>:VQN
+              (match_operand:VQN 2 "register_operand" "w")
+              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+  "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be"
+(define_insn "aarch64_<shrn_op>shrn2_n<mode>_insn_be"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
-          (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
-                              (match_operand:VQN 3
-                                "aarch64_simd_shift_imm_vec_<vn_mode>")]
-                             VQSHRN_N)
+          (ALL_TRUNC:<VNARROWQ>
+            (<TRUNC_SHIFT>:VQN
+              (match_operand:VQN 2 "register_operand" "w")
+              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+  "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
-(define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
+(define_expand "aarch64_<shrn_op>shrn2_n<mode>"
   [(match_operand:<VNARROWQ2> 0 "register_operand")
    (match_operand:<VNARROWQ> 1 "register_operand")
-   (unspec:<VNARROWQ>
-     [(match_operand:VQN 2 "register_operand")
-      (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
-     VQSHRN_N)]
+   (ALL_TRUNC:<VNARROWQ>
+     (match_operand:VQN 2 "register_operand"))
+   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
   "TARGET_SIMD"
   {
     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                      INTVAL (operands[3]));
     if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be (operands[0],
+      emit_insn (gen_aarch64_<shrn_op>shrn2_n<mode>_insn_be (operands[0],
                    operands[1], operands[2], operands[3]));
     else
-      emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le (operands[0],
+      emit_insn (gen_aarch64_<shrn_op>shrn2_n<mode>_insn_le (operands[0],
                    operands[1], operands[2], operands[3]));
     DONE;
   }
 )
+
+(define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+        (vec_concat:<VNARROWQ2>
+          (match_operand:<VNARROWQ> 1 "register_operand" "0")
+          (ALL_TRUNC:<VNARROWQ>
+            (<TRUNC_SHIFT>:<V2XWIDE>
+              (plus:<V2XWIDE>
+                (<TRUNCEXTEND>:<V2XWIDE>
+                  (match_operand:VQN 2 "register_operand" "w"))
+                (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN
+   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
+  "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+        (vec_concat:<VNARROWQ2>
+          (ALL_TRUNC:<VNARROWQ>
+            (<TRUNC_SHIFT>:<V2XWIDE>
+              (plus:<V2XWIDE>
+                (<TRUNCEXTEND>:<V2XWIDE>
+                  (match_operand:VQN 2 "register_operand" "w"))
+                (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
+          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN
+   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
+  "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
+  [(match_operand:<VNARROWQ2> 0 "register_operand")
+   (match_operand:<VNARROWQ> 1 "register_operand")
+   (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
+   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
+  "TARGET_SIMD"
+  {
+    if (<CODE> == TRUNCATE
+        && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
+      {
+        rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
+        emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
+                                              operands[2], tmp));
+        DONE;
+      }
+    /* Use this expander to create the rounding constant vector, which is
+       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
+       RTL is generated when handling the DImode expanders.  */
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
+    rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
+    operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
+                                                              operands[1],
+                                                              operands[2],
+                                                              operands[3],
+                                                              rnd));
+    else
+      emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
+                                                              operands[1],
+                                                              operands[2],
+                                                              operands[3],
+                                                              rnd));
+    DONE;
+  }
+)
+
+(define_insn "aarch64_sqshrun2_n<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+        (vec_concat:<VNARROWQ2>
+          (match_operand:<VNARROWQ> 1 "register_operand" "0")
+          (truncate:<VNARROWQ>
+            (smin:VQN
+              (smax:VQN
+                (ashiftrt:VQN
+                  (match_operand:VQN 2 "register_operand" "w")
+                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+                (match_operand:VQN 4 "aarch64_simd_imm_zero"))
+              (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_sqshrun2_n<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+        (vec_concat:<VNARROWQ2>
+          (truncate:<VNARROWQ>
+            (smin:VQN
+              (smax:VQN
+                (ashiftrt:VQN
+                  (match_operand:VQN 2 "register_operand" "w")
+                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+                (match_operand:VQN 4 "aarch64_simd_imm_zero"))
+              (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
+          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqshrun2_n<mode>"
+  [(match_operand:<VNARROWQ2> 0 "register_operand")
+   (match_operand:<VNARROWQ> 1 "register_operand")
+   (match_operand:VQN 2 "register_operand")
+   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
+  "TARGET_SIMD"
+  {
+    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                                                     INTVAL (operands[3]));
+    rtx zeros = CONST0_RTX (<MODE>mode);
+    rtx half_umax
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                  GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
+                   operands[1], operands[2], operands[3],
+                   zeros, half_umax));
+    else
+      emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
+                   operands[1], operands[2], operands[3],
+                   zeros, half_umax));
+    DONE;
+  }
+)
+
+(define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+        (vec_concat:<VNARROWQ2>
+          (match_operand:<VNARROWQ> 1 "register_operand" "0")
+          (truncate:<VNARROWQ>
+            (smin:<V2XWIDE>
+              (smax:<V2XWIDE>
+                (ashiftrt:<V2XWIDE>
+                  (plus:<V2XWIDE>
+                    (sign_extend:<V2XWIDE>
+                      (match_operand:VQN 2 "register_operand" "w"))
+                    (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+                (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
+              (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN
+   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
+  "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+        (vec_concat:<VNARROWQ2>
+          (truncate:<VNARROWQ>
+            (smin:<V2XWIDE>
+              (smax:<V2XWIDE>
+                (ashiftrt:<V2XWIDE>
+                  (plus:<V2XWIDE>
+                    (sign_extend:<V2XWIDE>
+                      (match_operand:VQN 2 "register_operand" "w"))
+                    (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+                (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
+              (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
+          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN
+   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
+  "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqrshrun2_n<mode>"
+  [(match_operand:<VNARROWQ2> 0 "register_operand")
+   (match_operand:<VNARROWQ> 1 "register_operand")
+   (match_operand:VQN 2 "register_operand")
+   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
+  "TARGET_SIMD"
+  {
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
+    rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
+    rtx zero = CONST0_RTX (<V2XWIDE>mode);
+    rtx half_umax
+      = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
+                  GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+    operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
+                   operands[1], operands[2], operands[3], rnd,
+                   zero, half_umax));
+    else
+      emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
+                   operands[1], operands[2], operands[3], rnd,
+                   zero, half_umax));
+    DONE;
+  }
+)
 
 ;; cm(eq|ge|gt|lt|le)
 ;; Note, we have constraints for Dz and Z as different expanders
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2a46a31..d350d9e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -5532,42 +5532,42 @@ __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v8hi (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv8hi (__a, __b, __c);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v4si (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv4si (__a, __b, __c);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v2di (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv2di (__a, __b, __c);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v8hi_uuus (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv8hi_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v4si_uuus (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv4si_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v2di_uuus (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv2di_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline int8x8_t
@@ -5630,42 +5630,42 @@ __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v8hi (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv8hi (__a, __b, __c);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v4si (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv4si (__a, __b, __c);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v2di (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv2di (__a, __b, __c);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v8hi_uuus (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv8hi_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v4si_uuus (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv4si_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v2di_uuus (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv2di_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline poly8x8_t
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 15436c8..7f9a512 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -645,12 +645,6 @@
     UNSPEC_SQSHLU       ; Used in aarch64-simd.md.
     UNSPEC_SQSHL        ; Used in aarch64-simd.md.
     UNSPEC_UQSHL        ; Used in aarch64-simd.md.
-    UNSPEC_SQSHRUN      ; Used in aarch64-simd.md.
-    UNSPEC_SQRSHRUN     ; Used in aarch64-simd.md.
-    UNSPEC_SQSHRN       ; Used in aarch64-simd.md.
-    UNSPEC_UQSHRN       ; Used in aarch64-simd.md.
-    UNSPEC_SQRSHRN      ; Used in aarch64-simd.md.
-    UNSPEC_UQRSHRN      ; Used in aarch64-simd.md.
     UNSPEC_SSHL         ; Used in aarch64-simd.md.
     UNSPEC_USHL         ; Used in aarch64-simd.md.
    UNSPEC_SRSHL         ; Used in aarch64-simd.md.
@@ -2660,10 +2654,6 @@
 (define_int_iterator VQSHL_N [UNSPEC_SQSHLU UNSPEC_SQSHL UNSPEC_UQSHL])
 
-(define_int_iterator VQSHRN_N [UNSPEC_SQSHRUN UNSPEC_SQRSHRUN
-                               UNSPEC_SQSHRN UNSPEC_UQSHRN
-                               UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
-
 (define_int_iterator SQRDMLH_AS [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH])
 
 (define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
@@ -3374,9 +3364,6 @@
                       (UNSPEC_URSHR "ur") (UNSPEC_SRSHR "sr")
                       (UNSPEC_SQSHLU "s") (UNSPEC_SQSHL "s")
                       (UNSPEC_UQSHL "u")
-                      (UNSPEC_SQSHRUN "s") (UNSPEC_SQRSHRUN "s")
-                      (UNSPEC_SQSHRN "s") (UNSPEC_UQSHRN "u")
-                      (UNSPEC_SQRSHRN "s") (UNSPEC_UQRSHRN "u")
                       (UNSPEC_USHL "u") (UNSPEC_SSHL "s")
                       (UNSPEC_USHLL "u") (UNSPEC_SSHLL "s")
                       (UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr")
@@ -3388,9 +3375,6 @@
 ])
 
 (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r")
-                    (UNSPEC_SQSHRUN "") (UNSPEC_SQRSHRUN "r")
-                    (UNSPEC_SQSHRN "") (UNSPEC_UQSHRN "")
-                    (UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r")
                     (UNSPEC_SQSHL "")  (UNSPEC_UQSHL "")
                     (UNSPEC_SQRSHL "r")(UNSPEC_UQRSHL "r")
                     (UNSPEC_SMULHS "") (UNSPEC_UMULHS "")
@@ -3406,9 +3390,6 @@
                     (UNSPEC_SLI "l") (UNSPEC_SRI "r")])
 
 (define_int_attr u [(UNSPEC_SQSHLU "u") (UNSPEC_SQSHL "") (UNSPEC_UQSHL "")
-                    (UNSPEC_SQSHRUN "u") (UNSPEC_SQRSHRUN "u")
-                    (UNSPEC_SQSHRN "") (UNSPEC_UQSHRN "")
-                    (UNSPEC_SQRSHRN "") (UNSPEC_UQRSHRN "")
                     (UNSPEC_SHADD "") (UNSPEC_UHADD "u")
                     (UNSPEC_SRHADD "") (UNSPEC_URHADD "u")])