 gcc/config/aarch64/aarch64-simd-builtins.def |  11
 gcc/config/aarch64/aarch64-simd.md           | 214
 gcc/config/aarch64/arm_neon.h                |  24
 gcc/config/aarch64/iterators.md              |  12
 gcc/config/aarch64/predicates.md             |  11
 5 files changed, 174 insertions(+), 98 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 94ff3f1..87af8f3 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -260,17 +260,15 @@
   BUILTIN_VDQHS (TERNOP, mls_n, 0, NONE)
   BUILTIN_VDQHS (TERNOPU, mls_n, 0, NONE)
 
-  /* Implemented by aarch64_shrn<mode>".  */
-  BUILTIN_VQN (SHIFTIMM, shrn, 0, NONE)
-  BUILTIN_VQN (USHIFTIMM, shrn, 0, NONE)
+  BUILTIN_VQN (SHIFTIMM, shrn_n, 0, NONE)
+  BUILTIN_VQN (USHIFTIMM, shrn_n, 0, NONE)
 
   /* Implemented by aarch64_shrn2<mode>.  */
   BUILTIN_VQN (SHIFT2IMM, shrn2, 0, NONE)
   BUILTIN_VQN (USHIFT2IMM, shrn2, 0, NONE)
 
-  /* Implemented by aarch64_rshrn<mode>".  */
-  BUILTIN_VQN (SHIFTIMM, rshrn, 0, NONE)
-  BUILTIN_VQN (USHIFTIMM, rshrn, 0, NONE)
+  BUILTIN_VQN (SHIFTIMM, rshrn_n, 0, NONE)
+  BUILTIN_VQN (USHIFTIMM, rshrn_n, 0, NONE)
 
   /* Implemented by aarch64_rshrn2<mode>.  */
   BUILTIN_VQN (SHIFT2IMM, rshrn2, 0, NONE)
@@ -470,7 +468,6 @@
   /* Implemented by aarch64_<sur>shll2_n<mode>.  */
   BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, NONE)
   BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, NONE)
-  /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>.  */
   BUILTIN_VQN (SHIFTIMM, sqshrun_n, 0, NONE)
   BUILTIN_VQN (SHIFTIMM, sqrshrun_n, 0, NONE)
   BUILTIN_VQN (SHIFTIMM, sqshrn_n, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3286f42..8b92981 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1896,16 +1896,6 @@
 }
 )
 
-(define_insn "*aarch64_<srn_op>shrn<mode><vczle><vczbe>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (truncate:<VNARROWQ>
-          (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
-            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
-  "TARGET_SIMD"
-  "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
 (define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -1958,58 +1948,6 @@
   [(set_attr "type" "neon_permute<q>")]
 )
 
-(define_expand "aarch64_shrn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
-        (truncate:<VNARROWQ>
-          (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
-            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
-  "TARGET_SIMD"
-  {
-    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                                     INTVAL (operands[2]));
-  }
-)
-
-(define_insn "aarch64_rshrn<mode><vczle><vczbe>_insn"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (truncate:<VNARROWQ>
-          (lshiftrt:VQN
-            (plus:VQN (match_operand:VQN 1 "register_operand" "w")
-                      (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
-            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
-  "TARGET_SIMD
-   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
-      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
-  "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_rshrn<mode>"
-  [(match_operand:<VNARROWQ> 0 "register_operand")
-   (match_operand:VQN 1 "register_operand")
-   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
-  "TARGET_SIMD"
-  {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
-      {
-        rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
-        emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
-      }
-    else
-      {
-        rtx shft
-          = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                               HOST_WIDE_INT_1U
-                                                << (INTVAL (operands[2]) - 1));
-        operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                                         INTVAL (operands[2]));
-        emit_insn (gen_aarch64_rshrn<mode>_insn (operands[0], operands[1],
-                                                 operands[2], shft));
-      }
-    DONE;
-  }
-)
-
 (define_insn "aarch64_shrn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -6727,31 +6665,153 @@
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>"
+(define_insn "*aarch64_<shrn_op>shrn_n<mode>_insn<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (unspec:<VNARROWQ>
-          [(match_operand:VQN 1 "register_operand" "w")
-           (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-         VQSHRN_N))]
+        (ALL_TRUNC:<VNARROWQ>
+          (<TRUNC_SHIFT>:VQN
+            (match_operand:VQN 1 "register_operand" "w")
+            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
   "TARGET_SIMD"
-  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
-(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
+(define_expand "aarch64_<shrn_op>shrn_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (ALL_TRUNC:<VNARROWQ>
+          (<TRUNC_SHIFT>:VQN
+            (match_operand:VQN 1 "register_operand")
+            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+  "TARGET_SIMD"
+  {
+    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                                                     INTVAL (operands[2]));
+  }
+)
+
+(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
-                            (match_operand:SI 2
-                              "aarch64_simd_shift_imm_offset_<ve_mode>")]
-                           VQSHRN_N))]
+        (ALL_TRUNC:<VNARROWQ>
+          (<TRUNC_SHIFT>:<V2XWIDE>
+            (plus:<V2XWIDE>
+              (<TRUNCEXTEND>:<V2XWIDE>
+                (match_operand:VQN 1 "register_operand" "w"))
+              (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (ALL_TRUNC:<VNARROWQ>
+          (<TRUNC_SHIFT>:<V2XWIDE>
+            (plus:<V2XWIDE>
+              (<TRUNCEXTEND>:<V2XWIDE>
+                (match_operand:VQN 1 "register_operand"))
+              (match_dup 3))
+            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+  "TARGET_SIMD"
+  {
+    if (<CODE> == TRUNCATE
+        && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
+      {
+        rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
+        emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
+        DONE;
+      }
+    /* Use this expander to create the rounding constant vector, which is
+       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
+       RTL is generated when handling the DImode expanders.  */
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
+    operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+  }
+)
+
+(define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+        (truncate:<VNARROWQ>
+          (smin:VQN
+            (smax:VQN
+              (ashiftrt:VQN
+                (match_operand:VQN 1 "register_operand" "w")
+                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+              (match_operand:VQN 3 "aarch64_simd_imm_zero"))
+            (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
+  "TARGET_SIMD"
+  "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqshrun_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (truncate:<VNARROWQ>
+          (smin:VQN
+            (smax:VQN
+              (ashiftrt:VQN
+                (match_operand:VQN 1 "register_operand")
+                (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+              (match_dup 3))
+            (match_dup 4))))]
   "TARGET_SIMD"
   {
     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                      INTVAL (operands[2]));
-    emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn (operands[0],
-                                                           operands[1],
-                                                           operands[2]));
-    DONE;
+    operands[3] = CONST0_RTX (<MODE>mode);
+    operands[4]
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                        GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+  }
+)
+
+(define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+        (truncate:<VNARROWQ>
+          (smin:<V2XWIDE>
+            (smax:<V2XWIDE>
+              (ashiftrt:<V2XWIDE>
+                (plus:<V2XWIDE>
+                  (sign_extend:<V2XWIDE>
+                    (match_operand:VQN 1 "register_operand" "w"))
+                  (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+              (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
+            (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqrshrun_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (truncate:<VNARROWQ>
+          (smin:<V2XWIDE>
+            (smax:<V2XWIDE>
+              (ashiftrt:<V2XWIDE>
+                (plus:<V2XWIDE>
+                  (sign_extend:<V2XWIDE>
+                    (match_operand:VQN 1 "register_operand"))
+                  (match_dup 3))
+                (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+              (match_dup 4))
+            (match_dup 5))))]
+  "TARGET_SIMD"
+  {
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
+    operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+    operands[4] = CONST0_RTX (<V2XWIDE>mode);
+    operands[5]
+      = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
+    operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
   }
 )
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0bb9839..2a46a31 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4755,42 +4755,42 @@ __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_s16 (int16x8_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv8hi (__a, __b);
+  return __builtin_aarch64_shrn_nv8hi (__a, __b);
 }
 
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_s32 (int32x4_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv4si (__a, __b);
+  return __builtin_aarch64_shrn_nv4si (__a, __b);
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_s64 (int64x2_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv2di (__a, __b);
+  return __builtin_aarch64_shrn_nv2di (__a, __b);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u16 (uint16x8_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv8hi_uus (__a, __b);
+  return __builtin_aarch64_shrn_nv8hi_uus (__a, __b);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u32 (uint32x4_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv4si_uus (__a, __b);
+  return __builtin_aarch64_shrn_nv4si_uus (__a, __b);
 }
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u64 (uint64x2_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv2di_uus (__a, __b);
+  return __builtin_aarch64_shrn_nv2di_uus (__a, __b);
 }
 
 __extension__ extern __inline int32x4_t
@@ -5574,42 +5574,42 @@ __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_s16 (int16x8_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv8hi (__a, __b);
+  return __builtin_aarch64_rshrn_nv8hi (__a, __b);
 }
 
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_s32 (int32x4_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv4si (__a, __b);
+  return __builtin_aarch64_rshrn_nv4si (__a, __b);
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_s64 (int64x2_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv2di (__a, __b);
+  return __builtin_aarch64_rshrn_nv2di (__a, __b);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u16 (uint16x8_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv8hi_uus (__a, __b);
+  return __builtin_aarch64_rshrn_nv8hi_uus (__a, __b);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u32 (uint32x4_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv4si_uus (__a, __b);
+  return __builtin_aarch64_rshrn_nv4si_uus (__a, __b);
 }
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u64 (uint64x2_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv2di_uus (__a, __b);
+  return __builtin_aarch64_rshrn_nv2di_uus (__a, __b);
 }
 
 __extension__ extern __inline uint32x2_t
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 56ce125..e8c62c8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2254,6 +2254,8 @@
 ;; Signed and unsigned saturating truncations.
 (define_code_iterator SAT_TRUNC [ss_truncate us_truncate])
 
+(define_code_iterator ALL_TRUNC [ss_truncate us_truncate truncate])
+
 ;; SVE integer unary operations.
 (define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount
                                      (ss_abs "TARGET_SVE2")
@@ -2339,6 +2341,10 @@
 (define_code_attr SHIFTEXTEND [(ashiftrt "sign_extend")
                                (lshiftrt "zero_extend")])
 
+(define_code_attr TRUNCEXTEND [(ss_truncate "sign_extend")
+                               (us_truncate "zero_extend")
+                               (truncate "zero_extend")])
+
 ;; For comparison operators we use the FCM* and CM* instructions.
 ;; As there are no CMLE or CMLT instructions which act on 3 vector
 ;; operands, we must use CMGE or CMGT and swap the order of the
@@ -2428,6 +2434,12 @@
                              (ss_minus "sign_extend")
                              (us_minus "zero_extend")])
 
+(define_code_attr TRUNC_SHIFT [(ss_truncate "ashiftrt")
+                               (us_truncate "lshiftrt") (truncate "lshiftrt")])
+
+(define_code_attr shrn_op [(ss_truncate "sq")
+                           (us_truncate "uq") (truncate "")])
+
 ;; Whether a shift is left or right.
 (define_code_attr lr [(ashift "l") (ashiftrt "r") (lshiftrt "r")])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 9391aba..b31ba6e 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -599,10 +599,17 @@
   (and (match_code "const_vector")
        (match_test "aarch64_const_vec_all_same_in_range_p (op,
                         (HOST_WIDE_INT_1U
-                         << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1,
+                          << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1,
                         (HOST_WIDE_INT_1U
-                         << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1)")))
+                          << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1)")))
 
+(define_predicate "aarch64_simd_umax_quarter_mode"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op,
+                        (HOST_WIDE_INT_1U
+                         << (GET_MODE_UNIT_BITSIZE (mode) / 4)) - 1,
+                        (HOST_WIDE_INT_1U
+                         << (GET_MODE_UNIT_BITSIZE (mode) / 4)) - 1)")))
 (define_predicate "aarch64_simd_shift_imm_vec_qi"
   (and (match_code "const_vector")
        (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)")))
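For context (not part of the commit), the intrinsics routed through the renamed builtins keep their documented semantics; the new define_insn bodies merely spell those semantics out in RTL instead of an unspec: a plain truncating right shift for SHRN, the same shift with the rounding constant 1 << (shift - 1) added in wider precision for RSHRN, and an extra smax/smin clamp against the narrow type's range for the saturating SQSHRUN/SQRSHRUN forms. A minimal sketch of that correspondence, written for this illustration (file name, test values and the scalar reference model are assumptions, not taken from the patch):

/* narrow.c -- illustrative only; build on an AArch64 target, e.g. "gcc -O2 narrow.c".  */
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

/* SQRSHRUN as the new pattern describes it: widen, add the rounding
   constant 1 << (n - 1), arithmetic shift right, clamp to [0, 0xff]
   (smax with zero, smin with the narrow unsigned max), then truncate.  */
static uint8_t
ref_sqrshrun (int16_t a, int n)
{
  int32_t v = ((int32_t) a + (1 << (n - 1))) >> n;
  if (v < 0)
    v = 0;
  if (v > 0xff)
    v = 0xff;
  return (uint8_t) v;
}

int
main (void)
{
  int16_t in[8] = { -300, -9, -8, 0, 7, 8, 1000, 32767 };
  int16x8_t a = vld1q_s16 (in);

  int8_t plain[8], round[8];
  uint8_t usat[8];

  vst1_s8 (plain, vshrn_n_s16 (a, 4));	  /* expands via __builtin_aarch64_shrn_nv8hi  */
  vst1_s8 (round, vrshrn_n_s16 (a, 4));	  /* expands via __builtin_aarch64_rshrn_nv8hi */
  vst1_u8 (usat, vqrshrun_n_s16 (a, 4));  /* the sqrshrun_n pattern                    */

  for (int i = 0; i < 8; i++)
    printf ("%6d -> shrn %4d  rshrn %4d  sqrshrun %3u (ref %3u)\n",
	    in[i], plain[i], round[i], (unsigned) usat[i],
	    (unsigned) ref_sqrshrun (in[i], 4));
  return 0;
}

The rshrn_n expander above also shows the one special case kept from the old code: when the truncate variant shifts by the full element width, the result is emitted as RADDHN against a shared zero instead of a rounding shift.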