-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def   11
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md            214
-rw-r--r--  gcc/config/aarch64/arm_neon.h                  24
-rw-r--r--  gcc/config/aarch64/iterators.md                12
-rw-r--r--  gcc/config/aarch64/predicates.md               11
5 files changed, 174 insertions, 98 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 94ff3f1..87af8f3 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -260,17 +260,15 @@
BUILTIN_VDQHS (TERNOP, mls_n, 0, NONE)
BUILTIN_VDQHS (TERNOPU, mls_n, 0, NONE)
- /* Implemented by aarch64_shrn<mode>". */
- BUILTIN_VQN (SHIFTIMM, shrn, 0, NONE)
- BUILTIN_VQN (USHIFTIMM, shrn, 0, NONE)
+ BUILTIN_VQN (SHIFTIMM, shrn_n, 0, NONE)
+ BUILTIN_VQN (USHIFTIMM, shrn_n, 0, NONE)
/* Implemented by aarch64_shrn2<mode>. */
BUILTIN_VQN (SHIFT2IMM, shrn2, 0, NONE)
BUILTIN_VQN (USHIFT2IMM, shrn2, 0, NONE)
- /* Implemented by aarch64_rshrn<mode>". */
- BUILTIN_VQN (SHIFTIMM, rshrn, 0, NONE)
- BUILTIN_VQN (USHIFTIMM, rshrn, 0, NONE)
+ BUILTIN_VQN (SHIFTIMM, rshrn_n, 0, NONE)
+ BUILTIN_VQN (USHIFTIMM, rshrn_n, 0, NONE)
/* Implemented by aarch64_rshrn2<mode>. */
BUILTIN_VQN (SHIFT2IMM, rshrn2, 0, NONE)
@@ -470,7 +468,6 @@
/* Implemented by aarch64_<sur>shll2_n<mode>. */
BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, NONE)
BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, NONE)
- /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */
BUILTIN_VQN (SHIFTIMM, sqshrun_n, 0, NONE)
BUILTIN_VQN (SHIFTIMM, sqrshrun_n, 0, NONE)
BUILTIN_VQN (SHIFTIMM, sqshrn_n, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3286f42..8b92981 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1896,16 +1896,6 @@
}
)
-(define_insn "*aarch64_<srn_op>shrn<mode><vczle><vczbe>"
- [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (truncate:<VNARROWQ>
- (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
- "TARGET_SIMD"
- "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
- [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
@@ -1958,58 +1948,6 @@
[(set_attr "type" "neon_permute<q>")]
)
-(define_expand "aarch64_shrn<mode>"
- [(set (match_operand:<VNARROWQ> 0 "register_operand")
- (truncate:<VNARROWQ>
- (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
- "TARGET_SIMD"
- {
- operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
- INTVAL (operands[2]));
- }
-)
-
-(define_insn "aarch64_rshrn<mode><vczle><vczbe>_insn"
- [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (truncate:<VNARROWQ>
- (lshiftrt:VQN
- (plus:VQN (match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
- "TARGET_SIMD
- && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
- == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
- "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
- [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_rshrn<mode>"
- [(match_operand:<VNARROWQ> 0 "register_operand")
- (match_operand:VQN 1 "register_operand")
- (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
- "TARGET_SIMD"
- {
- if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
- {
- rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
- emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
- }
- else
- {
- rtx shft
- = aarch64_simd_gen_const_vector_dup (<MODE>mode,
- HOST_WIDE_INT_1U
- << (INTVAL (operands[2]) - 1));
- operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
- INTVAL (operands[2]));
- emit_insn (gen_aarch64_rshrn<mode>_insn (operands[0], operands[1],
- operands[2], shft));
- }
- DONE;
- }
-)
-
(define_insn "aarch64_shrn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
@@ -6727,31 +6665,153 @@
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>"
+(define_insn "*aarch64_<shrn_op>shrn_n<mode>_insn<vczle><vczbe>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (unspec:<VNARROWQ>
- [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
- VQSHRN_N))]
+ (ALL_TRUNC:<VNARROWQ>
+ (<TRUNC_SHIFT>:VQN
+ (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
"TARGET_SIMD"
- "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
-(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
+(define_expand "aarch64_<shrn_op>shrn_n<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (ALL_TRUNC:<VNARROWQ>
+ (<TRUNC_SHIFT>:VQN
+ (match_operand:VQN 1 "register_operand")
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ "TARGET_SIMD"
+ {
+ operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ INTVAL (operands[2]));
+ }
+)
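
[Note: as a reference for the pattern above, a minimal scalar sketch of what
one lane of the plain SHRN expander computes, for the 32-to-16-bit case.
The helper name is hypothetical and not part of the patch; <TRUNC_SHIFT>
resolves to lshiftrt for a plain truncate, hence the unsigned shift.]

    #include <stdint.h>

    /* One lane of SHRN #n narrowing 32-bit to 16-bit elements:
       logical shift right, then truncate to the low half.  */
    static inline int16_t
    shrn_lane_s32 (int32_t a, int n)
    {
      return (int16_t) ((uint32_t) a >> n);
    }
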
+
+(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
- (match_operand:SI 2
- "aarch64_simd_shift_imm_offset_<ve_mode>")]
- VQSHRN_N))]
+ (ALL_TRUNC:<VNARROWQ>
+ (<TRUNC_SHIFT>:<V2XWIDE>
+ (plus:<V2XWIDE>
+ (<TRUNCEXTEND>:<V2XWIDE>
+ (match_operand:VQN 1 "register_operand" "w"))
+ (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
+ "TARGET_SIMD
+ && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+ "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (ALL_TRUNC:<VNARROWQ>
+ (<TRUNC_SHIFT>:<V2XWIDE>
+ (plus:<V2XWIDE>
+ (<TRUNCEXTEND>:<V2XWIDE>
+ (match_operand:VQN 1 "register_operand"))
+ (match_dup 3))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ "TARGET_SIMD"
+ {
+ if (<CODE> == TRUNCATE
+ && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
+ {
+ rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
+ emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
+ DONE;
+ }
+ /* Use this expander to create the rounding constant vector, which is
+ 1 << (shift - 1). Use wide_int here to ensure that the right TImode
+ RTL is generated when handling the DImode expanders. */
+ int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+ wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+ operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+ operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
+ operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+ }
+)
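
[Note: the expander above builds the rounding constant 1 << (shift - 1) in
the double-width <V2XWIDE> mode (wide_int keeps the TImode case correct) so
the carry out of the rounding add is not lost, and routes the
shift == element-width case to RADDHN against zero.  A minimal scalar sketch
of the truncate case, 32-to-16 bits; the helper name is hypothetical.]

    #include <stdint.h>

    /* One lane of RSHRN #n (32 -> 16): the rounding addend
       1 << (n - 1) is added after widening to 64 bits, mirroring the
       <V2XWIDE> RTL, so a carry out of bit 31 is preserved.  */
    static inline int16_t
    rshrn_lane_s32 (int32_t a, int n)
    {
      uint64_t wide = (uint64_t) (uint32_t) a + (1ull << (n - 1));
      return (int16_t) (wide >> n);
    }
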
+
+(define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (truncate:<VNARROWQ>
+ (smin:VQN
+ (smax:VQN
+ (ashiftrt:VQN
+ (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+ (match_operand:VQN 3 "aarch64_simd_imm_zero"))
+ (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
+ "TARGET_SIMD"
+ "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqshrun_n<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (truncate:<VNARROWQ>
+ (smin:VQN
+ (smax:VQN
+ (ashiftrt:VQN
+ (match_operand:VQN 1 "register_operand")
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_dup 3))
+ (match_dup 4))))]
"TARGET_SIMD"
{
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[2]));
- emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn (operands[0],
- operands[1],
- operands[2]));
- DONE;
+ operands[3] = CONST0_RTX (<MODE>mode);
+ operands[4]
+ = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+ }
+)
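
[Note: the smax/smin pair above is the RTL spelling of the unsigned
saturation SQSHRUN performs.  A minimal scalar sketch for the 32-to-16-bit
case, assuming arithmetic right shift of signed values; the helper name is
hypothetical.]

    #include <stdint.h>

    /* One lane of SQSHRUN #n (32 -> 16): arithmetic shift, clamp to
       [0, UINT16_MAX] (the smax/smin above), then truncate.  */
    static inline uint16_t
    sqshrun_lane_s32 (int32_t a, int n)
    {
      int32_t s = a >> n;       /* ashiftrt */
      if (s < 0)
        s = 0;                  /* smax with zero */
      if (s > UINT16_MAX)
        s = UINT16_MAX;         /* smin with the narrow unsigned max */
      return (uint16_t) s;
    }
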
+
+(define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (truncate:<VNARROWQ>
+ (smin:<V2XWIDE>
+ (smax:<V2XWIDE>
+ (ashiftrt:<V2XWIDE>
+ (plus:<V2XWIDE>
+ (sign_extend:<V2XWIDE>
+ (match_operand:VQN 1 "register_operand" "w"))
+ (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+ (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
+ (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
+ "TARGET_SIMD
+ && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+ "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqrshrun_n<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (truncate:<VNARROWQ>
+ (smin:<V2XWIDE>
+ (smax:<V2XWIDE>
+ (ashiftrt:<V2XWIDE>
+ (plus:<V2XWIDE>
+ (sign_extend:<V2XWIDE>
+ (match_operand:VQN 1 "register_operand"))
+ (match_dup 3))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (match_dup 4))
+ (match_dup 5))))]
+ "TARGET_SIMD"
+ {
+ int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+ wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+ operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+ operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
+ operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+ operands[4] = CONST0_RTX (<V2XWIDE>mode);
+ operands[5]
+ = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
+ operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
}
)
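
[Note: for SQRSHRUN the whole computation, including the clamp, happens in
the double-width <V2XWIDE> mode, so the upper clamp bound is the unsigned
max of an element a quarter of the wide element's width -- which is what the
new aarch64_simd_umax_quarter_mode predicate (see predicates.md below)
accepts.  A scalar sketch of the 32-to-16-bit case, computed in 64-bit
lanes; the helper name is hypothetical.]

    #include <stdint.h>

    /* One lane of SQRSHRUN #n (32 -> 16), computed in 64 bits:
       rounding add, arithmetic shift, clamp to [0, UINT16_MAX],
       truncate.  65535 == (1 << (64 / 4)) - 1 for the 64-bit lanes.  */
    static inline uint16_t
    sqrshrun_lane_s32 (int32_t a, int n)
    {
      int64_t wide = (int64_t) a + ((int64_t) 1 << (n - 1)); /* plus */
      wide >>= n;                                            /* ashiftrt */
      if (wide < 0)
        wide = 0;                                            /* smax */
      if (wide > UINT16_MAX)
        wide = UINT16_MAX;                                   /* smin */
      return (uint16_t) wide;
    }
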
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0bb9839..2a46a31 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4755,42 +4755,42 @@ __extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrn_n_s16 (int16x8_t __a, const int __b)
{
- return __builtin_aarch64_shrnv8hi (__a, __b);
+ return __builtin_aarch64_shrn_nv8hi (__a, __b);
}
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrn_n_s32 (int32x4_t __a, const int __b)
{
- return __builtin_aarch64_shrnv4si (__a, __b);
+ return __builtin_aarch64_shrn_nv4si (__a, __b);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrn_n_s64 (int64x2_t __a, const int __b)
{
- return __builtin_aarch64_shrnv2di (__a, __b);
+ return __builtin_aarch64_shrn_nv2di (__a, __b);
}
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrn_n_u16 (uint16x8_t __a, const int __b)
{
- return __builtin_aarch64_shrnv8hi_uus (__a, __b);
+ return __builtin_aarch64_shrn_nv8hi_uus (__a, __b);
}
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrn_n_u32 (uint32x4_t __a, const int __b)
{
- return __builtin_aarch64_shrnv4si_uus (__a, __b);
+ return __builtin_aarch64_shrn_nv4si_uus (__a, __b);
}
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrn_n_u64 (uint64x2_t __a, const int __b)
{
- return __builtin_aarch64_shrnv2di_uus (__a, __b);
+ return __builtin_aarch64_shrn_nv2di_uus (__a, __b);
}
__extension__ extern __inline int32x4_t
@@ -5574,42 +5574,42 @@ __extension__ extern __inline int8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrshrn_n_s16 (int16x8_t __a, const int __b)
{
- return __builtin_aarch64_rshrnv8hi (__a, __b);
+ return __builtin_aarch64_rshrn_nv8hi (__a, __b);
}
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrshrn_n_s32 (int32x4_t __a, const int __b)
{
- return __builtin_aarch64_rshrnv4si (__a, __b);
+ return __builtin_aarch64_rshrn_nv4si (__a, __b);
}
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrshrn_n_s64 (int64x2_t __a, const int __b)
{
- return __builtin_aarch64_rshrnv2di (__a, __b);
+ return __builtin_aarch64_rshrn_nv2di (__a, __b);
}
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrshrn_n_u16 (uint16x8_t __a, const int __b)
{
- return __builtin_aarch64_rshrnv8hi_uus (__a, __b);
+ return __builtin_aarch64_rshrn_nv8hi_uus (__a, __b);
}
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrshrn_n_u32 (uint32x4_t __a, const int __b)
{
- return __builtin_aarch64_rshrnv4si_uus (__a, __b);
+ return __builtin_aarch64_rshrn_nv4si_uus (__a, __b);
}
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrshrn_n_u64 (uint64x2_t __a, const int __b)
{
- return __builtin_aarch64_rshrnv2di_uus (__a, __b);
+ return __builtin_aarch64_rshrn_nv2di_uus (__a, __b);
}
__extension__ extern __inline uint32x2_t
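
[Note: only the builtin spelling changes in this file; the intrinsic API is
untouched.  For example, user code like the following compiles unchanged and
now routes through the renamed builtin:]

    #include <arm_neon.h>

    /* Narrow each 16-bit lane to 8 bits with a rounding shift by 4;
       expands via __builtin_aarch64_rshrn_nv8hi after this patch.  */
    int8x8_t
    narrow_round_by_4 (int16x8_t x)
    {
      return vrshrn_n_s16 (x, 4);
    }
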
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 56ce125..e8c62c8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2254,6 +2254,8 @@
;; Signed and unsigned saturating truncations.
(define_code_iterator SAT_TRUNC [ss_truncate us_truncate])
+(define_code_iterator ALL_TRUNC [ss_truncate us_truncate truncate])
+
;; SVE integer unary operations.
(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount
(ss_abs "TARGET_SVE2")
@@ -2339,6 +2341,10 @@
(define_code_attr SHIFTEXTEND [(ashiftrt "sign_extend") (lshiftrt "zero_extend")])
+(define_code_attr TRUNCEXTEND [(ss_truncate "sign_extend")
+ (us_truncate "zero_extend")
+ (truncate "zero_extend")])
+
;; For comparison operators we use the FCM* and CM* instructions.
;; As there are no CMLE or CMLT instructions which act on 3 vector
;; operands, we must use CMGE or CMGT and swap the order of the
@@ -2428,6 +2434,12 @@
(ss_minus "sign_extend")
(us_minus "zero_extend")])
+(define_code_attr TRUNC_SHIFT [(ss_truncate "ashiftrt")
+ (us_truncate "lshiftrt") (truncate "lshiftrt")])
+
+(define_code_attr shrn_op [(ss_truncate "sq")
+ (us_truncate "uq") (truncate "")])
+
;; Whether a shift is left or right.
(define_code_attr lr [(ashift "l") (ashiftrt "r") (lshiftrt "r")])
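
[Note: taken together, the ALL_TRUNC iterator and the new code attributes
let a single pattern serve all three narrowing truncations.  The mappings
defined above expand as follows:]

    ALL_TRUNC code  <shrn_op>  <TRUNC_SHIFT>  <TRUNCEXTEND>  instructions
    ss_truncate     sq         ashiftrt       sign_extend    sqshrn, sqrshrn
    us_truncate     uq         lshiftrt       zero_extend    uqshrn, uqrshrn
    truncate        (empty)    lshiftrt       zero_extend    shrn, rshrn
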
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 9391aba..b31ba6e 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -599,10 +599,17 @@
(and (match_code "const_vector")
(match_test "aarch64_const_vec_all_same_in_range_p (op,
(HOST_WIDE_INT_1U
- << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1,
+ << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1,
(HOST_WIDE_INT_1U
- << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1)")))
+ << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1)")))
+(define_predicate "aarch64_simd_umax_quarter_mode"
+ (and (match_code "const_vector")
+ (match_test "aarch64_const_vec_all_same_in_range_p (op,
+ (HOST_WIDE_INT_1U
+ << (GET_MODE_UNIT_BITSIZE (mode) / 4)) - 1,
+ (HOST_WIDE_INT_1U
+ << (GET_MODE_UNIT_BITSIZE (mode) / 4)) - 1)")))
(define_predicate "aarch64_simd_shift_imm_vec_qi"
(and (match_code "const_vector")
(match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)")))
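
[Note: both predicates bound the immediate to the unsigned max of the narrow
destination element, measured against different lane widths: half-mode where
the clamp happens in the input mode, quarter-mode where it happens in the
double-width <V2XWIDE> mode used by SQRSHRUN.  For the 32-to-16-bit case:]

    /* Both predicates accept the same constant, relative to
       different lane widths:  */
    unsigned long long half    = (1ull << (32 / 2)) - 1;  /* umax_half_mode, 32-bit lanes */
    unsigned long long quarter = (1ull << (64 / 4)) - 1;  /* umax_quarter_mode, 64-bit lanes */
    /* half == quarter == 65535, the unsigned max of the 16-bit result.  */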