aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/aarch64/aarch64-sve.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/aarch64/aarch64-sve.md')
-rw-r--r--gcc/config/aarch64/aarch64-sve.md921
1 files changed, 747 insertions, 174 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b252eef..88d323a 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2990,10 +2990,7 @@
(vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
"TARGET_SVE"
{
- rtx tmp = gen_reg_rtx (DImode);
- rtx op1 = gen_lowpart (DImode, operands[1]);
- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
- emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+ aarch64_emit_sve_pred_vec_duplicate (<MODE>mode, operands[0], operands[1]);
DONE;
}
)
@@ -5605,18 +5602,21 @@
;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
+ (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
+ {
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
+ }
)
;; Predicated floating-point operations, merging with the first input.
@@ -5644,14 +5644,14 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5687,14 +5687,14 @@
)
(define_insn "*cond_<optab><mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5730,14 +5730,14 @@
)
(define_insn "*cond_<optab><mode>_3_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5794,16 +5794,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5868,16 +5868,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5953,14 +5953,14 @@
)
(define_insn "*cond_add<mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
(match_dup 2)]
UNSPEC_SEL))]
@@ -6015,16 +6015,16 @@
)
(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -6266,14 +6266,14 @@
)
(define_insn "*cond_sub<mode>_3_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
(match_dup 3)]
UNSPEC_SEL))]
@@ -6323,16 +6323,16 @@
)
(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
{@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6913,7 +6913,7 @@
;; Predicate AND. We can reuse one of the inputs as the GP.
;; Doubling the second operand is the preferred implementation
;; of the MOV alias, so we use that instead of %1/z, %1, %2.
-(define_insn "and<mode>3"
+(define_insn "@and<mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand")
(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
(match_operand:PRED_ALL 2 "register_operand")))]
@@ -7595,29 +7595,29 @@
;; Unpredicated floating-point ternary operations.
(define_expand "<optab><mode>4"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 4)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 1 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_dup 5)
+ (match_operand:SVE_F_B16B16 1 "register_operand")
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]);
}
)
;; Predicated floating-point ternary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 5 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ]
@@ -7631,17 +7631,17 @@
;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -7649,20 +7649,22 @@
second of the two. */
if (rtx_equal_p (operands[3], operands[5]))
std::swap (operands[2], operands[3]);
+
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
})
;; Predicated floating-point ternary operations, merging with the
;; first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7678,15 +7680,15 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7700,15 +7702,15 @@
;; Predicated floating-point ternary operations, merging with the
;; third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7724,15 +7726,15 @@
)
(define_insn "*cond_<optab><mode>_4_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7746,17 +7748,17 @@
;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 6)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -7792,17 +7794,17 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -8201,20 +8203,23 @@
;;
;; For unpacked vectors, it doesn't really matter whether SEL uses the
;; the container size or the element size. If SEL used the container size,
-;; it would ignore undefined bits of the predicate but would copy the
-;; upper (undefined) bits of each container along with the defined bits.
-;; If SEL used the element size, it would use undefined bits of the predicate
-;; to select between undefined elements in each input vector. Thus the only
-;; difference is whether the undefined bits in a container always come from
-;; the same input as the defined bits, or whether the choice can vary
-;; independently of the defined bits.
+;; it would copy the upper (undefined) bits of each container along
+;; with the corresponding defined bits. If SEL used the element size,
+;; it would use separate predicate bits to select between the undefined
+;; elements in each input vector; these separate predicate bits might
+;; themselves be undefined, depending on the mode of the predicate.
+;;
+;; Thus the only difference is whether the undefined bits in a container
+;; always come from the same input as the defined bits, or whether the
+;; choice can vary independently of the defined bits.
;;
;; For the other instructions, using the element size is more natural,
;; so we do that for SEL as well.
+;;
(define_insn "*vcond_mask_<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand")
(unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand")
+ [(match_operand:<VPRED> 3 "aarch64_predicate_operand")
(match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
(match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
@@ -8353,6 +8358,71 @@
}
)
+;; Likewise, but yield a VNx16BI result regardless of the element width.
+;; The .b case is equivalent to the above.
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:VNx16QI_ONLY 3 "register_operand")
+ (match_operand:VNx16QI_ONLY 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+)
+
+;; For wider elements, bitcast the predicate result to a VNx16BI and use
+;; an (and ...) to indicate that only every second, fourth, or eighth bit
+;; is set.
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 3 "register_operand")
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 3 "register_operand")
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 3 , 4 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
+ [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
+ [ Upa , Upl, w , <sve_imm_con>; no ] ^
+ [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+ [ ?Upl , 0 , w , w ; yes ] ^
+ [ Upa , Upl, w , w ; no ] ^
+ }
+)
+
;; Predicated integer comparisons in which both the flag and predicate
;; results are interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
@@ -8394,6 +8464,49 @@
}
)
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_acle_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 2 "register_operand")
+ (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
+ [ Upa , Upl, w , <sve_imm_con>; no ] ^
+ [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
+ [ ?Upl , 0 , w , w ; yes ] ^
+ [ Upa , Upl, w , w ; no ] ^
+ }
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
;; Predicated integer comparisons in which only the flags result is
;; interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
@@ -8459,14 +8572,52 @@
(clobber (reg:CC_NZC CC_REGNUM))])]
)
+(define_insn_and_split "*cmp<cmp_op><mode>_acle_and"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
+ (and:VNx16BI
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:VNx16BI 1 "register_operand" "Upl, Upl"))
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (const_int SVE_MAYBE_NOT_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ {
+ operands[1] = gen_lowpart (<VPRED>mode, operands[1]);
+ }
+)
+
;; Predicated integer wide comparisons.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
[(set (match_operand:<VPRED> 0 "register_operand")
(unspec:<VPRED>
- [(match_operand:VNx16BI 1 "register_operand")
+ [(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:SVE_FULL_BHSI 3 "register_operand")
+ [(match_operand:VNx16QI_ONLY 3 "register_operand")
(match_operand:VNx2DI 4 "register_operand")]
SVE_COND_INT_CMP_WIDE)]
UNSPEC_PRED_Z))
@@ -8479,16 +8630,61 @@
}
)
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_wide"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 3 "register_operand")
+ (match_operand:VNx2DI 4 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 3 "register_operand")
+ (match_operand:VNx2DI 4 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ {@ [ cons: =0, 1 , 2, 3, 4; attrs: pred_clobber ]
+ [ &Upa , Upl, , w, w; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d
+ [ ?Upl , 0 , , w, w; yes ] ^
+ [ Upa , Upl, , w, w; no ] ^
+ }
+)
+
;; Predicated integer wide comparisons in which both the flag and
;; predicate results are interesting.
-(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:VNx16BI 6 "register_operand")
+ [(match_operand:<VPRED> 6 "register_operand")
(match_operand:SI 7 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHSI 2 "register_operand")
@@ -8512,18 +8708,65 @@
[ ?Upl , 0 , w, w, Upl; yes ] ^
[ Upa , Upl, w, w, Upl; no ] ^
}
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 6 "register_operand")
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 2 "register_operand")
+ (match_operand:VNx2DI 3 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (unspec:<VPRED>
+ [(match_dup 2)
+ (match_dup 3)]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
+ [ ?Upl , 0 , w, w, Upl; yes ] ^
+ [ Upa , Upl, w, w, Upl; no ] ^
+ }
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
)
;; Predicated integer wide comparisons in which only the flags result
;; is interesting.
-(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:VNx16BI 6 "register_operand")
+ [(match_operand:<VPRED> 6 "register_operand")
(match_operand:SI 7 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHSI 2 "register_operand")
@@ -8539,6 +8782,11 @@
[ ?Upl , 0 , w, w, Upl; yes ] ^
[ Upa , Upl, w, w, Upl; no ] ^
}
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
)
;; -------------------------------------------------------------------------
@@ -8576,6 +8824,58 @@
"while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
+;; Likewise, but yield a VNx16BI result regardless of the element width.
+;; The .b case is equivalent to the above.
+(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><VNx16BI_ONLY:mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI_ONLY 0 "register_operand")
+ (unspec:VNx16BI_ONLY
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero")
+ (match_operand:GPI 2 "aarch64_reg_or_zero")]
+ SVE_WHILE))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+)
+
+;; For wider elements, bitcast the predicate result to a VNx16BI and use
+;; an (and ...) to indicate that only every second, fourth, or eighth bit
+;; is set.
+(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero")
+ (match_operand:GPI 2 "aarch64_reg_or_zero")]
+ SVE_WHILE)
+ 0)
+ (match_dup 3)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_all (<data_bytes>);
+ }
+)
+
+(define_insn "*aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ SVE_WHILE)
+ 0)
+ (match_operand:PRED_HSD 3 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2"
+)
+
;; The WHILE instructions set the flags in the same way as a PTEST with
;; a PTRUE GP. Handle the case in which both results are useful. The GP
;; operands to the PTEST aren't needed, so we allow them to be anything.
@@ -8607,6 +8907,38 @@
}
)
+(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand 3)
+ (match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ SVE_WHILE)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD [(const_int SVE_WHILE_B)
+ (match_dup 1)
+ (match_dup 2)]
+ SVE_WHILE)
+ 0)
+ (match_operand:PRED_HSD 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2"
+ ;; Force the compiler to drop the unused predicate operand, so that we
+ ;; don't have an unnecessary PTRUE.
+ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
+ {
+ operands[3] = CONSTM1_RTX (VNx16BImode);
+ operands[4] = CONSTM1_RTX (<PRED_HSD:MODE>mode);
+ }
+)
+
;; Same, but handle the case in which only the flags result is useful.
(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
@@ -8677,6 +9009,43 @@
}
)
+(define_expand "@aarch64_pred_fcm<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ SVE_COND_FP_CMP_I0)
+ 0)
+ (match_dup 5)))]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_fcm<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ SVE_COND_FP_CMP_I0)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 3 , 4 ]
+ [ Upa , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
+ [ Upa , Upl , w , w ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+ }
+)
+
;; Same for unordered comparisons.
(define_insn "@aarch64_pred_fcmuo<mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
@@ -8690,6 +9059,40 @@
"fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
+(define_expand "@aarch64_pred_fcmuo<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
+ UNSPEC_COND_FCMUO)
+ 0)
+ (match_dup 5)))]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_fcmuo<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand" "Upl")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand" "w")
+ (match_operand:SVE_F 4 "register_operand" "w")]
+ UNSPEC_COND_FCMUO)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
+)
+
;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P. This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,
@@ -8908,23 +9311,30 @@
;; -------------------------------------------------------------------------
;; Predicated floating-point absolute comparisons.
-(define_expand "@aarch64_pred_fac<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 2 "aarch64_sve_ptrue_flag")
- (unspec:SVE_FULL_F
- [(match_dup 1)
- (match_dup 2)
- (match_operand:SVE_FULL_F 3 "register_operand")]
- UNSPEC_COND_FABS)
- (unspec:SVE_FULL_F
- [(match_dup 1)
- (match_dup 2)
- (match_operand:SVE_FULL_F 4 "register_operand")]
- UNSPEC_COND_FABS)]
- SVE_COND_FP_ABS_CMP))]
+(define_expand "@aarch64_pred_fac<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_dup 2)
+ (match_operand:SVE_FULL_F 3 "register_operand")]
+ UNSPEC_COND_FABS)
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_dup 2)
+ (match_operand:SVE_FULL_F 4 "register_operand")]
+ UNSPEC_COND_FABS)]
+ SVE_COND_FP_ABS_CMP)
+ 0)
+ (match_dup 5)))]
"TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
)
(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
@@ -8973,6 +9383,30 @@
"fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
+(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ UNSPEC_COND_FABS)
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ UNSPEC_COND_FABS)]
+ SVE_COND_FP_ABS_CMP)
+ 0)
+ (match_operand:<VPRED> 7 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+)
+
;; -------------------------------------------------------------------------
;; ---- [PRED] Select
;; -------------------------------------------------------------------------
@@ -9421,7 +9855,30 @@
(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
UNSPEC_REV))]
"TARGET_SVE"
- "rev\t%0.<Vetype>, %1.<Vetype>")
+ "rev\t%0.<Vetype>, %1.<Vetype>"
+)
+
+(define_expand "@aarch64_sve_rev<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_dup:PRED_ALL 2)]
+ UNSPEC_REV_PRED))]
+ "TARGET_SVE"
+ {
+ operands[2] = CONST0_RTX (<MODE>mode);
+ }
+)
+
+(define_insn "*aarch64_sve_rev<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand:PRED_ALL 2 "aarch64_simd_imm_zero")]
+ UNSPEC_REV_PRED))]
+ "TARGET_SVE"
+ "rev\t%0.<Vetype>, %1.<Vetype>"
+)
;; -------------------------------------------------------------------------
;; ---- [PRED] Special-purpose binary permutes
@@ -9446,18 +9903,39 @@
"<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Special purpose permute used by the predicate generation instructions.
-;; Unlike the normal permute patterns, these instructions operate on VNx16BI
-;; regardless of the element size, so that all input and output bits are
-;; well-defined. Operand 3 then indicates the size of the permute.
-(define_insn "@aarch64_sve_trn1_conv<mode>"
+;; Special-purpose permutes used by the ACLE intrinsics and predicate
+;; generation instructions. Unlike the normal permute patterns, these
+;; instructions operate on VNx16BI regardless of the element size, so that
+;; all input and output bits are well-defined. Operand 3 then indicates
+;; the size of the permute.
+;;
+;; To make generation easier, this pattern embeds the permute type as the
+;; fourth operand to the unspec. On the one hand, this avoids overloading
+;; unspecs like UNSPEC_ZIP1 to represent two different operations. On the
+;; other hand, it avoids having a separate unspec for each variant, and
+;; having to map from one kind of unspec to the other.
+(define_expand "@aarch64_sve_<perm_insn><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand:VNx16BI 2 "register_operand")
+ (match_dup:PRED_ALL 3)
+ (const_int PERMUTE)]
+ UNSPEC_PERMUTE_PRED))]
+ "TARGET_SVE"
+ {
+ operands[3] = CONST0_RTX (<MODE>mode);
+ }
+)
+
+(define_insn "*aarch64_sve_<perm_insn><mode>_acle"
[(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
(unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
(match_operand:VNx16BI 2 "register_operand" "Upa")
- (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
- UNSPEC_TRN1_CONV))]
+ (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")
+ (const_int PERMUTE)]
+ UNSPEC_PERMUTE_PRED))]
"TARGET_SVE"
- "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
+ "<perm_insn>\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
)
;; =========================================================================
@@ -10446,6 +10924,34 @@
"punpk<perm_hilo>\t%0.h, %1.b"
)
+(define_expand "@aarch64_sve_punpk<perm_hilo>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:VNx8BI
+ [(match_operand:VNx16BI 1 "register_operand")]
+ UNPACK_UNSIGNED)
+ 0)
+ (match_dup 2)))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_all (2);
+ }
+)
+
+(define_insn "*aarch64_sve_punpk<perm_hilo>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:VNx8BI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")]
+ UNPACK_UNSIGNED)
+ 0)
+ (match_operand:VNx8BI 2 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "punpk<perm_hilo>\t%0.h, %1.b"
+)
+
;; =========================================================================
;; == Vector partitioning
;; =========================================================================
@@ -10670,14 +11176,49 @@
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_pred_op><mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL
- [(match_operand:PRED_ALL 1 "register_operand" "Upa")
+ [(set (match_operand:VNx16BI_ONLY 0 "register_operand" "=Upa")
+ (unspec:VNx16BI_ONLY
+ [(match_operand:VNx16BI_ONLY 1 "register_operand" "Upa")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
- (match_operand:PRED_ALL 3 "register_operand" "0")]
+ (match_operand:VNx16BI_ONLY 3 "register_operand" "0")]
SVE_PITER))
(clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
+ "TARGET_SVE"
+ "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
+)
+
+(define_expand "@aarch64_sve_<sve_pred_op><mode>"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_operand:PRED_HSD 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 3 "register_operand")]
+ PNEXT_ONLY)
+ 0)
+ (match_dup 4)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[4] = aarch64_ptrue_all (<data_bytes>);
+ }
+)
+
+(define_insn "*aarch64_sve_<sve_pred_op><mode>"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_operand:PRED_HSD 1 "register_operand" "Upa")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 3 "register_operand" "0")]
+ PNEXT_ONLY)
+ 0)
+ (match_operand:PRED_HSD 4 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
"<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
)
@@ -10711,6 +11252,38 @@
}
)
+(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand 2)
+ (match_operand:SI 3 "aarch64_sve_ptrue_flag")
+ (unspec:PRED_HSD
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 6 "register_operand" "0")]
+ PNEXT_ONLY)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ PNEXT_ONLY)
+ 0)
+ (match_operand:PRED_HSD 7 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
+ "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
+ "&& !rtx_equal_p (operands[2], operands[4])"
+ {
+ operands[4] = operands[2];
+ operands[5] = operands[3];
+ }
+)
+
;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)