diff options
Diffstat (limited to 'gcc/config/aarch64/aarch64-sve.md')
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 921 |
1 files changed, 747 insertions, 174 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index b252eef..88d323a 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -2990,10 +2990,7 @@ (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))] "TARGET_SVE" { - rtx tmp = gen_reg_rtx (DImode); - rtx op1 = gen_lowpart (DImode, operands[1]); - emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); - emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp)); + aarch64_emit_sve_pred_vec_duplicate (<MODE>mode, operands[0], operands[1]); DONE; } ) @@ -5605,18 +5602,21 @@ ;; Predicated floating-point operations with merging. (define_expand "@cond_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") - (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] + (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") + (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] SVE_COND_FP_BINARY) - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" + { + operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]); + } ) ;; Predicated floating-point operations, merging with the first input. 
@@ -5644,14 +5644,14 @@ ) (define_insn "*cond_<optab><mode>_2_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) (match_dup 2)] UNSPEC_SEL))] @@ -5687,14 +5687,14 @@ ) (define_insn "*cond_<optab><mode>_2_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) (match_dup 2)] UNSPEC_SEL))] @@ -5730,14 +5730,14 @@ ) (define_insn "*cond_<optab><mode>_3_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + 
(match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) (match_dup 3)] UNSPEC_SEL))] @@ -5794,16 +5794,16 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -5868,16 +5868,16 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" {@ [ cons: =0 , 1 , 2 , 4 ] @@ -5953,14 +5953,14 @@ ) (define_insn "*cond_add<mode>_2_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - 
[(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")] UNSPEC_COND_FADD) (match_dup 2)] UNSPEC_SEL))] @@ -6015,16 +6015,16 @@ ) (define_insn_and_rewrite "*cond_add<mode>_any_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")] UNSPEC_COND_FADD) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" {@ [ cons: =0 , 1 , 2 , 3 , 4 ] @@ -6266,14 +6266,14 @@ ) (define_insn "*cond_sub<mode>_3_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 
"aarch64_sve_float_arith_immediate") + (match_operand:SVE_F 3 "register_operand")] UNSPEC_COND_FSUB) (match_dup 3)] UNSPEC_SEL))] @@ -6323,16 +6323,16 @@ ) (define_insn_and_rewrite "*cond_sub<mode>_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_F 3 "register_operand")] UNSPEC_COND_FSUB) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" {@ [ cons: =0 , 1 , 3 , 4 ] @@ -6913,7 +6913,7 @@ ;; Predicate AND. We can reuse one of the inputs as the GP. ;; Doubling the second operand is the preferred implementation ;; of the MOV alias, so we use that instead of %1/z, %1, %2. -(define_insn "and<mode>3" +(define_insn "@and<mode>3" [(set (match_operand:PRED_ALL 0 "register_operand") (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand") (match_operand:PRED_ALL 2 "register_operand")))] @@ -7595,29 +7595,29 @@ ;; Unpredicated floating-point ternary operations. 
(define_expand "<optab><mode>4" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_dup 4) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 1 "register_operand") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_dup 5) + (match_operand:SVE_F_B16B16 1 "register_operand") + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_TERNARY))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { - operands[4] = aarch64_ptrue_reg (<VPRED>mode); + operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]); } ) ;; Predicated floating-point ternary operations. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 5 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ] @@ -7631,17 +7631,17 @@ ;; Predicated floating-point ternary operations with merging. 
(define_expand "@cond_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { @@ -7649,20 +7649,22 @@ second of the two. */ if (rtx_equal_p (operands[3], operands[5])) std::swap (operands[2], operands[3]); + + operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]); }) ;; Predicated floating-point ternary operations, merging with the ;; first input. 
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + (unspec:SVE_F [(match_operand 5) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] @@ -7678,15 +7680,15 @@ ) (define_insn "*cond_<optab><mode>_2_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] @@ -7700,15 +7702,15 @@ ;; Predicated floating-point ternary operations, merging with the ;; third input. 
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_operand 5) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] @@ -7724,15 +7726,15 @@ ) (define_insn "*cond_<optab><mode>_4_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] @@ -7746,17 +7748,17 @@ ;; Predicated floating-point ternary operations, merging with an ;; independent value. 
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_operand 6) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -7792,17 +7794,17 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -8201,20 +8203,23 @@ ;; ;; For unpacked vectors, it doesn't really matter whether SEL uses 
the ;; the container size. If SEL used the container size, -;; it would ignore undefined bits of the predicate but would copy the -;; upper (undefined) bits of each container along with the defined bits. -;; If SEL used the element size, it would use undefined bits of the predicate -;; to select between undefined elements in each input vector. Thus the only -;; difference is whether the undefined bits in a container always come from -;; the same input as the defined bits, or whether the choice can vary -;; independently of the defined bits. +;; it would copy the upper (undefined) bits of each container along +;; with the corresponding defined bits. If SEL used the element size, +;; it would use separate predicate bits to select between the undefined +;; elements in each input vector; these separate predicate bits might +;; themselves be undefined, depending on the mode of the predicate. +;; +;; Thus the only difference is whether the undefined bits in a container +;; always come from the same input as the defined bits, or whether the +;; choice can vary independently of the defined bits. ;; ;; For the other instructions, using the element size is more natural, ;; so we do that for SEL as well. +;; (define_insn "*vcond_mask_<mode><vpred>" [(set (match_operand:SVE_ALL 0 "register_operand") (unspec:SVE_ALL - [(match_operand:<VPRED> 3 "register_operand") + [(match_operand:<VPRED> 3 "aarch64_predicate_operand") (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] @@ -8353,6 +8358,71 @@ } ) +;; Likewise, but yield a VNx16BI result regardless of the element width. +;; The .b case is equivalent to the above. 
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle" + [(parallel + [(set (match_operand:<VPRED> 0 "register_operand") + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (SVE_INT_CMP:<VPRED> + (match_operand:VNx16QI_ONLY 3 "register_operand") + (match_operand:VNx16QI_ONLY 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))] + UNSPEC_PRED_Z)) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" +) + +;; For wider elements, bitcast the predicate result to a VNx16BI and use +;; an (and ...) to indicate that only every second, fourth, or eighth bit +;; is set. +(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle" + [(parallel + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_FULL_HSDI 3 "register_operand") + (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))] + UNSPEC_PRED_Z) + 0) + (match_dup 5))) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } +) + +(define_insn "*aarch64_pred_cmp<cmp_op><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_FULL_HSDI 3 "register_operand") + (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))] + UNSPEC_PRED_Z) + 0) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + {@ [ cons: =0 , 1 , 3 , 4 ; attrs: pred_clobber ] + [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4 + [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^ + [ Upa , Upl, w , <sve_imm_con>; no ] ^ + [ &Upa , Upl, w , w 
; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype> + [ ?Upl , 0 , w , w ; yes ] ^ + [ Upa , Upl, w , w ; no ] ^ + } +) + ;; Predicated integer comparisons in which both the flag and predicate ;; results are interesting. (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc" @@ -8394,6 +8464,49 @@ } ) +(define_insn_and_rewrite "*cmp<cmp_op><mode>_acle_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_FULL_HSDI 2 "register_operand") + (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_dup 6) + (match_dup 7) + (SVE_INT_CMP:<VPRED> + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_Z) + 0) + (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ] + [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 + [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^ + [ Upa , Upl, w , <sve_imm_con>; no ] ^ + [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype> + [ ?Upl , 0 , w , w ; yes ] ^ + [ Upa , Upl, w , w ; no ] ^ + } + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } +) + ;; Predicated integer comparisons in which only the flags result is ;; interesting. 
(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" @@ -8459,14 +8572,52 @@ (clobber (reg:CC_NZC CC_REGNUM))])] ) +(define_insn_and_split "*cmp<cmp_op><mode>_acle_and" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") + (and:VNx16BI + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w") + (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] + UNSPEC_PRED_Z) + 0) + (match_operand:VNx16BI 1 "register_operand" "Upl, Upl")) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_dup 1) + (const_int SVE_MAYBE_NOT_PTRUE) + (SVE_INT_CMP:<VPRED> + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_Z) + 0) + (match_dup 5))) + (clobber (reg:CC_NZC CC_REGNUM))])] + { + operands[1] = gen_lowpart (<VPRED>mode, operands[1]); + } +) + ;; Predicated integer wide comparisons. 
(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide" [(set (match_operand:<VPRED> 0 "register_operand") (unspec:<VPRED> - [(match_operand:VNx16BI 1 "register_operand") + [(match_operand:<VPRED> 1 "register_operand") (match_operand:SI 2 "aarch64_sve_ptrue_flag") (unspec:<VPRED> - [(match_operand:SVE_FULL_BHSI 3 "register_operand") + [(match_operand:VNx16QI_ONLY 3 "register_operand") (match_operand:VNx2DI 4 "register_operand")] SVE_COND_INT_CMP_WIDE)] UNSPEC_PRED_Z)) @@ -8479,16 +8630,61 @@ } ) +(define_expand "@aarch64_pred_cmp<cmp_op><mode>_wide" + [(parallel + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand:SVE_FULL_HSI 3 "register_operand") + (match_operand:VNx2DI 4 "register_operand")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z) + 0) + (match_dup 5))) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } +) + +(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand:SVE_FULL_HSI 3 "register_operand") + (match_operand:VNx2DI 4 "register_operand")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z) + 0) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + {@ [ cons: =0, 1 , 2, 3, 4; attrs: pred_clobber ] + [ &Upa , Upl, , w, w; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d + [ ?Upl , 0 , , w, w; yes ] ^ + [ Upa , Upl, , w, w; no ] ^ + } +) + ;; Predicated integer wide comparisons in which both the flag and ;; predicate results are interesting. 
-(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" +(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand") (match_operand 4) (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec:<VPRED> - [(match_operand:VNx16BI 6 "register_operand") + [(match_operand:<VPRED> 6 "register_operand") (match_operand:SI 7 "aarch64_sve_ptrue_flag") (unspec:<VPRED> [(match_operand:SVE_FULL_BHSI 2 "register_operand") @@ -8512,18 +8708,65 @@ [ ?Upl , 0 , w, w, Upl; yes ] ^ [ Upa , Upl, w, w, Upl; no ] ^ } + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } +) + +(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand:<VPRED> 6 "register_operand") + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand:SVE_FULL_HSI 2 "register_operand") + (match_operand:VNx2DI 3 "register_operand")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_dup 6) + (match_dup 7) + (unspec:<VPRED> + [(match_dup 2) + (match_dup 3)] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z) + 0) + (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ] + [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d + [ ?Upl , 0 , w, w, Upl; yes ] ^ + [ Upa , Upl, w, w, Upl; no ] ^ + } + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } ) ;; Predicated integer wide comparisons in 
which only the flags result ;; is interesting. -(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest" +(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand") (match_operand 4) (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec:<VPRED> - [(match_operand:VNx16BI 6 "register_operand") + [(match_operand:<VPRED> 6 "register_operand") (match_operand:SI 7 "aarch64_sve_ptrue_flag") (unspec:<VPRED> [(match_operand:SVE_FULL_BHSI 2 "register_operand") @@ -8539,6 +8782,11 @@ [ ?Upl , 0 , w, w, Upl; yes ] ^ [ Upa , Upl, w, w, Upl; no ] ^ } + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } ) ;; ------------------------------------------------------------------------- @@ -8576,6 +8824,58 @@ "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" ) +;; Likewise, but yield a VNx16BI result regardless of the element width. +;; The .b case is equivalent to the above. +(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><VNx16BI_ONLY:mode>_acle" + [(parallel + [(set (match_operand:VNx16BI_ONLY 0 "register_operand") + (unspec:VNx16BI_ONLY + [(const_int SVE_WHILE_B) + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero")] + SVE_WHILE)) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" +) + +;; For wider elements, bitcast the predicate result to a VNx16BI and use +;; an (and ...) to indicate that only every second, fourth, or eighth bit +;; is set. 
+(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle" + [(parallel + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(const_int SVE_WHILE_B) + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero")] + SVE_WHILE) + 0) + (match_dup 3))) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_all (<data_bytes>); + } +) + +(define_insn "*aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(const_int SVE_WHILE_B) + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] + SVE_WHILE) + 0) + (match_operand:PRED_HSD 3 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2" +) + ;; The WHILE instructions set the flags in the same way as a PTEST with ;; a PTRUE GP. Handle the case in which both results are useful. The GP ;; operands to the PTEST aren't needed, so we allow them to be anything. 
@@ -8607,6 +8907,38 @@ } ) +(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 3) + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (unspec:PRED_HSD + [(const_int SVE_WHILE_B) + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] + SVE_WHILE)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD [(const_int SVE_WHILE_B) + (match_dup 1) + (match_dup 2)] + SVE_WHILE) + 0) + (match_operand:PRED_HSD 5 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2" + ;; Force the compiler to drop the unused predicate operand, so that we + ;; don't have an unnecessary PTRUE. + "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" + { + operands[3] = CONSTM1_RTX (VNx16BImode); + operands[4] = CONSTM1_RTX (<PRED_HSD:MODE>mode); + } +) + ;; Same, but handle the case in which only the flags result is useful. 
(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest" [(set (reg:CC_NZC CC_REGNUM) @@ -8677,6 +9009,43 @@ } ) +(define_expand "@aarch64_pred_fcm<cmp_op><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] + SVE_COND_FP_CMP_I0) + 0) + (match_dup 5)))] + "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } +) + +(define_insn "*aarch64_pred_fcm<cmp_op><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] + SVE_COND_FP_CMP_I0) + 0) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + {@ [ cons: =0 , 1 , 3 , 4 ] + [ Upa , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0 + [ Upa , Upl , w , w ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype> + } +) + ;; Same for unordered comparisons. 
(define_insn "@aarch64_pred_fcmuo<mode>" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") @@ -8690,6 +9059,40 @@ "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" ) +(define_expand "@aarch64_pred_fcmuo<mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] + UNSPEC_COND_FCMUO) + 0) + (match_dup 5)))] + "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } +) + +(define_insn "*aarch64_pred_fcmuo<mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_predicate_operand" "Upl") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 3 "register_operand" "w") + (match_operand:SVE_F 4 "register_operand" "w")] + UNSPEC_COND_FCMUO) + 0) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" +) + ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed ;; with another predicate P. This does not have the same trapping behavior ;; as predicating the comparison itself on P, but it's a legitimate fold, @@ -8908,23 +9311,30 @@ ;; ------------------------------------------------------------------------- ;; Predicated floating-point absolute comparisons. 
-(define_expand "@aarch64_pred_fac<cmp_op><mode>" - [(set (match_operand:<VPRED> 0 "register_operand") - (unspec:<VPRED> - [(match_operand:<VPRED> 1 "register_operand") - (match_operand:SI 2 "aarch64_sve_ptrue_flag") - (unspec:SVE_FULL_F - [(match_dup 1) - (match_dup 2) - (match_operand:SVE_FULL_F 3 "register_operand")] - UNSPEC_COND_FABS) - (unspec:SVE_FULL_F - [(match_dup 1) - (match_dup 2) - (match_operand:SVE_FULL_F 4 "register_operand")] - UNSPEC_COND_FABS)] - SVE_COND_FP_ABS_CMP))] +(define_expand "@aarch64_pred_fac<cmp_op><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_dup 2) + (match_operand:SVE_FULL_F 3 "register_operand")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_dup 2) + (match_operand:SVE_FULL_F 4 "register_operand")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP) + 0) + (match_dup 5)))] "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } ) (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed" @@ -8973,6 +9383,30 @@ "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" ) +(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP) + 0) + (match_operand:<VPRED> 7 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + 
"fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" +) + ;; ------------------------------------------------------------------------- ;; ---- [PRED] Select ;; ------------------------------------------------------------------------- @@ -9421,7 +9855,30 @@ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")] UNSPEC_REV))] "TARGET_SVE" - "rev\t%0.<Vetype>, %1.<Vetype>") + "rev\t%0.<Vetype>, %1.<Vetype>" +) + +(define_expand "@aarch64_sve_rev<mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand") + (match_dup:PRED_ALL 2)] + UNSPEC_REV_PRED))] + "TARGET_SVE" + { + operands[2] = CONST0_RTX (<MODE>mode); + } +) + +(define_insn "*aarch64_sve_rev<mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand:PRED_ALL 2 "aarch64_simd_imm_zero")] + UNSPEC_REV_PRED))] + "TARGET_SVE" + "rev\t%0.<Vetype>, %1.<Vetype>" +) ;; ------------------------------------------------------------------------- ;; ---- [PRED] Special-purpose binary permutes @@ -9446,18 +9903,39 @@ "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" ) -;; Special purpose permute used by the predicate generation instructions. -;; Unlike the normal permute patterns, these instructions operate on VNx16BI -;; regardless of the element size, so that all input and output bits are -;; well-defined. Operand 3 then indicates the size of the permute. -(define_insn "@aarch64_sve_trn1_conv<mode>" +;; Special-purpose permutes used by the ACLE intrinsics and predicate +;; generation instructions. Unlike the normal permute patterns, these +;; instructions operate on VNx16BI regardless of the element size, so that +;; all input and output bits are well-defined. Operand 3 then indicates +;; the size of the permute. +;; +;; To make generation easier, this pattern embeds the permute type as the +;; fourth operand to the unspec. 
On the one hand, this avoids overloading +;; unspecs like UNSPEC_ZIP1 to represent two different operations. On the +;; other hand, it avoids having a separate unspec for each variant, and +;; having to map from one kind of unspec to the other. +(define_expand "@aarch64_sve_<perm_insn><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand") + (match_operand:VNx16BI 2 "register_operand") + (match_dup:PRED_ALL 3) + (const_int PERMUTE)] + UNSPEC_PERMUTE_PRED))] + "TARGET_SVE" + { + operands[3] = CONST0_RTX (<MODE>mode); + } +) + +(define_insn "*aarch64_sve_<perm_insn><mode>_acle" [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa") (match_operand:VNx16BI 2 "register_operand" "Upa") - (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")] - UNSPEC_TRN1_CONV))] + (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero") + (const_int PERMUTE)] + UNSPEC_PERMUTE_PRED))] "TARGET_SVE" - "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" + "<perm_insn>\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" ) ;; ========================================================================= @@ -10446,6 +10924,34 @@ "punpk<perm_hilo>\t%0.h, %1.b" ) +(define_expand "@aarch64_sve_punpk<perm_hilo>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:VNx8BI + [(match_operand:VNx16BI 1 "register_operand")] + UNPACK_UNSIGNED) + 0) + (match_dup 2)))] + "TARGET_SVE" + { + operands[2] = aarch64_ptrue_all (2); + } +) + +(define_insn "*aarch64_sve_punpk<perm_hilo>_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:VNx8BI + [(match_operand:VNx16BI 1 "register_operand" "Upa")] + UNPACK_UNSIGNED) + 0) + (match_operand:VNx8BI 2 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + "punpk<perm_hilo>\t%0.h, %1.b" +) + ;; 
========================================================================= ;; == Vector partitioning ;; ========================================================================= @@ -10670,14 +11176,49 @@ ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_<sve_pred_op><mode>" - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (unspec:PRED_ALL - [(match_operand:PRED_ALL 1 "register_operand" "Upa") + [(set (match_operand:VNx16BI_ONLY 0 "register_operand" "=Upa") + (unspec:VNx16BI_ONLY + [(match_operand:VNx16BI_ONLY 1 "register_operand" "Upa") (match_operand:SI 2 "aarch64_sve_ptrue_flag") - (match_operand:PRED_ALL 3 "register_operand" "0")] + (match_operand:VNx16BI_ONLY 3 "register_operand" "0")] SVE_PITER)) (clobber (reg:CC_NZC CC_REGNUM))] - "TARGET_SVE && <max_elem_bits> >= <elem_bits>" + "TARGET_SVE" + "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" +) + +(define_expand "@aarch64_sve_<sve_pred_op><mode>" + [(parallel + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(match_operand:PRED_HSD 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_HSD 3 "register_operand")] + PNEXT_ONLY) + 0) + (match_dup 4))) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + operands[4] = aarch64_ptrue_all (<data_bytes>); + } +) + +(define_insn "*aarch64_sve_<sve_pred_op><mode>" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(match_operand:PRED_HSD 1 "register_operand" "Upa") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_HSD 3 "register_operand" "0")] + PNEXT_ONLY) + 0) + (match_operand:PRED_HSD 4 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" ) @@ -10711,6 +11252,38 @@ } ) +(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc" + [(set 
(reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 2) + (match_operand:SI 3 "aarch64_sve_ptrue_flag") + (unspec:PRED_HSD + [(match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (match_operand:PRED_HSD 6 "register_operand" "0")] + PNEXT_ONLY)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(match_dup 4) + (match_dup 5) + (match_dup 6)] + PNEXT_ONLY) + 0) + (match_operand:PRED_HSD 7 "aarch64_ptrue_all_operand")))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" + "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" + "&& !rtx_equal_p (operands[2], operands[4])" + { + operands[4] = operands[2]; + operands[5] = operands[3]; + } +) + ;; Same, but with only the flags result being interesting. (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest" [(set (reg:CC_NZC CC_REGNUM) |