Diffstat (limited to 'gcc/config/aarch64/aarch64-sve.md')
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 1982
1 file changed, 1480 insertions, 502 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 3dbd659..51e2d7d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -154,8 +154,10 @@
 ;; ---- [FP<-INT] Packs
 ;; ---- [FP<-INT] Unpacks
 ;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Truncating conversions
 ;; ---- [FP<-FP] Packs (bfloat16)
 ;; ---- [FP<-FP] Unpacks
+;; ---- [FP<-FP] Extending conversions
 ;; ---- [PRED<-PRED] Packs
 ;; ---- [PRED<-PRED] Unpacks
 ;;
@@ -702,6 +704,23 @@
   }
 )
 
+;; Fold predicated loads/stores with a PTRUE predicate to unpredicated
+;; loads/stores after RA.
+(define_insn_and_split "*aarch64_sve_ptrue<mode>_ldr_str"
+  [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=Utr,w")
+	(unspec:SVE_FULL
+	  [(match_operand:<VPRED> 1 "aarch64_simd_imm_one")
+	   (match_operand:SVE_FULL 2 "aarch64_sve_nonimmediate_operand" "w,Utr")]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE && reload_completed
+   && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
+   && ((REG_P (operands[0]) && MEM_P (operands[2]))
+       || (REG_P (operands[2]) && MEM_P (operands[0])))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(match_dup 2))])
+
 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
 ;; or vectors for which little-endian ordering isn't acceptable.  Memory
 ;; accesses require secondary reloads.
@@ -1286,7 +1305,24 @@
 ;; -------------------------------------------------------------------------
 
 ;; Predicated LD1 (single).
-(define_insn "maskload<mode><vpred>"
+(define_expand "maskload<mode><vpred>"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+	(unspec:SVE_ALL
+	  [(match_operand:<VPRED> 2 "nonmemory_operand")
+	   (match_operand:SVE_ALL 1 "memory_operand")
+	   (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")]
+	  UNSPEC_LD1_SVE))]
+  "TARGET_SVE"
+  {
+    if (aarch64_expand_maskloadstore (operands, <MODE>mode))
+      DONE;
+    if (CONSTANT_P (operands[2]))
+      operands[2] = force_reg (<VPRED>mode, operands[2]);
+  }
+)
+
+;; Predicated LD1 (single).
+(define_insn "*aarch64_maskload<mode><vpred>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 	(unspec:SVE_ALL
 	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
@@ -1308,7 +1344,7 @@
   "TARGET_SVE"
   {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
-    operands[3] = CONST0_RTX (<MODE>mode);
+    operands[3] = CONST0_RTX (<VSINGLE>mode);
  }
 )
 
@@ -1318,7 +1354,7 @@
 	(unspec:SVE_STRUCT
 	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
 	   (match_operand:SVE_STRUCT 1 "memory_operand" "m")
-	   (match_operand 3 "aarch64_maskload_else_operand")]
+	   (match_operand:<VSINGLE> 3 "aarch64_maskload_else_operand")]
 	  UNSPEC_LDN))]
   "TARGET_SVE"
   "ld<vector_count><Vesize>\t%0, %2/z, %1"
@@ -1506,18 +1542,18 @@
 	  UNSPEC_LD1_GATHER))]
   "TARGET_SVE && TARGET_NON_STREAMING"
   {@ [cons: =0, 1, 2, 3, 4, 5 ]
-     [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
-     [?w, Z, 0, Ui1, Ui1, Upl] ^
-     [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
-     [?w, vgw, 0, Ui1, Ui1, Upl] ^
-     [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
-     [?w, rk, 0, Z, Ui1, Upl] ^
-     [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
-     [?w, rk, 0, Ui1, Ui1, Upl] ^
-     [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
-     [?w, rk, 0, Z, i, Upl] ^
-     [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
-     [?w, rk, 0, Ui1, i, Upl] ^
+     [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
+     [?w, Z, 0, Ui1, Ui1, Upl] ^
+     [&w, vg<Vesize>, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
+     [?w, vg<Vesize>, 0, Ui1, Ui1, Upl] ^
+     [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
+     [?w, rk, 0, Z, Ui1, Upl] ^
+     [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
+     [?w, rk, 0, Ui1, Ui1, Upl] ^
+     [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
+     [?w, rk, 0, Z, i, Upl] ^
+     [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
+     [?w, rk, 0, Ui1, i, Upl] ^
   }
 )
 
@@ -1536,14 +1572,14 @@
 	  UNSPEC_LD1_GATHER))]
   "TARGET_SVE && TARGET_NON_STREAMING"
   {@ [cons: =0, 1, 2, 3, 4, 5]
-     [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
-     [?w, Z, 0, i, Ui1, Upl] ^
-     [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
-     [?w, vgd, 0, i, Ui1, Upl] ^
-     [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
-     [?w, rk, 0, i, Ui1, Upl] ^
-     [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
-     [?w, rk, 0, i, i, Upl] ^
+     [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
+     [?w, Z, 0, i, Ui1, Upl] ^
+     [&w, vg<Vesize>, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
+     [?w, vg<Vesize>, 0, i, Ui1, Upl] ^
+     [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
+     [?w, rk, 0, i, Ui1, Upl] ^
+     [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
+     [?w, rk, 0, i, i, Upl] ^
   }
 )
 
@@ -2287,7 +2323,24 @@
 ;; -------------------------------------------------------------------------
 
 ;; Predicated ST1 (single).
-(define_insn "maskstore<mode><vpred>"
+(define_expand "maskstore<mode><vpred>"
+  [(set (match_operand:SVE_ALL 0 "memory_operand")
+	(unspec:SVE_ALL
+	  [(match_operand:<VPRED> 2 "nonmemory_operand")
+	   (match_operand:SVE_ALL 1 "register_operand")
+	   (match_dup 0)]
+	  UNSPEC_ST1_SVE))]
+  "TARGET_SVE"
+  {
+    if (aarch64_expand_maskloadstore (operands, <MODE>mode))
+      DONE;
+    if (CONSTANT_P (operands[2]))
+      operands[2] = force_reg (<VPRED>mode, operands[2]);
+  }
+)
+
+;; Predicated ST1 (single).
+(define_insn "*aarch64_maskstore<mode><vpred>"
   [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
 	(unspec:SVE_ALL
 	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
@@ -2435,13 +2488,13 @@
 	   (match_operand:SVE_4 4 "register_operand")]
 	  UNSPEC_ST1_SCATTER))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
-     [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
-     [ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
-     [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
-     [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
-     [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
-     [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
+  {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
+     [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
+     [ vg<Vesize> , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
+     [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
+     [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
+     [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
+     [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
   }
 )
 
@@ -2458,11 +2511,11 @@
 	   (match_operand:SVE_2 4 "register_operand")]
 	  UNSPEC_ST1_SCATTER))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
-     [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
-     [ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
-     [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
-     [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
+  {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
+     [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
+     [ vg<Vesize> , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
+     [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
+     [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
   }
 )
 
@@ -2937,10 +2990,7 @@
 	(vec_duplicate:PRED_ALL
 	  (match_operand:QI 1 "register_operand")))]
   "TARGET_SVE"
   {
-    rtx tmp = gen_reg_rtx (DImode);
-    rtx op1 = gen_lowpart (DImode, operands[1]);
-    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
-    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+    aarch64_emit_sve_pred_vec_duplicate (<MODE>mode, operands[0], operands[1]);
     DONE;
   }
 )
 
@@ -2969,10 +3019,11 @@
   {
     poly_int64 val;
     if (poly_int_rtx_p (operands[2], &val)
-	&& known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
+	&& known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)
+	&& !val.is_constant ())
       {
-	/* The last element can be extracted with a LASTB and a false
-	   predicate.  */
+	/* For VLA, extract the last element with a LASTB and a false
+	   predicate.  */
	rtx sel = aarch64_pfalse_reg (<VPRED>mode);
	emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
	DONE;
@@ -3133,9 +3184,9 @@
   "TARGET_SVE"
   {
     rtx tmp = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
-					     CONST1_RTX (<MODE>mode),
-					     CONST0_RTX (<MODE>mode)));
+    emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode),
+					     CONST0_RTX (<MODE>mode),
+					     operands[1]));
     emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
     DONE;
   }
@@ -3265,6 +3316,61 @@
 ;; - REVW
 ;; -------------------------------------------------------------------------
 
+(define_split
+  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
+	(rotate:SVE_FULL_HSDI
+	  (match_operand:SVE_FULL_HSDI 1 "register_operand")
+	  (match_operand:SVE_FULL_HSDI 2 "aarch64_constant_vector_operand")))]
+  "TARGET_SVE && can_create_pseudo_p ()"
+  [(set (match_dup 3)
+	(ashift:SVE_FULL_HSDI (match_dup 1)
+			      (match_dup 2)))
+   (set (match_dup 0)
+	(plus:SVE_FULL_HSDI
+	  (lshiftrt:SVE_FULL_HSDI (match_dup 1)
+				  (match_dup 4))
+	  (match_dup 3)))]
+  {
+    if (aarch64_emit_opt_vec_rotate (operands[0], operands[1], operands[2]))
+      DONE;
+
+    if (!TARGET_SVE2)
+      FAIL;
+
+    operands[3] = gen_reg_rtx (<MODE>mode);
+    HOST_WIDE_INT shift_amount =
+      INTVAL (unwrap_const_vec_duplicate (operands[2]));
+    int bitwidth = GET_MODE_UNIT_BITSIZE (<MODE>mode);
+    operands[4] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+						     bitwidth - shift_amount);
+  }
+)
+
+;; The RTL combiners are able to combine "ior (ashift, ashiftrt)" to a "bswap".
+;; Match that as well.
+(define_insn_and_split "*v_revvnx8hi"
+  [(parallel
+     [(set (match_operand:VNx8HI 0 "register_operand" "=w")
+	   (bswap:VNx8HI (match_operand 1 "register_operand" "w")))
+      (clobber (match_scratch:VNx8BI 2 "=Upl"))])]
+  "TARGET_SVE"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:VNx8HI
+	  [(match_dup 2)
+	   (unspec:VNx8HI
+	     [(match_dup 1)]
+	     UNSPEC_REVB)]
+	  UNSPEC_PRED_X))]
+  {
+    if (!can_create_pseudo_p ())
+      emit_move_insn (operands[2], CONSTM1_RTX (VNx8BImode));
+    else
+      operands[2] = aarch64_ptrue_reg (VNx8BImode);
+  }
+)
+
 ;; Predicated integer unary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
   [(set (match_operand:SVE_FULL_I 0 "register_operand")
@@ -3643,9 +3749,9 @@
 
 ;; Unpredicated floating-point unary operations.
 (define_insn "@aarch64_sve_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
-	(unspec:SVE_FULL_F
-	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+	(unspec:SVE_F
+	  [(match_operand:SVE_F 1 "register_operand" "w")]
 	  SVE_FP_UNARY))]
   "TARGET_SVE"
   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
@@ -3653,25 +3759,41 @@
 
 ;; Unpredicated floating-point unary operations.
 (define_expand "<optab><mode>2"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_dup 2)
-	   (const_int SVE_RELAXED_GP)
-	   (match_operand:SVE_FULL_F 1 "register_operand")]
+	   (match_dup 3)
+	   (match_operand:SVE_F 1 "register_operand")]
	  SVE_COND_FP_UNARY_OPTAB))]
   "TARGET_SVE"
   {
+    operands[2] = aarch64_sve_fp_pred (<MODE>mode, &operands[3]);
+  }
+)
+
+;; FABS and FNEG are non-trapping, so we can always expand with a <VPRED>
+;; predicate.  It doesn't matter whether the padding bits of a partial
+;; vector mode are active or inactive.
+(define_expand "<optab><mode>2"
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_dup 2)
+	   (const_int SVE_RELAXED_GP)
+	   (match_operand:SVE_F 1 "register_operand")]
+	  SVE_COND_FP_UNARY_BITWISE))]
+  "TARGET_SVE"
+  {
     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
   }
 )
 
 ;; Predicated floating-point unary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
-	   (match_operand:SVE_FULL_F 2 "register_operand")]
+	   (match_operand:SVE_F 2 "register_operand")]
	  SVE_COND_FP_UNARY))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
@@ -3697,13 +3819,13 @@
 
 ;; Predicated floating-point unary arithmetic, merging with the first input.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 3)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")]
+	      (match_operand:SVE_F 2 "register_operand")]
	     SVE_COND_FP_UNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -3745,15 +3867,15 @@
 ;; as earlyclobber helps to make the instruction more regular to the
 ;; register allocator.
 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")]
+	      (match_operand:SVE_F 2 "register_operand")]
	     SVE_COND_FP_UNARY)
-	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
@@ -3841,6 +3963,7 @@
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - NOT
+;; - NOTS
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated predicate inverse.
@@ -3856,7 +3979,7 @@
 )
 
 ;; Predicated predicate inverse.
-(define_insn "*one_cmpl<mode>3"
+(define_insn "@aarch64_pred_one_cmpl<mode>_z"
   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
@@ -3865,6 +3988,42 @@
   "not\t%0.b, %1/z, %2.b"
 )
 
+;; Predicated predicate inverse in which the flags are set in the same
+;; way as a PTEST.
+(define_insn "*one_cmpl<mode>3_cc"
+  [(set (reg:CC_NZC CC_REGNUM)
+	(unspec:CC_NZC
+	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
+	   (match_operand 3)
+	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+	   (and:PRED_ALL
+	     (not:PRED_ALL
+	       (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+	     (match_dup 3))]
+	  UNSPEC_PTEST))
+   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+	(and:PRED_ALL (not:PRED_ALL (match_dup 2)) (match_dup 3)))]
+  "TARGET_SVE"
+  "nots\t%0.b, %1/z, %2.b"
+)
+
+;; Same, where only the flags result is interesting.
+(define_insn "*one_cmpl<mode>3_ptest"
+  [(set (reg:CC_NZC CC_REGNUM)
+	(unspec:CC_NZC
+	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
+	   (match_operand 3)
+	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+	   (and:PRED_ALL
+	     (not:PRED_ALL
+	       (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+	     (match_dup 3))]
+	  UNSPEC_PTEST))
+   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
+  "TARGET_SVE"
+  "nots\t%0.b, %1/z, %2.b"
+)
+
 ;; =========================================================================
 ;; == Binary arithmetic
 ;; =========================================================================
@@ -3949,8 +4108,8 @@
	      (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
	  UNSPEC_PRED_X))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w , Upl , %0 , <sve_imm_con> ; * ] #
+  {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+     [ w , Upl , 0 , <sve_imm_con> ; * ] #
     [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
     [ ?&w , Upl , w , <sve_imm_con> ; yes ] #
     [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
@@ -4079,8 +4238,8 @@
	  (match_operand:SVE_I 1 "register_operand")
	  (match_operand:SVE_I 2 "aarch64_sve_add_operand")))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
-     [ w , %0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
+  {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+     [ w , 0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
     [ w , 0 , vsn ; * ] sub\t%0.<Vetype>, %0.<Vetype>, #%N2
     [ w , 0 , vsi ; * ] << aarch64_output_sve_vector_inc_dec ("%0.<Vetype>", operands[2]);
     [ ?w , w , vsa ; yes ] movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
@@ -4182,80 +4341,57 @@
 (define_expand "@aarch64_adr<mode>_shift"
   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
	(plus:SVE_FULL_SDI
-	  (unspec:SVE_FULL_SDI
-	    [(match_dup 4)
-	     (ashift:SVE_FULL_SDI
-	       (match_operand:SVE_FULL_SDI 2 "register_operand")
-	       (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
-	    UNSPEC_PRED_X)
+	  (ashift:SVE_FULL_SDI
+	    (match_operand:SVE_FULL_SDI 2 "register_operand")
+	    (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))
	  (match_operand:SVE_FULL_SDI 1 "register_operand")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
 )
 
-(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
+(define_insn "*aarch64_adr<mode>_shift"
   [(set (match_operand:SVE_24I 0 "register_operand" "=w")
	(plus:SVE_24I
-	  (unspec:SVE_24I
-	    [(match_operand 4)
-	     (ashift:SVE_24I
-	       (match_operand:SVE_24I 2 "register_operand" "w")
-	       (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
-	    UNSPEC_PRED_X)
+	  (ashift:SVE_24I
+	    (match_operand:SVE_24I 2 "register_operand" "w")
+	    (match_operand:SVE_24I 3 "const_1_to_3_operand"))
	  (match_operand:SVE_24I 1 "register_operand" "w")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
   "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
-  "&& !CONSTANT_P (operands[4])"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
 )
 
 ;; Same, but with the index being sign-extended from the low 32 bits.
 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(plus:VNx2DI
-	  (unspec:VNx2DI
-	    [(match_operand 4)
-	     (ashift:VNx2DI
-	       (unspec:VNx2DI
-		 [(match_operand 5)
-		  (sign_extend:VNx2DI
-		    (truncate:VNx2SI
-		      (match_operand:VNx2DI 2 "register_operand" "w")))]
-		 UNSPEC_PRED_X)
-	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
-	    UNSPEC_PRED_X)
+	  (ashift:VNx2DI
+	    (unspec:VNx2DI
+	      [(match_operand 4)
+	       (sign_extend:VNx2DI
+		 (truncate:VNx2SI
+		   (match_operand:VNx2DI 2 "register_operand" "w")))]
+	      UNSPEC_PRED_X)
+	    (match_operand:VNx2DI 3 "const_1_to_3_operand"))
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
   "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
-  "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
+  "&& !CONSTANT_P (operands[4])"
   {
-    operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
+    operands[4] = CONSTM1_RTX (VNx2BImode);
   }
 )
 
 ;; Same, but with the index being zero-extended from the low 32 bits.
-(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
+(define_insn "*aarch64_adr_shift_uxtw"
   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(plus:VNx2DI
-	  (unspec:VNx2DI
-	    [(match_operand 5)
-	     (ashift:VNx2DI
-	       (and:VNx2DI
-		 (match_operand:VNx2DI 2 "register_operand" "w")
-		 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
-	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
-	    UNSPEC_PRED_X)
+	  (ashift:VNx2DI
+	    (and:VNx2DI
+	      (match_operand:VNx2DI 2 "register_operand" "w")
+	      (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
+	    (match_operand:VNx2DI 3 "const_1_to_3_operand"))
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
   "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
-  "&& !CONSTANT_P (operands[5])"
-  {
-    operands[5] = CONSTM1_RTX (VNx2BImode);
-  }
 )
 
 ;; -------------------------------------------------------------------------
@@ -4298,8 +4434,8 @@
	      (match_dup 3))]
	    UNSPEC_PRED_X)))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w , Upl , %0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+  {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+     [ w , Upl , 0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   }
 )
@@ -4513,8 +4649,8 @@
	      MUL_HIGHPART)]
	  UNSPEC_PRED_X))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w , Upl , %0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+  {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+     [ w , Upl , 0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   }
 )
@@ -4568,8 +4704,8 @@
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 ]
-     [ &w , Upl , %0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+  {@ [ cons: =0 , 1 , %2 , 3 ]
+     [ &w , Upl , 0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   }
   [(set_attr "movprfx" "yes")])
@@ -4713,8 +4849,8 @@
	  (match_operand:SVE_I 1 "register_operand")
	  (match_operand:SVE_I 2 "aarch64_sve_logical_operand")))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
-     [ w , %0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
+  {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+     [ w , 0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
     [ ?w , w , vsl ; yes ] movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
     [ w , w , w ; * ] <logical>\t%0.d, %1.d, %2.d
   }
@@ -4847,7 +4983,7 @@
     if (CONST_INT_P (operands[2]))
       {
	amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
-	if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
+	if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
	  amount = force_reg (<MODE>mode, amount);
       }
     else
@@ -4871,15 +5007,40 @@
	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
+    if (CONSTANT_P (operands[2]))
+      {
+	emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[1],
+						      operands[2]));
+	DONE;
+      }
     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
   }
 )
 
-;; Shift by a vector, predicated with a PTRUE.  We don't actually need
-;; the predicate for the first alternative, but using Upa or X isn't
-;; likely to gain much and would make the instruction seem less uniform
-;; to the register allocator.
-(define_insn_and_split "@aarch64_pred_<optab><mode>"
+;; Shift by a vector, predicated with a PTRUE.
+(define_expand "@aarch64_pred_<optab><mode>"
+  [(set (match_operand:SVE_I 0 "register_operand")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand")
+	   (ASHIFT:SVE_I
+	     (match_operand:SVE_I 2 "register_operand")
+	     (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand"))]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE"
+  {
+    if (CONSTANT_P (operands[3]))
+      {
+	emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[2],
+						      operands[3]));
+	DONE;
+      }
+  }
+)
+
+;; We don't actually need the predicate for the first alternative, but
+;; using Upa or X isn't likely to gain much and would make the instruction
+;; seem less uniform to the register allocator.
+(define_insn_and_split "*aarch64_pred_<optab><mode>"
   [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand")
@@ -4894,33 +5055,32 @@
     [ w , Upl , w , 0 ; * ] <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   }
-  "&& reload_completed
-   && !register_operand (operands[3], <MODE>mode)"
+  "&& !register_operand (operands[3], <MODE>mode)"
   [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
   ""
 )
 
-;; Unpredicated shift operations by a constant (post-RA only).
+;; Unpredicated shift operations by a constant.
 ;; These are generated by splitting a predicated instruction whose
 ;; predicate is unused.
-(define_insn "*post_ra_v_ashl<mode>3"
+(define_insn "aarch64_vashl<mode>3_const"
   [(set (match_operand:SVE_I 0 "register_operand")
	(ashift:SVE_I
	  (match_operand:SVE_I 1 "register_operand")
	  (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
-  "TARGET_SVE && reload_completed"
+  "TARGET_SVE"
   {@ [ cons: =0 , 1 , 2 ]
     [ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
     [ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
   }
 )
 
-(define_insn "*post_ra_v_<optab><mode>3"
+(define_insn "aarch64_v<optab><mode>3_const"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(SHIFTRT:SVE_I
	  (match_operand:SVE_I 1 "register_operand" "w")
	  (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
-  "TARGET_SVE && reload_completed"
+  "TARGET_SVE"
  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
 )
@@ -5348,27 +5508,25 @@
 
 ;; Split a predicated instruction whose predicate is unused into an
 ;; unpredicated instruction.
 (define_split
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
-	   (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	   (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+	   (const_int SVE_RELAXED_GP)
+	   (match_operand:SVE_F_B16B16 2 "register_operand")
+	   (match_operand:SVE_F_B16B16 3 "register_operand")]
	  <SVE_COND_FP>))]
-  "TARGET_SVE
-   && reload_completed
-   && INTVAL (operands[4]) == SVE_RELAXED_GP"
+  "TARGET_SVE && reload_completed"
   [(set (match_dup 0)
-	(SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF (match_dup 2) (match_dup 3)))]
+	(SVE_UNPRED_FP_BINARY:SVE_F_B16B16 (match_dup 2) (match_dup 3)))]
 )
 
 ;; Unpredicated floating-point binary operations (post-RA only).
 ;; These are generated by the split above.
 (define_insn "*post_ra_<sve_fp_op><mode>3"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
-	(SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF
-	  (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")
-	  (match_operand:SVE_FULL_F_BF 2 "register_operand" "w")))]
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand" "=w")
+	(SVE_UNPRED_FP_BINARY:SVE_F_B16B16
+	  (match_operand:SVE_F_B16B16 1 "register_operand" "w")
+	  (match_operand:SVE_F_B16B16 2 "register_operand" "w")))]
   "TARGET_SVE && reload_completed"
   "<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
 
@@ -5400,10 +5558,10 @@
 
 ;; Unpredicated floating-point binary operations.
 (define_insn "@aarch64_sve_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
-	(unspec:SVE_FULL_F
-	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")
-	   (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+	(unspec:SVE_F
+	  [(match_operand:SVE_F 1 "register_operand" "w")
+	   (match_operand:SVE_F 2 "register_operand" "w")]
	  SVE_FP_BINARY))]
   "TARGET_SVE"
   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
@@ -5412,27 +5570,27 @@
 
 ;; Unpredicated floating-point binary operations that need to be predicated
 ;; for SVE.
 (define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_dup 3)
-	   (const_int SVE_RELAXED_GP)
-	   (match_operand:SVE_FULL_F_BF 1 "<sve_pred_fp_rhs1_operand>")
-	   (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs2_operand>")]
+	   (match_dup 4)
+	   (match_operand:SVE_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
+	   (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
	  SVE_COND_FP_BINARY_OPTAB))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
   {
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
   }
 )
 
 ;; Predicated floating-point binary operations that have no immediate forms.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
-	   (match_operand:SVE_FULL_F 2 "register_operand")
-	   (match_operand:SVE_FULL_F 3 "register_operand")]
+	   (match_operand:SVE_F 2 "register_operand")
+	   (match_operand:SVE_F 3 "register_operand")]
	  SVE_COND_FP_BINARY_REG))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
@@ -5444,30 +5602,33 @@
 
 ;; Predicated floating-point operations with merging.
 (define_expand "@cond_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+	   (unspec:SVE_F_B16B16
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs1_operand>")
-	      (match_operand:SVE_FULL_F_BF 3 "<sve_pred_fp_rhs2_operand>")]
+	      (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+	      (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
	     SVE_COND_FP_BINARY)
-	   (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
+  {
+    operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
+  }
 )
 
 ;; Predicated floating-point operations, merging with the first input.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+	   (unspec:SVE_F_B16B16
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")]
	     SVE_COND_FP_BINARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -5483,14 +5644,14 @@
 )
 
 (define_insn "*cond_<optab><mode>_2_strict"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F_B16B16
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")]
	     SVE_COND_FP_BINARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -5503,14 +5664,14 @@
 
 ;; Same for operations that take a 1-bit constant.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -5526,14 +5687,14 @@
 )
 
 (define_insn "*cond_<optab><mode>_2_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -5546,14 +5707,14 @@
 
 ;; Predicated floating-point operations, merging with the second input.
 (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+	   (unspec:SVE_F_B16B16
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")]
	     SVE_COND_FP_BINARY)
	   (match_dup 3)]
	  UNSPEC_SEL))]
@@ -5569,14 +5730,14 @@
 )
 
 (define_insn "*cond_<optab><mode>_3_strict"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F_B16B16
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")]
	     SVE_COND_FP_BINARY)
	   (match_dup 3)]
	  UNSPEC_SEL))]
@@ -5589,16 +5750,16 @@
 
 ;; Predicated floating-point operations, merging with an independent value.
 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+	   (unspec:SVE_F_B16B16
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")]
	     SVE_COND_FP_BINARY)
-	   (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE
    && (<supports_bf16> || !<is_bf16>)
@@ -5633,16 +5794,16 @@
 )
 
 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F_B16B16
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")]
	     SVE_COND_FP_BINARY)
-	   (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE
    && (<supports_bf16> || !<is_bf16>)
@@ -5671,16 +5832,16 @@
 
 ;; Same for operations that take a 1-bit constant.
 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
-	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5707,16 +5868,16 @@
 )
 
 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
-	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5745,16 +5906,16 @@
 
 ;; Predicated floating-point addition.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
-	   (match_operand:SVE_FULL_F 2 "register_operand")
-	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
+	   (match_operand:SVE_F 2 "register_operand")
+	   (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand")]
	  SVE_COND_FP_ADD))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
-     [ w , Upl , %0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+  {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+     [ w , Upl , 0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , 0 , vsN , i ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ w , Upl , w , w , Z ; * ] #
     [ w , Upl , 0 , w , Ui1 ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
@@ -5767,14 +5928,14 @@
 
 ;; Predicated floating-point addition of a constant, merging with the
 ;; first input.
 (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
	     UNSPEC_COND_FADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -5792,14 +5953,14 @@
 )
 
 (define_insn "*cond_add<mode>_2_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
	     UNSPEC_COND_FADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -5815,16 +5976,16 @@
 
 ;; Predicated floating-point addition of a constant, merging with an
 ;; independent value.
 (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
	     UNSPEC_COND_FADD)
-	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -5854,16 +6015,16 @@
 )
 
 (define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
	     UNSPEC_COND_FADD)
-	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -6061,12 +6222,12 @@
 
 ;; Predicated floating-point subtraction.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
-	   (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
-	   (match_operand:SVE_FULL_F 3 "register_operand")]
+	   (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand")
+	   (match_operand:SVE_F 3 "register_operand")]
	  SVE_COND_FP_SUB))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
@@ -6082,14 +6243,14 @@
 
 ;; Predicated floating-point subtraction from a constant, merging with the
 ;; second input.
 (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-	      (match_operand:SVE_FULL_F 3 "register_operand")]
+	      (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+	      (match_operand:SVE_F 3 "register_operand")]
	     UNSPEC_COND_FSUB)
	   (match_dup 3)]
	  UNSPEC_SEL))]
@@ -6105,14 +6266,14 @@
 )
 
 (define_insn "*cond_sub<mode>_3_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-	      (match_operand:SVE_FULL_F 3 "register_operand")]
+	      (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+	      (match_operand:SVE_F 3 "register_operand")]
	     UNSPEC_COND_FSUB)
	   (match_dup 3)]
	  UNSPEC_SEL))]
@@ -6126,16 +6287,16 @@
 
 ;; Predicated floating-point subtraction from a constant, merging with an
 ;; independent value.
 (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-	      (match_operand:SVE_FULL_F 3 "register_operand")]
+	      (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+	      (match_operand:SVE_F 3 "register_operand")]
	     UNSPEC_COND_FSUB)
-	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
   {@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6162,16 +6323,16 @@
 )
 
 (define_insn_and_rewrite "*cond_sub<mode>_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-	      (match_operand:SVE_FULL_F 3 "register_operand")]
+	      (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+	      (match_operand:SVE_F 3 "register_operand")]
	     UNSPEC_COND_FSUB)
-	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
   {@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6228,8 +6389,8 @@
	     UNSPEC_COND_FSUB)]
	  UNSPEC_COND_FABS))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+  {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+     [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   }
   "&& !rtx_equal_p (operands[1], operands[5])"
@@ -6251,8 +6412,8 @@
	     UNSPEC_COND_FSUB)]
	  UNSPEC_COND_FABS))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+  {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+     [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   }
 )
@@ -6484,16 +6645,16 @@
 
 ;; Predicated floating-point multiplication.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
-	   (match_operand:SVE_FULL_F 2 "register_operand")
-	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
+	   (match_operand:SVE_F 2 "register_operand")
+	   (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand")]
	  SVE_COND_FP_MUL))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
-     [ w , Upl , %0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+  {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+     [ w , Upl , 0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , w , w , Z ; * ] #
     [ w , Upl , 0 , w , Ui1 ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , vsM , i ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
@@ -6506,13 +6667,13 @@
 
 ;; Unpredicated multiplication by selected lanes.
 (define_insn "@aarch64_mul_lane_<mode>"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
-	(mult:SVE_FULL_F_BF
-	  (unspec:SVE_FULL_F_BF
-	    [(match_operand:SVE_FULL_F_BF 2 "register_operand" "<sve_lane_con>")
+  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand" "=w")
+	(mult:SVE_FULL_F_B16B16
+	  (unspec:SVE_FULL_F_B16B16
+	    [(match_operand:SVE_FULL_F_B16B16 2 "register_operand" "<sve_lane_con>")
	     (match_operand:SI 3 "const_int_operand")]
	    UNSPEC_SVE_LANE_SELECT)
-	  (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")))]
+	  (match_operand:SVE_FULL_F_B16B16 1 "register_operand" "w")))]
   "TARGET_SVE"
   "<b>fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
 )
@@ -6524,12 +6685,12 @@
 ;; -------------------------------------------------------------------------
 
 (define_expand "div<mode>3"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_dup 3)
-	   (const_int SVE_RELAXED_GP)
-	   (match_operand:SVE_FULL_F 1 "nonmemory_operand")
-	   (match_operand:SVE_FULL_F 2 "register_operand")]
+	   (match_dup 4)
+	   (match_operand:SVE_F 1 "nonmemory_operand")
+	   (match_operand:SVE_F 2 "register_operand")]
	  UNSPEC_COND_FDIV))]
   "TARGET_SVE"
   {
@@ -6537,23 +6698,23 @@
      DONE;
 
    operands[1] = force_reg (<MODE>mode, operands[1]);
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
  }
 )
 
 (define_expand "@aarch64_frecpe<mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:SVE_FULL_F 1 "register_operand")]
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:SVE_F 1 "register_operand")]
	  UNSPEC_FRECPE))]
   "TARGET_SVE"
 )
 
 (define_expand "@aarch64_frecps<mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:SVE_FULL_F 1 "register_operand")
-	   (match_operand:SVE_FULL_F 2 "register_operand")]
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:SVE_F 1 "register_operand")
+	   (match_operand:SVE_F 2 "register_operand")]
	  UNSPEC_FRECPS))]
   "TARGET_SVE"
 )
@@ -6571,10 +6732,10 @@
 ;; by providing this, but we need to use UNSPECs since rtx logical ops
 ;; aren't defined for floating-point modes.
 (define_insn "*<optab><mode>3"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w")
-	(unspec:SVE_F
-	  [(match_operand:SVE_F 1 "register_operand" "w")
-	   (match_operand:SVE_F 2 "register_operand" "w")]
+  [(set (match_operand:SVE_F_BF 0 "register_operand" "=w")
+	(unspec:SVE_F_BF
+	  [(match_operand:SVE_F_BF 1 "register_operand" "w")
+	   (match_operand:SVE_F_BF 2 "register_operand" "w")]
	  LOGICALF))]
   "TARGET_SVE"
   "<logicalf_op>\t%0.d, %1.d, %2.d"
@@ -6718,16 +6879,16 @@
 
 ;; Predicated floating-point maximum/minimum.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
-	   (match_operand:SVE_FULL_F 2 "register_operand")
-	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
+	   (match_operand:SVE_F 2 "register_operand")
+	   (match_operand:SVE_F 3 "aarch64_sve_float_maxmin_operand")]
	  SVE_COND_FP_MAXMIN))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w , Upl , %0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+  {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+     [ w , Upl , 0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , vsB ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
@@ -6752,7 +6913,7 @@
 ;; Predicate AND.  We can reuse one of the inputs as the GP.
 ;; Doubling the second operand is the preferred implementation
 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
-(define_insn "and<mode>3"
+(define_insn "@and<mode>3"
   [(set (match_operand:PRED_ALL 0 "register_operand")
	(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
		      (match_operand:PRED_ALL 2 "register_operand")))]
@@ -7035,8 +7196,8 @@
	    UNSPEC_PRED_X)
	  (match_operand:SVE_I 4 "register_operand")))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
-     [ w , Upl , %0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+  {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+     [ w , Upl , 0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
     [ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
    [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
  }
@@ -7177,8 +7338,8 @@
	      (match_operand:SVE_I 3 "register_operand"))]
	    UNSPEC_PRED_X)))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
-     [ w , Upl , %0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+  {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+     [ w , Upl , 0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
     [ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
    [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
  }
@@ -7434,33 +7595,33 @@
 
 ;; Unpredicated floating-point ternary operations.
 (define_expand "<optab><mode>4"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_dup 4)
-	   (const_int SVE_RELAXED_GP)
-	   (match_operand:SVE_FULL_F_BF 1 "register_operand")
-	   (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	   (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+	   (match_dup 5)
+	   (match_operand:SVE_F_B16B16 1 "register_operand")
+	   (match_operand:SVE_F_B16B16 2 "register_operand")
+	   (match_operand:SVE_F_B16B16 3 "register_operand")]
	  SVE_COND_FP_TERNARY))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
   {
-    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]);
   }
 )
 
 ;; Predicated floating-point ternary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
-	  [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
	   (match_operand:SI 5 "aarch64_sve_gp_strictness")
-	   (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	   (match_operand:SVE_FULL_F_BF 3 "register_operand")
-	   (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+	   (match_operand:SVE_F_B16B16 2 "register_operand")
+	   (match_operand:SVE_F_B16B16 3 "register_operand")
+	   (match_operand:SVE_F_B16B16 4 "register_operand")]
	  SVE_COND_FP_TERNARY))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
-  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx , is_rev ]
-     [ w , Upl , %w , w , 0 ; * , * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+  {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ]
+     [ w , Upl , w , w , 0 ; * , * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
     [ w , Upl , 0 , w , w ; * , true ] <b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
    [ ?&w , Upl , w , w , w ; yes , * ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
  }
@@ -7470,17 +7631,17 @@
 
 ;; Predicated floating-point ternary operations with merging.
 (define_expand "@cond_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+	   (unspec:SVE_F_B16B16
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")
+	      (match_operand:SVE_F_B16B16 4 "register_operand")]
	     SVE_COND_FP_TERNARY)
-	   (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+	   (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
   {
@@ -7488,20 +7649,22 @@
       second of the two.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
+
+    operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
  })
 
 ;; Predicated floating-point ternary operations, merging with the
 ;; first input.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+	   (unspec:SVE_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "register_operand")
-	      (match_operand:SVE_FULL_F 4 "register_operand")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "register_operand")
+	      (match_operand:SVE_F 4 "register_operand")]
	     SVE_COND_FP_TERNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -7517,15 +7680,15 @@
 )
 
 (define_insn "*cond_<optab><mode>_2_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-	(unspec:SVE_FULL_F
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F 2 "register_operand")
-	      (match_operand:SVE_FULL_F 3 "register_operand")
-	      (match_operand:SVE_FULL_F 4 "register_operand")]
+	      (match_operand:SVE_F 2 "register_operand")
+	      (match_operand:SVE_F 3 "register_operand")
+	      (match_operand:SVE_F 4 "register_operand")]
	     SVE_COND_FP_TERNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
@@ -7539,15 +7702,15 @@
 
 ;; Predicated floating-point ternary operations, merging with the
 ;; third input.
 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+	   (unspec:SVE_F_B16B16
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")
+	      (match_operand:SVE_F_B16B16 4 "register_operand")]
	     SVE_COND_FP_TERNARY)
	   (match_dup 4)]
	  UNSPEC_SEL))]
@@ -7563,15 +7726,15 @@
 )
 
 (define_insn "*cond_<optab><mode>_4_strict"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-	(unspec:SVE_FULL_F_BF
-	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+	(unspec:SVE_F_B16B16
+	  [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+	   (unspec:SVE_F_B16B16
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
-	      (match_operand:SVE_FULL_F_BF 2 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 3 "register_operand")
-	      (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+	      (match_operand:SVE_F_B16B16 2 "register_operand")
+	      (match_operand:SVE_F_B16B16 3 "register_operand")
+	      (match_operand:SVE_F_B16B16 4 "register_operand")]
	     SVE_COND_FP_TERNARY)
	   (match_dup 4)]
	  UNSPEC_SEL))]
@@ -7585,17 +7748,17 @@
 
 ;; Predicated floating-point ternary operations, merging with an
 ;; independent value.
 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-       (unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_BF
+          (unspec:SVE_F_B16B16
             [(match_operand 6)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F_BF 2 "register_operand")
-             (match_operand:SVE_FULL_F_BF 3 "register_operand")
-             (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")
+             (match_operand:SVE_F_B16B16 4 "register_operand")]
             SVE_COND_FP_TERNARY)
-          (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && (<supports_bf16> || !<is_bf16>)
@@ -7631,17 +7794,17 @@
 )
 
 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-       (unspec:SVE_FULL_F_BF
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_BF
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_BF 2 "register_operand")
-             (match_operand:SVE_FULL_F_BF 3 "register_operand")
-             (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")
+             (match_operand:SVE_F_B16B16 4 "register_operand")]
             SVE_COND_FP_TERNARY)
-          (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && (<supports_bf16> || !<is_bf16>)
@@ -7672,14 +7835,14 @@
 ;; Unpredicated FMLA and FMLS by selected lanes.  It doesn't seem worth using
 ;; (fma ...) since target-independent code won't understand the indexing.
 (define_insn "@aarch64_<optab>_lane_<mode>"
-  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
-       (unspec:SVE_FULL_F_BF
-         [(match_operand:SVE_FULL_F_BF 1 "register_operand")
-          (unspec:SVE_FULL_F_BF
-            [(match_operand:SVE_FULL_F_BF 2 "register_operand")
+  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+       (unspec:SVE_FULL_F_B16B16
+         [(match_operand:SVE_FULL_F_B16B16 1 "register_operand")
+          (unspec:SVE_FULL_F_B16B16
+            [(match_operand:SVE_FULL_F_B16B16 2 "register_operand")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)
-          (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+          (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
          SVE_FP_TERNARY_LANE))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
@@ -8040,20 +8203,23 @@
 ;;
 ;; For unpacked vectors, it doesn't really matter whether SEL uses
 ;; the container size or the element size.  If SEL used the container size,
-;; it would ignore undefined bits of the predicate but would copy the
-;; upper (undefined) bits of each container along with the defined bits.
-;; If SEL used the element size, it would use undefined bits of the predicate
-;; to select between undefined elements in each input vector.  Thus the only
-;; difference is whether the undefined bits in a container always come from
-;; the same input as the defined bits, or whether the choice can vary
-;; independently of the defined bits.
+;; it would copy the upper (undefined) bits of each container along
+;; with the corresponding defined bits.  If SEL used the element size,
+;; it would use separate predicate bits to select between the undefined
+;; elements in each input vector; these separate predicate bits might
+;; themselves be undefined, depending on the mode of the predicate.
+;;
+;; Thus the only difference is whether the undefined bits in a container
+;; always come from the same input as the defined bits, or whether the
+;; choice can vary independently of the defined bits.
 ;;
 ;; For the other instructions, using the element size is more natural,
 ;; so we do that for SEL as well.
+;;
 (define_insn "*vcond_mask_<mode><vpred>"
   [(set (match_operand:SVE_ALL 0 "register_operand")
        (unspec:SVE_ALL
-         [(match_operand:<VPRED> 3 "register_operand")
+         [(match_operand:<VPRED> 3 "aarch64_predicate_operand")
           (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
           (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
@@ -8192,6 +8358,71 @@
   }
 )
 
+;; Likewise, but yield a VNx16BI result regardless of the element width.
+;; The .b case is equivalent to the above.
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle"
+  [(parallel
+     [(set (match_operand:<VPRED> 0 "register_operand")
+          (unspec:<VPRED>
+            [(match_operand:<VPRED> 1 "register_operand")
+             (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+             (SVE_INT_CMP:<VPRED>
+               (match_operand:VNx16QI_ONLY 3 "register_operand")
+               (match_operand:VNx16QI_ONLY 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+            UNSPEC_PRED_Z))
+      (clobber (reg:CC_NZC CC_REGNUM))])]
+  "TARGET_SVE"
+)
+
+;; For wider elements, bitcast the predicate result to a VNx16BI and use
+;; an (and ...) to indicate that only every second, fourth, or eighth bit
+;; is set.
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle"
+  [(parallel
+     [(set (match_operand:VNx16BI 0 "register_operand")
+          (and:VNx16BI
+            (subreg:VNx16BI
+              (unspec:<VPRED>
+                [(match_operand:<VPRED> 1 "register_operand")
+                 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+                 (SVE_INT_CMP:<VPRED>
+                   (match_operand:SVE_FULL_HSDI 3 "register_operand")
+                   (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+                UNSPEC_PRED_Z)
+              0)
+            (match_dup 5)))
+      (clobber (reg:CC_NZC CC_REGNUM))])]
+  "TARGET_SVE"
+  {
+    operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+  }
+)
+
+(define_insn "*aarch64_pred_cmp<cmp_op><mode>_acle"
+  [(set (match_operand:VNx16BI 0 "register_operand")
+       (and:VNx16BI
+         (subreg:VNx16BI
+           (unspec:<VPRED>
+             [(match_operand:<VPRED> 1 "register_operand")
+              (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+              (SVE_INT_CMP:<VPRED>
+                (match_operand:SVE_FULL_HSDI 3 "register_operand")
+                (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+             UNSPEC_PRED_Z)
+           0)
+         (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+   (clobber (reg:CC_NZC CC_REGNUM))]
+  "TARGET_SVE"
+  {@ [ cons: =0 , 1  , 3 , 4             ; attrs: pred_clobber ]
+     [ &Upa     , Upl, w , <sve_imm_con> ; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
+     [ ?Upl     , 0  , w , <sve_imm_con> ; yes                 ] ^
+     [ Upa      , Upl, w , <sve_imm_con> ; no                  ] ^
+     [ &Upa     , Upl, w , w             ; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+     [ ?Upl     , 0  , w , w             ; yes                 ] ^
+     [ Upa      , Upl, w , w             ; no                  ] ^
+  }
+)
+
 ;; Predicated integer comparisons in which both the flag and predicate
 ;; results are interesting.
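
Concretely, "both results are interesting" means the compare feeds further predicated work and, via PTEST, a branch. A rough user-level sketch, assuming <arm_sve.h>; the intrinsics are standard ACLE, but the pairing with the *cmp<cmp_op><mode>_cc pattern below is only illustrative:

#include <arm_sve.h>
#include <stdbool.h>

/* The CMPEQ result is used twice: as the governing predicate of the
   select, and, through svptest_any, as a condition derived from the
   flags that the compare itself sets.  */
bool
any_match (svbool_t pg, svint32_t a, svint32_t b, svint32_t *out)
{
  svbool_t eq = svcmpeq_s32 (pg, a, b);
  *out = svsel_s32 (eq, a, b);
  return svptest_any (pg, eq);
}
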
(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc" @@ -8233,6 +8464,49 @@ } ) +(define_insn_and_rewrite "*cmp<cmp_op><mode>_acle_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand 6) + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_FULL_HSDI 2 "register_operand") + (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_dup 6) + (match_dup 7) + (SVE_INT_CMP:<VPRED> + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_Z) + 0) + (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ] + [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 + [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^ + [ Upa , Upl, w , <sve_imm_con>; no ] ^ + [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype> + [ ?Upl , 0 , w , w ; yes ] ^ + [ Upa , Upl, w , w ; no ] ^ + } + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } +) + ;; Predicated integer comparisons in which only the flags result is ;; interesting. (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" @@ -8298,14 +8572,52 @@ (clobber (reg:CC_NZC CC_REGNUM))])] ) +(define_insn_and_split "*cmp<cmp_op><mode>_acle_and" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") + (and:VNx16BI + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (SVE_INT_CMP:<VPRED> + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w") + (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] + UNSPEC_PRED_Z) + 0) + (match_operand:VNx16BI 1 "register_operand" "Upl, Upl")) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_dup 1) + (const_int SVE_MAYBE_NOT_PTRUE) + (SVE_INT_CMP:<VPRED> + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_Z) + 0) + (match_dup 5))) + (clobber (reg:CC_NZC CC_REGNUM))])] + { + operands[1] = gen_lowpart (<VPRED>mode, operands[1]); + } +) + ;; Predicated integer wide comparisons. 
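
Wide comparisons compare narrow elements against 64-bit (.d) elements of the second operand, e.g. cmplt %p0.s, %pg/z, %z1.s, %z2.d. A minimal sketch of the ACLE entry point these patterns serve, assuming <arm_sve.h>:

#include <arm_sve.h>

/* svcmplt_wide_s32 maps to CMPLT (wide): the .s elements of VALS are
   compared against .d elements of LIMITS.  */
svbool_t
below_limits (svbool_t pg, svint32_t vals, svint64_t limits)
{
  return svcmplt_wide_s32 (pg, vals, limits);
}
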
(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide" [(set (match_operand:<VPRED> 0 "register_operand") (unspec:<VPRED> - [(match_operand:VNx16BI 1 "register_operand") + [(match_operand:<VPRED> 1 "register_operand") (match_operand:SI 2 "aarch64_sve_ptrue_flag") (unspec:<VPRED> - [(match_operand:SVE_FULL_BHSI 3 "register_operand") + [(match_operand:VNx16QI_ONLY 3 "register_operand") (match_operand:VNx2DI 4 "register_operand")] SVE_COND_INT_CMP_WIDE)] UNSPEC_PRED_Z)) @@ -8318,16 +8630,61 @@ } ) +(define_expand "@aarch64_pred_cmp<cmp_op><mode>_wide" + [(parallel + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand:SVE_FULL_HSI 3 "register_operand") + (match_operand:VNx2DI 4 "register_operand")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z) + 0) + (match_dup 5))) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } +) + +(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand:SVE_FULL_HSI 3 "register_operand") + (match_operand:VNx2DI 4 "register_operand")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z) + 0) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + {@ [ cons: =0, 1 , 2, 3, 4; attrs: pred_clobber ] + [ &Upa , Upl, , w, w; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d + [ ?Upl , 0 , , w, w; yes ] ^ + [ Upa , Upl, , w, w; no ] ^ + } +) + ;; Predicated integer wide comparisons in which both the flag and ;; predicate results are interesting. 
-(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" +(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand") (match_operand 4) (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec:<VPRED> - [(match_operand:VNx16BI 6 "register_operand") + [(match_operand:<VPRED> 6 "register_operand") (match_operand:SI 7 "aarch64_sve_ptrue_flag") (unspec:<VPRED> [(match_operand:SVE_FULL_BHSI 2 "register_operand") @@ -8351,18 +8708,65 @@ [ ?Upl , 0 , w, w, Upl; yes ] ^ [ Upa , Upl, w, w, Upl; no ] ^ } + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } +) + +(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand") + (match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand:<VPRED> 6 "register_operand") + (match_operand:SI 7 "aarch64_sve_ptrue_flag") + (unspec:<VPRED> + [(match_operand:SVE_FULL_HSI 2 "register_operand") + (match_operand:VNx2DI 3 "register_operand")] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_dup 6) + (match_dup 7) + (unspec:<VPRED> + [(match_dup 2) + (match_dup 3)] + SVE_COND_INT_CMP_WIDE)] + UNSPEC_PRED_Z) + 0) + (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" + {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ] + [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d + [ ?Upl , 0 , w, w, Upl; yes ] ^ + [ Upa , Upl, w, w, Upl; no ] ^ + } + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } ) ;; Predicated integer wide comparisons in which only the flags result ;; is interesting. -(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest" +(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 1 "register_operand") (match_operand 4) (match_operand:SI 5 "aarch64_sve_ptrue_flag") (unspec:<VPRED> - [(match_operand:VNx16BI 6 "register_operand") + [(match_operand:<VPRED> 6 "register_operand") (match_operand:SI 7 "aarch64_sve_ptrue_flag") (unspec:<VPRED> [(match_operand:SVE_FULL_BHSI 2 "register_operand") @@ -8378,6 +8782,11 @@ [ ?Upl , 0 , w, w, Upl; yes ] ^ [ Upa , Upl, w, w, Upl; no ] ^ } + "&& !rtx_equal_p (operands[4], operands[6])" + { + operands[6] = copy_rtx (operands[4]); + operands[7] = operands[5]; + } ) ;; ------------------------------------------------------------------------- @@ -8415,6 +8824,58 @@ "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" ) +;; Likewise, but yield a VNx16BI result regardless of the element width. +;; The .b case is equivalent to the above. +(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><VNx16BI_ONLY:mode>_acle" + [(parallel + [(set (match_operand:VNx16BI_ONLY 0 "register_operand") + (unspec:VNx16BI_ONLY + [(const_int SVE_WHILE_B) + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero")] + SVE_WHILE)) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" +) + +;; For wider elements, bitcast the predicate result to a VNx16BI and use +;; an (and ...) 
to indicate that only every second, fourth, or eighth bit +;; is set. +(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle" + [(parallel + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(const_int SVE_WHILE_B) + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero")] + SVE_WHILE) + 0) + (match_dup 3))) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_all (<data_bytes>); + } +) + +(define_insn "*aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(const_int SVE_WHILE_B) + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] + SVE_WHILE) + 0) + (match_operand:PRED_HSD 3 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" + "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2" +) + ;; The WHILE instructions set the flags in the same way as a PTEST with ;; a PTRUE GP. Handle the case in which both results are useful. The GP ;; operands to the PTEST aren't needed, so we allow them to be anything. @@ -8446,6 +8907,38 @@ } ) +(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 3) + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (unspec:PRED_HSD + [(const_int SVE_WHILE_B) + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] + SVE_WHILE)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD [(const_int SVE_WHILE_B) + (match_dup 1) + (match_dup 2)] + SVE_WHILE) + 0) + (match_operand:PRED_HSD 5 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2" + ;; Force the compiler to drop the unused predicate operand, so that we + ;; don't have an unnecessary PTRUE. + "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" + { + operands[3] = CONSTM1_RTX (VNx16BImode); + operands[4] = CONSTM1_RTX (<PRED_HSD:MODE>mode); + } +) + ;; Same, but handle the case in which only the flags result is useful. 
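
In practice a strip-mined loop uses the WHILE result both ways: the predicate governs the memory accesses, while in compiler-generated loops the continuation test is a branch on the flags that WHILELT already set, which is what the PTEST-style patterns match. A sketch assuming <arm_sve.h> (function name invented):

#include <arm_sve.h>
#include <stdint.h>

/* WHILELT produces the loop predicate with .h granularity; the last
   iteration is automatically partial, with inactive lanes untouched.  */
void
scale_by_3 (int16_t *x, int64_t n)
{
  for (int64_t i = 0; i < n; i += svcnth ())
    {
      svbool_t pg = svwhilelt_b16_s64 (i, n);
      svint16_t v = svld1_s16 (pg, x + i);
      svst1_s16 (pg, x + i, svmul_n_s16_x (pg, v, 3));
    }
}
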
(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest" [(set (reg:CC_NZC CC_REGNUM) @@ -8490,8 +8983,8 @@ (define_expand "vec_cmp<mode><vpred>" [(set (match_operand:<VPRED> 0 "register_operand") (match_operator:<VPRED> 1 "comparison_operator" - [(match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))] + [(match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] "TARGET_SVE" { aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), @@ -8504,10 +8997,10 @@ (define_insn "@aarch64_pred_fcm<cmp_op><mode>" [(set (match_operand:<VPRED> 0 "register_operand") (unspec:<VPRED> - [(match_operand:<VPRED> 1 "register_operand") + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 2 "aarch64_sve_ptrue_flag") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] SVE_COND_FP_CMP_I0))] "TARGET_SVE" {@ [ cons: =0 , 1 , 3 , 4 ] @@ -8516,19 +9009,90 @@ } ) +(define_expand "@aarch64_pred_fcm<cmp_op><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] + SVE_COND_FP_CMP_I0) + 0) + (match_dup 5)))] + "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } +) + +(define_insn "*aarch64_pred_fcm<cmp_op><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] + SVE_COND_FP_CMP_I0) + 0) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + {@ [ cons: =0 , 1 , 3 , 4 ] + [ Upa , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0 + [ Upa , Upl , w , w ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype> + } +) + ;; Same for unordered comparisons. 
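
For the unordered case handled next, FCMUO marks lanes where either input is NaN; combining it with an absolute compare gives a NaN-safe result. A sketch assuming <arm_sve.h>; svacgt, svcmpuo and the predicate svbic_z are standard ACLE, but the combination shown is illustrative:

#include <arm_sve.h>

/* FACGT compares absolute values; FCMUO flags unordered (NaN) lanes;
   BIC then keeps only the ordered lanes of the FACGT result.  */
svbool_t
abs_greater_ordered (svbool_t pg, svfloat32_t a, svfloat32_t b)
{
  svbool_t gt = svacgt_f32 (pg, a, b);
  svbool_t uo = svcmpuo_f32 (pg, a, b);
  return svbic_z (pg, gt, uo);
}
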
(define_insn "@aarch64_pred_fcmuo<mode>" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") (unspec:<VPRED> - [(match_operand:<VPRED> 1 "register_operand" "Upl") + [(match_operand:<VPRED> 1 "aarch64_predicate_operand" "Upl") (match_operand:SI 2 "aarch64_sve_ptrue_flag") - (match_operand:SVE_FULL_F 3 "register_operand" "w") - (match_operand:SVE_FULL_F 4 "register_operand" "w")] + (match_operand:SVE_F 3 "register_operand" "w") + (match_operand:SVE_F 4 "register_operand" "w")] UNSPEC_COND_FCMUO))] "TARGET_SVE" "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" ) +(define_expand "@aarch64_pred_fcmuo<mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] + UNSPEC_COND_FCMUO) + 0) + (match_dup 5)))] + "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } +) + +(define_insn "*aarch64_pred_fcmuo<mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "aarch64_predicate_operand" "Upl") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 3 "register_operand" "w") + (match_operand:SVE_F 4 "register_operand" "w")] + UNSPEC_COND_FCMUO) + 0) + (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" +) + ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed ;; with another predicate P. This does not have the same trapping behavior ;; as predicating the comparison itself on P, but it's a legitimate fold, @@ -8543,8 +9107,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w, w") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] SVE_COND_FP_CMP_I0) (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] "TARGET_SVE" @@ -8566,8 +9130,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "register_operand" "w")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] UNSPEC_COND_FCMUO) (match_operand:<VPRED> 4 "register_operand" "Upl")))] "TARGET_SVE" @@ -8593,8 +9157,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")] SVE_COND_FP_CMP_I0)) (match_operand:<VPRED> 4 "register_operand" "Upa")) (match_dup:<VPRED> 1))) @@ -8630,8 +9194,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")] SVE_COND_FP_CMP_I0)) (not:<VPRED> (match_operand:<VPRED> 4 "register_operand" "Upa"))) @@ -8661,6 +9225,7 @@ } ) +;; Same for unordered comparisons. 
(define_insn_and_split "*fcmuo<mode>_bic_combine" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") (and:<VPRED> @@ -8669,8 +9234,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "register_operand" "w")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] UNSPEC_COND_FCMUO)) (match_operand:<VPRED> 4 "register_operand" "Upa")) (match_dup:<VPRED> 1))) @@ -8696,7 +9261,6 @@ } ) -;; Same for unordered comparisons. (define_insn_and_split "*fcmuo<mode>_nor_combine" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") (and:<VPRED> @@ -8705,8 +9269,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "register_operand" "w")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] UNSPEC_COND_FCMUO)) (not:<VPRED> (match_operand:<VPRED> 4 "register_operand" "Upa"))) @@ -8747,23 +9311,30 @@ ;; ------------------------------------------------------------------------- ;; Predicated floating-point absolute comparisons. -(define_expand "@aarch64_pred_fac<cmp_op><mode>" - [(set (match_operand:<VPRED> 0 "register_operand") - (unspec:<VPRED> - [(match_operand:<VPRED> 1 "register_operand") - (match_operand:SI 2 "aarch64_sve_ptrue_flag") - (unspec:SVE_FULL_F - [(match_dup 1) - (match_dup 2) - (match_operand:SVE_FULL_F 3 "register_operand")] - UNSPEC_COND_FABS) - (unspec:SVE_FULL_F - [(match_dup 1) - (match_dup 2) - (match_operand:SVE_FULL_F 4 "register_operand")] - UNSPEC_COND_FABS)] - SVE_COND_FP_ABS_CMP))] +(define_expand "@aarch64_pred_fac<cmp_op><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_dup 2) + (match_operand:SVE_FULL_F 3 "register_operand")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_dup 2) + (match_operand:SVE_FULL_F 4 "register_operand")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP) + 0) + (match_dup 5)))] "TARGET_SVE" + { + operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode)); + } ) (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed" @@ -8812,6 +9383,30 @@ "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" ) +(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP) + 0) + (match_operand:<VPRED> 7 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" +) + ;; ------------------------------------------------------------------------- ;; ---- [PRED] Select ;; ------------------------------------------------------------------------- @@ -9260,7 +9855,30 @@ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 
"register_operand" "Upa")] UNSPEC_REV))] "TARGET_SVE" - "rev\t%0.<Vetype>, %1.<Vetype>") + "rev\t%0.<Vetype>, %1.<Vetype>" +) + +(define_expand "@aarch64_sve_rev<mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand") + (match_dup:PRED_ALL 2)] + UNSPEC_REV_PRED))] + "TARGET_SVE" + { + operands[2] = CONST0_RTX (<MODE>mode); + } +) + +(define_insn "*aarch64_sve_rev<mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand:PRED_ALL 2 "aarch64_simd_imm_zero")] + UNSPEC_REV_PRED))] + "TARGET_SVE" + "rev\t%0.<Vetype>, %1.<Vetype>" +) ;; ------------------------------------------------------------------------- ;; ---- [PRED] Special-purpose binary permutes @@ -9285,18 +9903,39 @@ "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" ) -;; Special purpose permute used by the predicate generation instructions. -;; Unlike the normal permute patterns, these instructions operate on VNx16BI -;; regardless of the element size, so that all input and output bits are -;; well-defined. Operand 3 then indicates the size of the permute. -(define_insn "@aarch64_sve_trn1_conv<mode>" +;; Special-purpose permutes used by the ACLE intrinsics and predicate +;; generation instructions. Unlike the normal permute patterns, these +;; instructions operate on VNx16BI regardless of the element size, so that +;; all input and output bits are well-defined. Operand 3 then indicates +;; the size of the permute. +;; +;; To make generation easier, this pattern embeds the permute type as the +;; fourth operand to the unspec. On the one hand, this avoids overloading +;; unspecs like UNSPEC_ZIP1 to represent two different operations. On the +;; other hand, it avoids having a separate unspec for each variant, and +;; having to map from one kind of unspec to the other. +(define_expand "@aarch64_sve_<perm_insn><mode>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand") + (match_operand:VNx16BI 2 "register_operand") + (match_dup:PRED_ALL 3) + (const_int PERMUTE)] + UNSPEC_PERMUTE_PRED))] + "TARGET_SVE" + { + operands[3] = CONST0_RTX (<MODE>mode); + } +) + +(define_insn "*aarch64_sve_<perm_insn><mode>_acle" [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa") (match_operand:VNx16BI 2 "register_operand" "Upa") - (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")] - UNSPEC_TRN1_CONV))] + (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero") + (const_int PERMUTE)] + UNSPEC_PERMUTE_PRED))] "TARGET_SVE" - "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" + "<perm_insn>\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" ) ;; ========================================================================= @@ -9379,18 +10018,37 @@ ;; - FCVTZU ;; ------------------------------------------------------------------------- -;; Unpredicated conversion of floats to integers of the same size (HF to HI, -;; SF to SI or DF to DI). -(define_expand "<optab><mode><v_int_equiv>2" - [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") - (unspec:<V_INT_EQUIV> +;; Unpredicated conversion of floats to integers of the same size or wider, +;; excluding conversions from DF (see below). 
+(define_expand "<optab><SVE_HSF:mode><SVE_HSDI:mode>2"
+  [(set (match_operand:SVE_HSDI 0 "register_operand")
+       (unspec:SVE_HSDI
+         [(match_dup 2)
+          (match_dup 3)
+          (match_operand:SVE_HSF 1 "register_operand")]
+         SVE_COND_FCVTI))]
+  "TARGET_SVE
+   && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_HSF:self_mask>) == 0"
+  {
+    operands[2] = aarch64_sve_fp_pred (<SVE_HSDI:MODE>mode, &operands[3]);
+  }
+)
+
+;; SI <- DF can't use SI <- trunc (DI <- DF) without -ffast-math, so this
+;; truncating variant of FCVTZ{S,U} is useful for auto-vectorization.
+;;
+;; DF is the only source mode for which the mask used above doesn't apply,
+;; so we define a separate pattern for it here.
+(define_expand "<optab><VNx2DF_ONLY:mode><SVE_2SDI:mode>2"
+  [(set (match_operand:SVE_2SDI 0 "register_operand")
+       (unspec:SVE_2SDI
          [(match_dup 2)
           (const_int SVE_RELAXED_GP)
-          (match_operand:SVE_FULL_F 1 "register_operand")]
+          (match_operand:VNx2DF_ONLY 1 "register_operand")]
          SVE_COND_FCVTI))]
   "TARGET_SVE"
   {
-    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[2] = aarch64_ptrue_reg (VNx2BImode);
   }
 )
@@ -9409,18 +10067,37 @@
   }
 )
 
-;; Predicated narrowing float-to-integer conversion.
-(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
-  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
-       (unspec:VNx4SI_ONLY
+;; As above, for pairs used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_PARTIAL_F:mode><SVE_HSDI:mode>"
+  [(set (match_operand:SVE_HSDI 0 "register_operand")
+       (unspec:SVE_HSDI
+         [(match_operand:<SVE_HSDI:VPRED> 1 "aarch64_predicate_operand")
+          (match_operand:SI 3 "aarch64_sve_gp_strictness")
+          (match_operand:SVE_PARTIAL_F 2 "register_operand")]
+         SVE_COND_FCVTI))]
+  "TARGET_SVE
+   && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
+     [ w        , Upl , 0 ; *              ] fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+  }
+)
+
+;; Predicated narrowing float-to-integer conversion.  The VNx2DF->VNx4SI
+;; variant is provided for the ACLE, where the zeroed odd-indexed lanes are
+;; significant.  The VNx2DF->VNx2SI variant is provided for auto-vectorization,
+;; where the upper 32 bits of each container are ignored.
+(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><SVE_SI:mode>"
+  [(set (match_operand:SVE_SI 0 "register_operand")
+       (unspec:SVE_SI
          [(match_operand:VNx2BI 1 "register_operand")
          (match_operand:SI 3 "aarch64_sve_gp_strictness")
          (match_operand:VNx2DF_ONLY 2 "register_operand")]
         SVE_COND_FCVTI))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
-     [ w        , Upl , 0 ; *              ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
-     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+     [ w        , Upl , 0 ; *              ] fcvtz<su>\t%0.<SVE_SI:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_SI:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
   }
 )
@@ -9468,6 +10145,31 @@
   }
 )
 
+;; As above, for pairs that are used by the auto-vectorizer only.
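
The loop behind the "SI <- DF" comment above is the plain conversion below: without -ffast-math the vectorizer cannot go through a DI intermediate, so it needs the direct truncating FCVTZS. An illustrative sketch (function name invented):

#include <stdint.h>

/* Each double is truncated straight to int32_t; with SVE enabled this
   can map to the VNx2DF-source patterns above, subject to the
   vectorizer's cost model.  */
void
dtoi (int32_t *restrict out, const double *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (int32_t) in[i];
}
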
+(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_PARTIAL_F:mode><SVE_HSDI:mode>_relaxed" + [(set (match_operand:SVE_HSDI 0 "register_operand") + (unspec:SVE_HSDI + [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand") + (unspec:SVE_HSDI + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_PARTIAL_F 2 "register_operand")] + SVE_COND_FCVTI) + (match_operand:SVE_HSDI 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE + && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype> + [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype> + [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype> + } + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } +) + (define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") (unspec:SVE_FULL_HSDI @@ -9521,6 +10223,29 @@ } ) +(define_insn_and_rewrite "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx2SI_ONLY:mode>_relaxed" + [(set (match_operand:VNx2SI_ONLY 0 "register_operand") + (unspec:VNx2SI_ONLY + [(match_operand:VNx2BI 1 "register_operand") + (unspec:VNx2SI_ONLY + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:VNx2DF_ONLY 2 "register_operand")] + SVE_COND_FCVTI) + (match_operand:VNx2SI_ONLY 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> + [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> + [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> + } + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } +) + ;; ------------------------------------------------------------------------- ;; ---- [INT<-FP] Packs ;; ------------------------------------------------------------------------- @@ -9565,18 +10290,19 @@ ;; - UCVTF ;; ------------------------------------------------------------------------- -;; Unpredicated conversion of integers to floats of the same size -;; (HI to HF, SI to SF or DI to DF). -(define_expand "<optab><v_int_equiv><mode>2" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F +;; Unpredicated conversion of integers to floats of the same size or +;; narrower. +(define_expand "<optab><SVE_HSDI:mode><SVE_F:mode>2" + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_dup 2) - (const_int SVE_RELAXED_GP) - (match_operand:<V_INT_EQUIV> 1 "register_operand")] + (match_dup 3) + (match_operand:SVE_HSDI 1 "register_operand")] SVE_COND_ICVTF))] - "TARGET_SVE" + "TARGET_SVE + && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_F:self_mask>) == 0" { - operands[2] = aarch64_ptrue_reg (<VPRED>mode); + operands[2] = aarch64_sve_fp_pred (<SVE_HSDI:MODE>mode, &operands[3]); } ) @@ -9596,6 +10322,22 @@ } ) +;; As above, for pairs that are used by the auto-vectorizer only. 
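
The corresponding integer-to-float direction looks like this at the source level; whether the partial-vector SCVTF pairs below are used depends on the chosen vector mode and cost model (an illustrative sketch):

#include <stdint.h>

/* A narrowing int64 -> float32 conversion loop; mixed-width pairs
   like this are what the auto-vectorizer-only patterns target.  */
void
itof (float *restrict out, const int64_t *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (float) in[i];
}
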
+(define_insn "*aarch64_sve_<optab>_nonextend<SVE_HSDI:mode><SVE_PARTIAL_F:mode>" + [(set (match_operand:SVE_PARTIAL_F 0 "register_operand") + (unspec:SVE_PARTIAL_F + [(match_operand:<SVE_HSDI:VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_HSDI 2 "register_operand")] + SVE_COND_ICVTF))] + "TARGET_SVE + && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0" + {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] + [ w , Upl , 0 ; * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> + } +) + ;; Predicated widening integer-to-float conversion. (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" [(set (match_operand:VNx2DF_ONLY 0 "register_operand") @@ -9655,6 +10397,31 @@ } ) +;; As above, for pairs that are used by the auto-vectorizer only. +(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_HSDI:mode><SVE_PARTIAL_F:mode>_relaxed" + [(set (match_operand:SVE_PARTIAL_F 0 "register_operand") + (unspec:SVE_PARTIAL_F + [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand") + (unspec:SVE_PARTIAL_F + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_HSDI 2 "register_operand")] + SVE_COND_ICVTF) + (match_operand:SVE_PARTIAL_F 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE + && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> + [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> + [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> + } + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } +) + (define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict" [(set (match_operand:SVE_FULL_F 0 "register_operand") (unspec:SVE_FULL_F @@ -9779,6 +10546,27 @@ } ) +;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Truncating conversions +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCVT +;; ------------------------------------------------------------------------- + +;; Unpredicated float-to-float truncation. +(define_expand "trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>2" + [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand") + (unspec:SVE_PARTIAL_HSF + [(match_dup 2) + (match_dup 3) + (match_operand:SVE_SDF 1 "register_operand")] + SVE_COND_FCVT))] + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" + { + operands[2] = aarch64_sve_fp_pred (<SVE_SDF:MODE>mode, &operands[3]); + } +) + ;; Predicated float-to-float truncation. (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" [(set (match_operand:SVE_FULL_HSF 0 "register_operand") @@ -9794,6 +10582,21 @@ } ) +;; As above, for pairs that are used by the auto-vectorizer only. 
+(define_insn "*aarch64_sve_<optab>_trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>" + [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand") + (unspec:SVE_PARTIAL_HSF + [(match_operand:<SVE_SDF:VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_SDF 2 "register_operand")] + SVE_COND_FCVT))] + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" + {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] + [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> + } +) + ;; Predicated float-to-float truncation with merging. (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" [(set (match_operand:SVE_FULL_HSF 0 "register_operand") @@ -9828,6 +10631,30 @@ } ) +;; As above, for pairs that are used by the auto-vectorizer only. +(define_insn_and_rewrite "*cond_<optab>_trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>" + [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand") + (unspec:SVE_PARTIAL_HSF + [(match_operand:<SVE_SDF:VPRED> 1 "register_operand") + (unspec:SVE_PARTIAL_HSF + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_SDF 2 "register_operand")] + SVE_COND_FCVT) + (match_operand:SVE_PARTIAL_HSF 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> + [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> + [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype> + } + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } +) + ;; ------------------------------------------------------------------------- ;; ---- [FP<-FP] Packs (bfloat16) ;; ------------------------------------------------------------------------- @@ -9936,6 +10763,27 @@ } ) +;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Extending conversions +;; ------------------------------------------------------------------------- +;; Includes: +;; - FCVT +;; ------------------------------------------------------------------------- + +;; Unpredicated float-to-float extension. +(define_expand "extend<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>2" + [(set (match_operand:SVE_SDF 0 "register_operand") + (unspec:SVE_SDF + [(match_dup 2) + (match_dup 3) + (match_operand:SVE_PARTIAL_HSF 1 "register_operand")] + SVE_COND_FCVT))] + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" + { + operands[2] = aarch64_sve_fp_pred (<SVE_SDF:MODE>mode, &operands[3]); + } +) + ;; Predicated float-to-float extension. (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>" [(set (match_operand:SVE_FULL_SDF 0 "register_operand") @@ -9951,6 +10799,21 @@ } ) +;; As above, for pairs that are used by the auto-vectorizer only. 
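
Likewise for float-to-float conversions: the truncating and extending FCVT expanders added in this section cover plain conversion loops such as the following sketch:

/* float -> double extension; the converse double -> float loop uses
   the truncating patterns above.  */
void
ftod (double *restrict out, const float *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = in[i];
}
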
+(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>" + [(set (match_operand:SVE_SDF 0 "register_operand") + (unspec:SVE_SDF + [(match_operand:<SVE_SDF:VPRED> 1 "aarch64_predicate_operand") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:SVE_PARTIAL_HSF 2 "register_operand")] + SVE_COND_FCVT))] + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" + {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] + [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> + [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> + } +) + ;; Predicated float-to-float extension with merging. (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>" [(set (match_operand:SVE_FULL_SDF 0 "register_operand") @@ -9985,6 +10848,30 @@ } ) +;; As above, for pairs that are used by the auto-vectorizer only. +(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>_relaxed" + [(set (match_operand:SVE_SDF 0 "register_operand") + (unspec:SVE_SDF + [(match_operand:<SVE_SDF:VPRED> 1 "register_operand") + (unspec:SVE_SDF + [(match_operand 4) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_PARTIAL_HSF 2 "register_operand")] + SVE_COND_FCVT) + (match_operand:SVE_SDF 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> + [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> + [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype> + } + "&& !rtx_equal_p (operands[1], operands[4])" + { + operands[4] = copy_rtx (operands[1]); + } +) + ;; ------------------------------------------------------------------------- ;; ---- [PRED<-PRED] Packs ;; ------------------------------------------------------------------------- @@ -10037,6 +10924,34 @@ "punpk<perm_hilo>\t%0.h, %1.b" ) +(define_expand "@aarch64_sve_punpk<perm_hilo>_acle" + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:VNx8BI + [(match_operand:VNx16BI 1 "register_operand")] + UNPACK_UNSIGNED) + 0) + (match_dup 2)))] + "TARGET_SVE" + { + operands[2] = aarch64_ptrue_all (2); + } +) + +(define_insn "*aarch64_sve_punpk<perm_hilo>_acle" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:VNx8BI + [(match_operand:VNx16BI 1 "register_operand" "Upa")] + UNPACK_UNSIGNED) + 0) + (match_operand:VNx8BI 2 "aarch64_ptrue_all_operand")))] + "TARGET_SVE" + "punpk<perm_hilo>\t%0.h, %1.b" +) + ;; ========================================================================= ;; == Vector partitioning ;; ========================================================================= @@ -10261,14 +11176,49 @@ ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_<sve_pred_op><mode>" - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") - (unspec:PRED_ALL - [(match_operand:PRED_ALL 1 "register_operand" "Upa") + [(set (match_operand:VNx16BI_ONLY 0 "register_operand" "=Upa") + (unspec:VNx16BI_ONLY + [(match_operand:VNx16BI_ONLY 1 "register_operand" "Upa") (match_operand:SI 2 "aarch64_sve_ptrue_flag") - (match_operand:PRED_ALL 3 
"register_operand" "0")] + (match_operand:VNx16BI_ONLY 3 "register_operand" "0")] SVE_PITER)) (clobber (reg:CC_NZC CC_REGNUM))] - "TARGET_SVE && <max_elem_bits> >= <elem_bits>" + "TARGET_SVE" + "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" +) + +(define_expand "@aarch64_sve_<sve_pred_op><mode>" + [(parallel + [(set (match_operand:VNx16BI 0 "register_operand") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(match_operand:PRED_HSD 1 "register_operand") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_HSD 3 "register_operand")] + PNEXT_ONLY) + 0) + (match_dup 4))) + (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + operands[4] = aarch64_ptrue_all (<data_bytes>); + } +) + +(define_insn "*aarch64_sve_<sve_pred_op><mode>" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(match_operand:PRED_HSD 1 "register_operand" "Upa") + (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (match_operand:PRED_HSD 3 "register_operand" "0")] + PNEXT_ONLY) + 0) + (match_operand:PRED_HSD 4 "aarch64_ptrue_all_operand"))) + (clobber (reg:CC_NZC CC_REGNUM))] + "TARGET_SVE" "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" ) @@ -10302,6 +11252,38 @@ } ) +(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand 2) + (match_operand:SI 3 "aarch64_sve_ptrue_flag") + (unspec:PRED_HSD + [(match_operand 4) + (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (match_operand:PRED_HSD 6 "register_operand" "0")] + PNEXT_ONLY)] + UNSPEC_PTEST)) + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (and:VNx16BI + (subreg:VNx16BI + (unspec:PRED_HSD + [(match_dup 4) + (match_dup 5) + (match_dup 6)] + PNEXT_ONLY) + 0) + (match_operand:PRED_HSD 7 "aarch64_ptrue_all_operand")))] + "TARGET_SVE + && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" + "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" + "&& !rtx_equal_p (operands[2], operands[4])" + { + operands[4] = operands[2]; + operands[5] = operands[3]; + } +) + ;; Same, but with only the flags result being interesting. (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest" [(set (reg:CC_NZC CC_REGNUM) @@ -11163,16 +12145,12 @@ (define_insn "@aarch64_sve_set_neonq_<mode>" [(set (match_operand:SVE_FULL 0 "register_operand" "=w") - (unspec:SVE_FULL - [(match_operand:SVE_FULL 1 "register_operand" "w") - (match_operand:<V128> 2 "register_operand" "w") - (match_operand:<VPRED> 3 "register_operand" "Upl")] - UNSPEC_SET_NEONQ))] + (unspec:SVE_FULL + [(match_operand:SVE_FULL 1 "register_operand" "w") + (match_operand:<V128> 2 "register_operand" "w") + (match_operand:<VPRED> 3 "register_operand" "Upl")] + UNSPEC_SET_NEONQ))] "TARGET_SVE && BYTES_BIG_ENDIAN" - { - operands[2] = lowpart_subreg (<MODE>mode, operands[2], - GET_MODE (operands[2])); - return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>"; - } + "sel\t%0.<Vetype>, %3, %Z2.<Vetype>, %1.<Vetype>" ) |