Diffstat (limited to 'gcc/config/riscv')
58 files changed, 9163 insertions, 1497 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 0c3b0cc..f372f0e 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1673,3 +1673,174 @@ DONE; } [(set_attr "type" "vandn")]) + + +;; ============================================================================= +;; Combine vec_duplicate + op.vv to op.vx +;; Include +;; - vadd.vx +;; ============================================================================= +(define_insn_and_split "*<optab>_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (any_int_binop_no_shift_vdup_v:V_VLSI + (vec_duplicate:V_VLSI + (match_operand:<VEL> 1 "register_operand")) + (match_operand:V_VLSI 2 "<binop_rhs2_predicate>")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::expand_vx_binary_vec_dup_vec (operands[0], operands[2], + operands[1], <CODE>, + <MODE>mode); + } + [(set_attr "type" "vialu")]) + +(define_insn_and_split "*<optab>_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (any_int_binop_no_shift_v_vdup:V_VLSI + (match_operand:V_VLSI 1 "<binop_rhs2_predicate>") + (vec_duplicate:V_VLSI + (match_operand:<VEL> 2 "register_operand"))))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::expand_vx_binary_vec_vec_dup (operands[0], operands[1], + operands[2], <CODE>, + <MODE>mode); + } + [(set_attr "type" "vialu")]) + +;; ============================================================================= +;; Combine vec_duplicate + op.vv to op.vf +;; Include +;; - vfmadd.vf +;; - vfmsub.vf +;; - vfnmadd.vf +;; - vfnmsub.vf +;; - vfmacc.vf +;; - vfmsac.vf +;; - vfnmacc.vf +;; - vfnmsac.vf +;; - vfwmacc.vf +;; - vfwmsac.vf +;; ============================================================================= + +;; vfmadd.vf, vfmsub.vf, vfmacc.vf, vfmsac.vf +(define_insn_and_split "*<optab>_vf_<mode>" + [(set (match_operand:V_VLSF 0 "register_operand") + (plus_minus:V_VLSF + (mult:V_VLSF + (vec_duplicate:V_VLSF + (match_operand:<VEL> 1 "register_operand")) + (match_operand:V_VLSF 2 "register_operand")) + (match_operand:V_VLSF 3 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], + RVV_VUNDEF(<MODE>mode)}; + riscv_vector::emit_vlmax_insn (code_for_pred_mul_scalar (<CODE>, <MODE>mode), + riscv_vector::TERNARY_OP_FRM_DYN, ops); + DONE; + } + [(set_attr "type" "vfmuladd")] +) + +;; vfnmsub.vf, vfnmsac.vf +(define_insn_and_split "*vfnmsub_<mode>" + [(set (match_operand:V_VLSF 0 "register_operand") + (minus:V_VLSF + (match_operand:V_VLSF 3 "register_operand") + (mult:V_VLSF + (vec_duplicate:V_VLSF + (match_operand:<VEL> 1 "register_operand")) + (match_operand:V_VLSF 2 "register_operand"))))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], + RVV_VUNDEF(<MODE>mode)}; + riscv_vector::emit_vlmax_insn (code_for_pred_mul_neg_scalar (PLUS, <MODE>mode), + riscv_vector::TERNARY_OP_FRM_DYN, ops); + DONE; + } + [(set_attr "type" "vfmuladd")] +) + +;; vfnmadd.vf, vfnmacc.vf +(define_insn_and_split "*vfnmadd_<mode>" + [(set (match_operand:V_VLSF 0 "register_operand") + (minus:V_VLSF + (mult:V_VLSF + (neg:V_VLSF + (match_operand:V_VLSF 2 "register_operand")) + (vec_duplicate:V_VLSF + (match_operand:<VEL> 1 "register_operand"))) + (match_operand:V_VLSF 3 "register_operand")))] + 
"TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], + RVV_VUNDEF(<MODE>mode)}; + riscv_vector::emit_vlmax_insn (code_for_pred_mul_neg_scalar (MINUS, <MODE>mode), + riscv_vector::TERNARY_OP_FRM_DYN, ops); + DONE; + } + [(set_attr "type" "vfmuladd")] +) + +;; vfwmacc.vf, vfwmsac.vf +(define_insn_and_split "*vfwmacc_vf_<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (plus_minus:VWEXTF + (mult:VWEXTF + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")) + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 2 "register_operand")))) + (match_operand:VWEXTF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[1], operands[2], operands[3]}; + riscv_vector::emit_vlmax_insn (code_for_pred_widen_mul_scalar (<CODE>, <MODE>mode), + riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops); + DONE; + } + [(set_attr "type" "vfwmuladd")] +) + +;; Intermediate pattern for vfwmacc.vf and vfwmsac.vf used by combine +(define_insn_and_split "*extend_vf_<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 1 "register_operand"))))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx tmp = gen_reg_rtx (<VEL>mode); + emit_insn (gen_extend<vsubel><vel>2(tmp, operands[1])); + + rtx ops[] = {operands[0], tmp}; + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), + riscv_vector::UNARY_OP, ops); + DONE; + } + [(set_attr "type" "vfwmuladd")] +) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 9e51e3c..94a61bd 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1338,7 +1338,7 @@ (define_expand "select_vl<mode>" [(match_operand:P 0 "register_operand") (match_operand:P 1 "vector_length_operand") - (match_operand:P 2 "")] + (match_operand:P 2 "immediate_operand")] "TARGET_VECTOR" { riscv_vector::expand_select_vl (operands); @@ -2491,19 +2491,13 @@ (sign_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))] "TARGET_VECTOR" -{ - /* First emit a widening addition. */ - rtx tmp1 = gen_reg_rtx (<MODE>mode); - rtx ops1[] = {tmp1, operands[1], operands[2]}; - insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); - - /* Then a narrowing shift. */ - rtx ops2[] = {operands[0], tmp1, const1_rtx}; - icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2); - DONE; -}) + { + insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, + operands); + DONE; + } +) (define_expand "avg<v_double_trunc>3_ceil" [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") @@ -2517,25 +2511,13 @@ (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))) (const_int 1)))))] "TARGET_VECTOR" -{ - /* First emit a widening addition. */ - rtx tmp1 = gen_reg_rtx (<MODE>mode); - rtx ops1[] = {tmp1, operands[1], operands[2]}; - insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1); - - /* Then add 1. 
*/ - rtx tmp2 = gen_reg_rtx (<MODE>mode); - rtx ops2[] = {tmp2, tmp1, const1_rtx}; - icode = code_for_pred_scalar (PLUS, <MODE>mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2); - - /* Finally, a narrowing shift. */ - rtx ops3[] = {operands[0], tmp2, const1_rtx}; - icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3); - DONE; -}) + { + insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, + operands); + DONE; + } +) ;; csrwi vxrm, 2 ;; vaaddu.vv vd, vs2, vs1 @@ -2546,7 +2528,8 @@ "TARGET_VECTOR" { insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, + operands); DONE; }) @@ -2559,7 +2542,8 @@ "TARGET_VECTOR" { insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode); - riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, operands); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, + operands); DONE; }) @@ -2584,7 +2568,8 @@ (match_operand:V_VLSF 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode); + riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, + <VCONVERT>mode); DONE; } ) @@ -2594,7 +2579,8 @@ (match_operand:V_VLSF 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode); + riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, + <VCONVERT>mode); DONE; } ) @@ -2604,7 +2590,8 @@ (match_operand:V_VLSF 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_nearbyint (operands[0], operands[1], <MODE>mode, <VCONVERT>mode); + riscv_vector::expand_vec_nearbyint (operands[0], operands[1], <MODE>mode, + <VCONVERT>mode); DONE; } ) @@ -2614,7 +2601,8 @@ (match_operand:V_VLSF 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_rint (operands[0], operands[1], <MODE>mode, <VCONVERT>mode); + riscv_vector::expand_vec_rint (operands[0], operands[1], <MODE>mode, + <VCONVERT>mode); DONE; } ) @@ -2624,7 +2612,8 @@ (match_operand:V_VLSF 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_round (operands[0], operands[1], <MODE>mode, <VCONVERT>mode); + riscv_vector::expand_vec_round (operands[0], operands[1], <MODE>mode, + <VCONVERT>mode); DONE; } ) @@ -2634,7 +2623,8 @@ (match_operand:V_VLSF 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_trunc (operands[0], operands[1], <MODE>mode, <VCONVERT>mode); + riscv_vector::expand_vec_trunc (operands[0], operands[1], <MODE>mode, + <VCONVERT>mode); DONE; } ) @@ -2644,7 +2634,8 @@ (match_operand:V_VLSF 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_roundeven (operands[0], operands[1], <MODE>mode, <VCONVERT>mode); + riscv_vector::expand_vec_roundeven (operands[0], operands[1], <MODE>mode, + <VCONVERT>mode); DONE; } ) @@ -2701,7 +2692,8 @@ (match_operand:V_VLS_F_CONVERT_SI 1 
"register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode, <V_F2SI_CONVERT>mode); + riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode, + <V_F2SI_CONVERT>mode); DONE; } ) @@ -2711,7 +2703,8 @@ (match_operand:V_VLS_F_CONVERT_DI 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode, <V_F2DI_CONVERT>mode); + riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode, + <V_F2DI_CONVERT>mode); DONE; } ) @@ -2721,7 +2714,8 @@ (match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode, <V_F2SI_CONVERT>mode); + riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode, + <V_F2SI_CONVERT>mode); DONE; } ) @@ -2731,7 +2725,8 @@ (match_operand:V_VLS_F_CONVERT_DI 1 "register_operand")] "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math" { - riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode, <V_F2DI_CONVERT>mode); + riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode, + <V_F2DI_CONVERT>mode); DONE; } ) @@ -2763,7 +2758,8 @@ (match_operand:V_VLSI 2 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2], <MODE>mode); + riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2], + <MODE>mode); DONE; } ) @@ -2774,7 +2770,8 @@ (match_operand:V_VLSI 2 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_vec_ssadd (operands[0], operands[1], operands[2], <MODE>mode); + riscv_vector::expand_vec_ssadd (operands[0], operands[1], operands[2], + <MODE>mode); DONE; } ) @@ -2785,7 +2782,8 @@ (match_operand:V_VLSI 2 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_vec_ussub (operands[0], operands[1], operands[2], <MODE>mode); + riscv_vector::expand_vec_ussub (operands[0], operands[1], operands[2], + <MODE>mode); DONE; } ) @@ -2796,7 +2794,8 @@ (match_operand:V_VLSI 2 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2], <MODE>mode); + riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2], + <MODE>mode); DONE; } ) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 5ed5e18..21426f4 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -1,4 +1,4 @@ -;; Machine description for RISC-V Bit Manipulation operations. +;); Machine description for RISC-V Bit Manipulation operations. ;; Copyright (C) 2021-2025 Free Software Foundation, Inc. ;; This file is part of GCC. 
@@ -68,23 +68,25 @@ [(set (match_operand:DI 0 "register_operand") (zero_extend:DI (plus:SI (ashift:SI (subreg:SI (match_operand:DI 1 "register_operand") 0) (match_operand:QI 2 "imm123_operand")) - (subreg:SI (match_operand:DI 3 "register_operand") 0))))] + (subreg:SI (match_operand:DI 3 "register_operand") 0)))) + (clobber (match_operand:DI 4 "register_operand"))] "TARGET_64BIT && TARGET_ZBA" - [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3))) - (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))]) + [(set (match_dup 4) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3))) + (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 4) 0)))]) (define_split [(set (match_operand:DI 0 "register_operand") (zero_extend:DI (plus:SI (subreg:SI (and:DI (ashift:DI (match_operand:DI 1 "register_operand") (match_operand:QI 2 "imm123_operand")) (match_operand:DI 3 "consecutive_bits_operand")) 0) - (subreg:SI (match_operand:DI 4 "register_operand") 0))))] + (subreg:SI (match_operand:DI 4 "register_operand") 0)))) + (clobber (match_operand:DI 5 "register_operand"))] "TARGET_64BIT && TARGET_ZBA && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3])) /* Ensure the mask includes all the bits in SImode. */ && ((INTVAL (operands[3]) & (HOST_WIDE_INT_1U << 31)) != 0)" - [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 4))) - (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))]) + [(set (match_dup 5) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 4))) + (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 5) 0)))]) ; Make sure that an andi followed by a sh[123]add remains a two instruction ; sequence--and is not torn apart into slli, slri, add. @@ -195,13 +197,14 @@ (match_operand:QI 2 "imm123_operand")) (match_operand 3 "consecutive_bits32_operand")) (match_operand:DI 4 "register_operand")) - (match_operand 5 "immediate_operand")))] + (match_operand 5 "immediate_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] "TARGET_64BIT && TARGET_ZBA" - [(set (match_dup 0) + [(set (match_dup 6) (plus:DI (and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)) (match_dup 4))) - (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 5)))]) + (set (match_dup 0) (plus:DI (match_dup 6) (match_dup 5)))]) ;; ZBB extension. 
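The rotate patterns rewritten in the following hunk match an explicitly masked shift count; since the port does not define SHIFT_COUNT_TRUNCATED (as the bext comment added further down also notes), the and with 31/63 is absorbed into the pattern so a plain rol/ror/rorw can be emitted. A hedged sketch of the usual rotate idiom whose masked count feeds these patterns (illustrative, assuming Zbb):

/* Illustrative only: the explicit (n & 31) / (-n & 31) masks are the
   kind of count expression the rewritten rotate patterns match.  */
unsigned int
rotl32 (unsigned int x, unsigned int n)
{
  return (x << (n & 31)) | (x >> (-n & 31));
}

Whether this exact source form maps onto one of the new define_insns depends on how the middle end canonicalizes the rotate; the example only shows where the masked count comes from.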
@@ -423,39 +426,40 @@ "rolw\t%0,%1,%2" [(set_attr "type" "bitmanip")]) -(define_insn_and_split "*<bitmanip_optab><GPR:mode>3_mask" - [(set (match_operand:GPR 0 "register_operand" "= r") - (bitmanip_rotate:GPR - (match_operand:GPR 1 "register_operand" " r") - (match_operator 4 "subreg_lowpart_operator" - [(and:GPR2 - (match_operand:GPR2 2 "register_operand" "r") - (match_operand 3 "<GPR:shiftm1>" "<GPR:shiftm1p>"))])))] +(define_insn "*<bitmanip_optab><mode>3_mask" + [(set (match_operand:X 0 "register_operand" "=r") + (bitmanip_rotate:X + (match_operand:X 1 "register_operand" "r") + (match_operator 4 "subreg_lowpart_operator" + [(and:X (match_operand:X 2 "register_operand" "r") + (match_operand 3 "<X:shiftm1>" "<X:shiftm1p>"))])))] "TARGET_ZBB || TARGET_ZBKB" - "#" - "&& 1" - [(set (match_dup 0) - (bitmanip_rotate:GPR (match_dup 1) - (match_dup 2)))] - "operands[2] = gen_lowpart (QImode, operands[2]);" + "<bitmanip_insn>\t%0,%1,%2" [(set_attr "type" "bitmanip") - (set_attr "mode" "<GPR:MODE>")]) + (set_attr "mode" "<X:MODE>")]) -(define_insn_and_split "*<bitmanip_optab>si3_sext_mask" - [(set (match_operand:DI 0 "register_operand" "= r") - (sign_extend:DI (bitmanip_rotate:SI - (match_operand:SI 1 "register_operand" " r") - (match_operator 4 "subreg_lowpart_operator" - [(and:GPR - (match_operand:GPR 2 "register_operand" "r") - (match_operand 3 "const_si_mask_operand"))]))))] +(define_insn "*<bitmanip_optab>3_mask_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (bitmanip_rotate:SI + (match_operand:SI 1 "register_operand" "r") + (match_operator 3 "subreg_lowpart_operator" + [(and:X (match_operand:SI 2 "register_operand" "r") + (const_int 31))])))] "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" - "#" - "&& 1" - [(set (match_dup 0) - (sign_extend:DI (bitmanip_rotate:SI (match_dup 1) - (match_dup 2))))] - "operands[2] = gen_lowpart (QImode, operands[2]);" + "<bitmanip_insn>w\t%0,%1,%2" + [(set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +(define_insn "*<bitmanip_optab>si3_sext_mask" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (bitmanip_rotate:SI + (match_operand:SI 1 "register_operand" "r") + (match_operator 3 "subreg_lowpart_operator" + [(and:X (match_operand:GPR 2 "register_operand" "r") + (const_int 31))]))))] + "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" + "<bitmanip_insn>w\t%0,%1,%2" [(set_attr "type" "bitmanip") (set_attr "mode" "DI")]) @@ -842,44 +846,40 @@ [(set_attr "type" "bitmanip")]) ;; In case we have "val & ~IMM" where ~IMM has 2 bits set. 
-(define_insn_and_split "*bclri<mode>_nottwobits" - [(set (match_operand:X 0 "register_operand" "=r") - (and:X (match_operand:X 1 "register_operand" "r") - (match_operand:X 2 "const_nottwobits_not_arith_operand" "i")))] +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "const_nottwobits_not_arith_operand"))) + (clobber (match_operand:X 3 "register_operand"))] "TARGET_ZBS && !paradoxical_subreg_p (operands[1])" - "#" - "&& reload_completed" - [(set (match_dup 0) (and:X (match_dup 1) (match_dup 3))) - (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))] + [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4))) + (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))] { - unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]); - unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits); + unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]); + unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits); - operands[3] = GEN_INT (~bits | topbit); - operands[4] = GEN_INT (~topbit); -} -[(set_attr "type" "bitmanip")]) + operands[4] = GEN_INT (~bits | topbit); + operands[5] = GEN_INT (~topbit); +}) ;; In case of a paradoxical subreg, the sign bit and the high bits are ;; not allowed to be changed -(define_insn_and_split "*bclridisi_nottwobits" - [(set (match_operand:DI 0 "register_operand" "=r") - (and:DI (match_operand:DI 1 "register_operand" "r") - (match_operand:DI 2 "const_nottwobits_not_arith_operand" "i")))] +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_nottwobits_not_arith_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] "TARGET_64BIT && TARGET_ZBS && clz_hwi (~UINTVAL (operands[2])) > 33" - "#" - "&& reload_completed" - [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3))) - (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))] + [(set (match_dup 3) (and:DI (match_dup 1) (match_dup 4))) + (set (match_dup 0) (and:DI (match_dup 3) (match_dup 5)))] { - unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]); - unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits); + unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]); + unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits); - operands[3] = GEN_INT (~bits | topbit); - operands[4] = GEN_INT (~topbit); -} -[(set_attr "type" "bitmanip")]) + operands[4] = GEN_INT (~bits | topbit); + operands[5] = GEN_INT (~topbit); +}) ;; An outer AND with a constant where bits 31..63 are 0 can be seen as ;; a virtual zero extension from 31 to 64 bits. @@ -908,6 +908,24 @@ "bext\t%0,%1,%2" [(set_attr "type" "bitmanip")]) +;; We do not define SHIFT_COUNT_TRUNCATED, so we have to have variants +;; that mask/extend the count if we want to eliminate those ops +;; +;; We could (in theory) use GPR for the various modes, but I haven't +;; seen those cases appear in practice. Without a testcase I've +;; elected to keep the modes X which is easy to reason about. +(define_insn "*bext<mode>_mask_pos" + [(set (match_operand:X 0 "register_operand" "=r") + (zero_extract:X (match_operand:X 1 "register_operand" "r") + (const_int 1) + (and:X + (match_operand:X 2 "register_operand" "r") + (match_operand 3 "const_int_operand"))))] + "(TARGET_ZBS + && INTVAL (operands[3]) + 1 == GET_MODE_BITSIZE (<MODE>mode))" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + ;; This is a bext followed by a seqz. 
Normally this would be a 3->2 split ;; But the and-not pattern with a constant operand is a define_insn_and_split, ;; so this looks like a 2->2 split, which combine rejects. So implement it @@ -992,12 +1010,13 @@ [(set (match_operand:X 0 "register_operand") (and:X (not:X (lshiftrt:X (match_operand:X 1 "register_operand") (match_operand:QI 2 "register_operand"))) - (const_int 1)))] + (const_int 1))) + (clobber (match_operand:X 3 "register_operand"))] "TARGET_ZBS" - [(set (match_dup 0) (zero_extract:X (match_dup 1) + [(set (match_dup 3) (zero_extract:X (match_dup 1) (const_int 1) (match_dup 2))) - (set (match_dup 0) (xor:X (match_dup 0) (const_int 1)))] + (set (match_dup 0) (xor:X (match_dup 3) (const_int 1)))] "operands[2] = gen_lowpart (<MODE>mode, operands[2]);") ;; We can create a polarity-reversed mask (i.e. bit N -> { set = 0, clear = -1 }) @@ -1008,49 +1027,49 @@ (neg:GPR (eq:GPR (zero_extract:GPR (match_operand:GPR 1 "register_operand") (const_int 1) (match_operand 2)) - (const_int 0))))] + (const_int 0)))) + (clobber (match_operand:X 3 "register_operand"))] "TARGET_ZBS" - [(set (match_dup 0) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 2))) - (set (match_dup 0) (plus:GPR (match_dup 0) (const_int -1)))]) + [(set (match_dup 3) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 2))) + (set (match_dup 0) (plus:GPR (match_dup 3) (const_int -1)))]) ;; Catch those cases where we can use a bseti/binvi + ori/xori or ;; bseti/binvi + bseti/binvi instead of a lui + addi + or/xor sequence. (define_insn_and_split "*<or_optab>i<mode>_extrabit" [(set (match_operand:X 0 "register_operand" "=r") (any_or:X (match_operand:X 1 "register_operand" "r") - (match_operand:X 2 "uimm_extra_bit_or_twobits" "i")))] + (match_operand:X 2 "uimm_extra_bit_or_twobits" "i"))) + (clobber (match_scratch:X 3 "=&r"))] "TARGET_ZBS && !single_bit_mask_operand (operands[2], VOIDmode)" "#" "&& reload_completed" - [(set (match_dup 0) (<or_optab>:X (match_dup 1) (match_dup 3))) - (set (match_dup 0) (<or_optab>:X (match_dup 0) (match_dup 4)))] + [(set (match_dup 3) (<or_optab>:X (match_dup 1) (match_dup 4))) + (set (match_dup 0) (<or_optab>:X (match_dup 3) (match_dup 5)))] { unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]); unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits); - operands[3] = GEN_INT (bits &~ topbit); - operands[4] = GEN_INT (topbit); + operands[4] = GEN_INT (bits &~ topbit); + operands[5] = GEN_INT (topbit); } [(set_attr "type" "bitmanip")]) ;; Same to use blcri + andi and blcri + bclri -(define_insn_and_split "*andi<mode>_extrabit" - [(set (match_operand:X 0 "register_operand" "=r") - (and:X (match_operand:X 1 "register_operand" "r") - (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i")))] +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits"))) + (clobber (match_operand:X 3 "register_operand"))] "TARGET_ZBS && !not_single_bit_mask_operand (operands[2], VOIDmode)" - "#" - "&& reload_completed" - [(set (match_dup 0) (and:X (match_dup 1) (match_dup 3))) - (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))] + [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4))) + (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))] { unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]); unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (~bits); - operands[3] = GEN_INT (bits | topbit); - operands[4] = GEN_INT (~topbit); -} -[(set_attr "type" 
"bitmanip")]) + operands[4] = GEN_INT (bits | topbit); + operands[5] = GEN_INT (~topbit); +}) ;; If we have the ZBA extension, then we can clear the upper half of a 64 ;; bit object with a zext.w. So if we have AND where the constant would @@ -1203,7 +1222,7 @@ we can't keep it in 64 bit variable.) then use clmul instruction to implement the CRC, otherwise (TARGET_ZBKB) generate table based using brev. */ - if ((TARGET_ZBKC || TARGET_ZBC) && <ANYI:MODE>mode < word_mode) + if ((TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) && <ANYI:MODE>mode < word_mode) expand_reversed_crc_using_clmul (<ANYI:MODE>mode, <ANYI1:MODE>mode, operands); else if (TARGET_ZBKB) @@ -1235,7 +1254,8 @@ (match_operand:SUBX 3)] UNSPEC_CRC))] /* We don't support the case when data's size is bigger than CRC's size. */ - "(TARGET_ZBKC || TARGET_ZBC) && <SUBX:MODE>mode >= <SUBX1:MODE>mode" + "(TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) + && <SUBX:MODE>mode >= <SUBX1:MODE>mode" { /* If we have the ZBC or ZBKC extension (ie, clmul) and it is possible to store the quotient within a single variable @@ -1245,3 +1265,89 @@ expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands); DONE; }) + +;; If we have an XOR/IOR with a constant operand (C) and the we can +;; synthesize ~C more efficiently than C, then synthesize ~C and use +;; xnor/orn instead. +;; +;; The same can be done for AND, but mvconst_internal's issues get in +;; the way. That's future work. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_or:X (match_operand:X 1 "register_operand") + (match_operand:X 2 "const_int_operand"))) + (clobber (match_operand:X 3 "register_operand"))] + "TARGET_ZBB + && (riscv_const_insns (operands[2], true) + > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))" + [(const_int 0)] +{ + /* Get the inverted constant into the temporary register. */ + riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2]))); + + /* For xnor, the NOT operation is in a different position. So + we have to customize the split code we generate a bit. + + It is expected that AND will be handled like IOR in the future. */ + if (<CODE> == XOR) + { + rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]); + x = gen_rtx_NOT (<X:MODE>mode, x); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { + rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]); + x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]); + emit_insn (gen_rtx_SET (operands[0], x)); + } + DONE; +}) + +;; More forms of single bit extraction. The RISC-V port does not +;; define SHIFT_COUNT_TRUNCATED so we need forms where the bit position +;; is masked. +;; +;; We could in theory use this for rv32 as well, but it probably does +;; not occur in practice. The bit position would need to be QI/HI mode, +;; otherwise we would not need the zero extension. +;; +;; One could also argue that the zero extension is redundant and should +;; have been optimized away during RTL simplification. +(define_insn "*bextdi_position_ze_masked" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (zero_extend:DI + (and:SI (match_operand:SI 2 "register_operand" "r") + (const_int 63)))))] + "TARGET_64BIT && TARGET_ZBS" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +;; Same as above, but without the extraneous zero_extend. 
+(define_insn "*bextdi_position_ze_masked" + [(set (match_operand:X 0 "register_operand" "=r") + (zero_extract:X + (match_operand:X 1 "register_operand" "r") + (const_int 1) + (and:X (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "bitpos_mask_operand" "n"))))] + "TARGET_64BIT && TARGET_ZBS" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +;; This has shown up in testing. In particular we end up with an +;; immediate input. We can load that into a register and target +;; one of the above bext patterns. +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (lshiftrt:X (match_operand 1 "immediate_operand") + (match_operand:QI 2 "register_operand")) + (const_int 1))) + (clobber (match_operand:X 3 "register_operand"))] + "" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 0) (zero_extract:X (match_dup 3) + (const_int 1) + (zero_extend:X (match_dup 2))))]) diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md index ba3c6e6..5ecaa19 100644 --- a/gcc/config/riscv/constraints.md +++ b/gcc/config/riscv/constraints.md @@ -43,6 +43,10 @@ (define_register_constraint "cf" "TARGET_HARD_FLOAT ? RVC_FP_REGS : (TARGET_ZFINX ? RVC_GR_REGS : NO_REGS)" "RVC floating-point registers (f8-f15), if available, reuse GPR as FPR when use zfinx.") +(define_register_constraint "cR" "RVC_GR_REGS" + "Even-odd RVC general purpose register (x8-x15)." + "regno % 2 == 0") + ;; General constraints (define_constraint "I" @@ -233,10 +237,11 @@ (and (match_code "const_vector") (match_test "rtx_equal_p (op, riscv_vector::gen_scalar_move_mask (GET_MODE (op)))"))) -(define_memory_constraint "Wdm" +(define_constraint "Wdm" "Vector duplicate memory operand" - (and (match_code "mem") - (match_code "reg" "0"))) + (and (match_test "strided_load_broadcast_p ()") + (and (match_code "mem") + (match_code "reg" "0")))) ;; Vendor ISA extension constraints. @@ -311,3 +316,17 @@ "Shifting immediate for SIMD shufflei3." (and (match_code "const_int") (match_test "IN_RANGE (ival, -64, -1)"))) + +(define_constraint "Ou01" + "A 1-bit unsigned immediate." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 1)"))) + +(define_constraint "Ou02" + "A 2-bit unsigned immediate." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 3)"))) + +(define_constraint "Q" + "An address operand that is valid for a prefetch instruction" + (match_operand 0 "prefetch_operand")) diff --git a/gcc/config/riscv/freebsd.h b/gcc/config/riscv/freebsd.h index 2dc7055..217e0ac 100644 --- a/gcc/config/riscv/freebsd.h +++ b/gcc/config/riscv/freebsd.h @@ -42,7 +42,7 @@ along with GCC; see the file COPYING3. 
If not see #define LINK_SPEC " \ -melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv \ %{p:%nconsider using `-pg' instead of `-p' with gprof (1)} \ - " FBSD_LINK_PG_NOTES " \ + " FBSD_LINK_PG_NOTE " \ %{v:-V} \ %{assert*} %{R*} %{rpath*} %{defsym*} \ -X \ diff --git a/gcc/config/riscv/gen-riscv-ext-opt.cc b/gcc/config/riscv/gen-riscv-ext-opt.cc new file mode 100644 index 0000000..17b8f5b --- /dev/null +++ b/gcc/config/riscv/gen-riscv-ext-opt.cc @@ -0,0 +1,105 @@ +#include <vector> +#include <string> +#include <set> +#include <stdio.h> +#include "riscv-opts.h" + +struct version_t +{ + int major; + int minor; + version_t (int major, int minor, + enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE) + : major (major), minor (minor) + {} + bool operator<(const version_t &other) const + { + if (major != other.major) + return major < other.major; + return minor < other.minor; + } + + bool operator== (const version_t &other) const + { + return major == other.major && minor == other.minor; + } +}; + +static void +print_ext_doc_entry (const std::string &ext_name, const std::string &full_name, + const std::string &desc, + const std::vector<version_t> &supported_versions) +{ + // Implementation of the function to print the documentation entry + // for the extension. + std::set<version_t> unique_versions; + for (const auto &version : supported_versions) + unique_versions.insert (version); + printf ("@item %s\n", ext_name.c_str ()); + printf ("@tab"); + for (const auto &version : unique_versions) + { + printf (" %d.%d", version.major, version.minor); + } + printf ("\n"); + printf ("@tab %s", full_name.c_str ()); + if (desc.size ()) + printf (", %s", desc.c_str ()); + printf ("\n\n"); +} + +int +main () +{ + puts ("; Target options for the RISC-V port of the compiler"); + puts (";"); + puts ("; Copyright (C) 2025 Free Software Foundation, Inc."); + puts (";"); + puts ("; This file is part of GCC."); + puts (";"); + puts ( + "; GCC is free software; you can redistribute it and/or modify it under"); + puts ( + "; the terms of the GNU General Public License as published by the Free"); + puts ( + "; Software Foundation; either version 3, or (at your option) any later"); + puts ("; version."); + puts (";"); + puts ("; GCC is distributed in the hope that it will be useful, but WITHOUT"); + puts ("; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"); + puts ("; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"); + puts ("; License for more details."); + puts (";"); + puts ("; You should have received a copy of the GNU General Public License"); + puts ("; along with GCC; see the file COPYING3. 
If not see "); + puts ("; <http://www.gnu.org/licenses/>."); + + puts ("; This file is generated automatically using"); + puts ("; gcc/config/riscv/gen-riscv-ext-opt.cc from:"); + puts ("; gcc/config/riscv/riscv-ext.def"); + puts (""); + puts ("; Please *DO NOT* edit manually."); + + std::set<std::string> all_vars; +#define DEFINE_RISCV_EXT(NAME, UPPERCAE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \ + SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \ + BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \ + all_vars.insert ("riscv_" #FLAG_GROUP "_subext"); +#include "riscv-ext.def" +#undef DEFINE_RISCV_EXT + + for (auto var : all_vars) + { + puts ("TargetVariable"); + printf ("int %s\n\n", var.c_str ()); + } + +#define DEFINE_RISCV_EXT(NAME, UPPERCAE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \ + SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \ + BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \ + puts ("Mask(" #UPPERCAE_NAME ") Var(riscv_" #FLAG_GROUP "_subext)\n"); +#include "riscv-ext.def" +#undef DEFINE_RISCV_EXT + + return 0; +} diff --git a/gcc/config/riscv/gen-riscv-ext-texi.cc b/gcc/config/riscv/gen-riscv-ext-texi.cc new file mode 100644 index 0000000..c29a375 --- /dev/null +++ b/gcc/config/riscv/gen-riscv-ext-texi.cc @@ -0,0 +1,88 @@ +#include <vector> +#include <string> +#include <set> +#include <stdio.h> +#include "riscv-opts.h" + +struct version_t +{ + int major_version; + int minor_version; + version_t (int major, int minor, + enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE) + : major_version (major), minor_version (minor) + {} + bool operator<(const version_t &other) const + { + if (major_version != other.major_version) + return major_version < other.major_version; + return minor_version < other.minor_version; + } + + bool operator== (const version_t &other) const + { + return major_version == other.major_version && minor_version == other.minor_version; + } +}; + +static void +print_ext_doc_entry (const std::string &ext_name, const std::string &full_name, + const std::string &desc, + const std::vector<version_t> &supported_versions) +{ + // Implementation of the function to print the documentation entry + // for the extension. + std::set<version_t> unique_versions; + for (const auto &version : supported_versions) + unique_versions.insert (version); + printf ("@item %s\n", ext_name.c_str ()); + printf ("@tab"); + for (const auto &version : unique_versions) + { + printf (" %d.%d", version.major_version, version.minor_version); + } + printf ("\n"); + printf ("@tab %s", full_name.c_str ()); + if (desc.size ()) + printf (", %s", desc.c_str ()); + printf ("\n\n"); +} + +int +main () +{ + puts ("@c Copyright (C) 2025 Free Software Foundation, Inc."); + puts ("@c This is part of the GCC manual."); + puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi."); + puts (""); + puts ("@c This file is generated automatically using"); + puts ("@c gcc/config/riscv/gen-riscv-ext-texi.cc from:"); + puts ("@c gcc/config/riscv/riscv-ext.def"); + puts ("@c gcc/config/riscv/riscv-opts.h"); + puts (""); + puts ("@c Please *DO NOT* edit manually."); + puts (""); + puts ("@multitable @columnfractions .10 .10 .80"); + puts ("@headitem Extension Name @tab Supported Version @tab Description"); + puts (""); + + /* g extension is a very speical extension that no clear version... 
*/ + puts ("@item g"); + puts ("@tab -"); + puts ( + "@tab General-purpose computing base extension, @samp{g} will expand to"); + puts ("@samp{i}, @samp{m}, @samp{a}, @samp{f}, @samp{d}, @samp{zicsr} and"); + puts ("@samp{zifencei}."); + puts (""); + +#define DEFINE_RISCV_EXT(NAME, UPPERCAE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \ + SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \ + BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \ + print_ext_doc_entry (#NAME, FULL_NAME, DESC, \ + std::vector<version_t> SUPPORTED_VERSIONS); +#include "riscv-ext.def" +#undef DEFINE_RISCV_EXT + + puts ("@end multitable"); + return 0; +} diff --git a/gcc/config/riscv/generic-vector-ooo.md b/gcc/config/riscv/generic-vector-ooo.md index cb71941..ab9e57f 100644 --- a/gcc/config/riscv/generic-vector-ooo.md +++ b/gcc/config/riscv/generic-vector-ooo.md @@ -141,3 +141,7 @@ (eq_attr "type" "rdvlenb,rdvl") "vxu_ooo_issue,vxu_ooo_issue") +;; Vector sf_vcp. +(define_insn_reservation "vec_sf_vcp" 2 + (eq_attr "type" "sf_vc,sf_vc_se") + "vxu_ooo_issue") diff --git a/gcc/config/riscv/genrvv-type-indexer.cc b/gcc/config/riscv/genrvv-type-indexer.cc index 6de23cb6..f296089 100644 --- a/gcc/config/riscv/genrvv-type-indexer.cc +++ b/gcc/config/riscv/genrvv-type-indexer.cc @@ -23,8 +23,14 @@ along with GCC; see the file COPYING3. If not see #include <assert.h> #include <math.h> -#define BOOL_SIZE_LIST {1, 2, 4, 8, 16, 32, 64} -#define EEW_SIZE_LIST {8, 16, 32, 64} +#define BOOL_SIZE_LIST \ + { \ + 1, 2, 4, 8, 16, 32, 64 \ + } +#define EEW_SIZE_LIST \ + { \ + 8, 16, 32, 64 \ + } #define LMUL1_LOG2 0 std::string @@ -167,7 +173,7 @@ floattype (unsigned sew, int lmul_log2) std::string expand_floattype (unsigned sew, int lmul_log2, unsigned nf) { - if (sew != 8 || nf!= 1 + if (sew != 8 || nf != 1 || (!valid_type (sew * 4, lmul_log2 + 2, /*float_t*/ true))) return "INVALID"; @@ -297,11 +303,13 @@ main (int argc, const char **argv) for (unsigned eew : EEW_SIZE_LIST) fprintf (fp, " /*SIGNED_EEW%d_LMUL1_INTERPRET*/ %s,\n", eew, - inttype (eew, LMUL1_LOG2, /* unsigned_p */false).c_str ()); + inttype (eew, LMUL1_LOG2, /* unsigned_p */ false).c_str ()); for (unsigned eew : EEW_SIZE_LIST) fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ %s,\n", eew, - inttype (eew, LMUL1_LOG2, /* unsigned_p */true).c_str ()); + inttype (eew, LMUL1_LOG2, /* unsigned_p */ true).c_str ()); + + fprintf (fp, " /*X2*/ INVALID,\n"); for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6}) { @@ -426,6 +434,10 @@ main (int argc, const char **argv) fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n", eew); + fprintf ( + fp, " /*X2*/ %s,\n", + inttype (sew * 2, lmul_log2 + 1, /*unsigned_p*/ true).c_str ()); + for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6}) { unsigned multiple_of_lmul = 1 << lmul_log2_offset; @@ -501,6 +513,8 @@ main (int argc, const char **argv) for (unsigned eew : EEW_SIZE_LIST) fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n", eew); + fprintf (fp, " /*X2*/ INVALID,\n"); + for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6}) { unsigned multiple_of_lmul = 1 << lmul_log2_offset; @@ -588,6 +602,8 @@ main (int argc, const char **argv) fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n", eew); + fprintf (fp, " /*X2*/ INVALID,\n"); + for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6}) { unsigned multiple_of_lmul = 1 << lmul_log2_offset; diff --git a/gcc/config/riscv/gnu.h b/gcc/config/riscv/gnu.h new file mode 100644 index 0000000..047399b --- /dev/null +++ b/gcc/config/riscv/gnu.h @@ -0,0 +1,59 @@ +/* Definitions for 
RISC-V GNU/Hurd systems with ELF format. + Copyright (C) 1998-2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } while (0) + +#define GNU_USER_DYNAMIC_LINKER "/lib/ld-riscv" XLEN_SPEC "-" ABI_SPEC ".so.1" + +#define ICACHE_FLUSH_FUNC "__riscv_flush_icache" + +#define CPP_SPEC "%{pthread:-D_REENTRANT}" + +#define LD_EMUL_SUFFIX \ + "%{mabi=lp64d:}" \ + "%{mabi=lp64f:_lp64f}" \ + "%{mabi=lp64:_lp64}" \ + "%{mabi=ilp32d:}" \ + "%{mabi=ilp32f:_ilp32f}" \ + "%{mabi=ilp32:_ilp32}" + +#define LINK_SPEC "\ +-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv" LD_EMUL_SUFFIX " \ +%{mno-relax:--no-relax} \ +-X \ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%{shared} \ + %{!shared: \ + %{!static: \ + %{!static-pie: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ + %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}}" + +#define STARTFILE_PREFIX_SPEC \ + "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \ + "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \ + "/lib/ " \ + "/usr/lib/ " + +#define RISCV_USE_CUSTOMISED_MULTI_LIB select_by_abi diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 214c20b..584b345 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -262,6 +262,9 @@ (define_code_attr fix_uns [(fix "fix") (unsigned_fix "fixuns")]) +(define_code_attr OPTAB [(ior "IOR") + (xor "XOR")]) + ;; ------------------------------------------------------------------- ;; Code Attributes diff --git a/gcc/config/riscv/mips-insn.md b/gcc/config/riscv/mips-insn.md new file mode 100644 index 0000000..ad46026 --- /dev/null +++ b/gcc/config/riscv/mips-insn.md @@ -0,0 +1,35 @@ +;; Machine description for MIPS custom instructions. +;; Copyright (C) 2025 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
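The conditional-move pattern defined next selects between two register-or-zero operands based on an equality test against zero and emits mips.ccmov, swapping the arms for EQ versus NE. A small if-conversion candidate it is meant to cover (illustrative, assuming the XMIPSCMOV extension is enabled):

/* Illustrative only: a branchless select that a ccmov-style
   conditional move can implement.  */
long
select_nonzero (long cond, long a, long b)
{
  return cond != 0 ? a : b;
}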
+ +(define_insn "*mov<GPR:mode><X:mode>cc_bitmanip" + [(set (match_operand:GPR 0 "register_operand" "=r") + (if_then_else:GPR (any_eq:X (match_operand:X 1 "register_operand" "r") + (match_operand:X 2 "const_0_operand" "J")) + (match_operand:GPR 3 "reg_or_0_operand" "rJ") + (match_operand:GPR 4 "reg_or_0_operand" "rJ")))] + "TARGET_XMIPSCMOV" +{ + enum rtx_code code = <CODE>; + if (code == NE) + return "mips.ccmov\t%0,%1,%z3,%z4"; + else + return "mips.ccmov\t%0,%1,%z4,%z3"; +} +[(set_attr "type" "condmove") + (set_attr "mode" "<GPR:MODE>")]) diff --git a/gcc/config/riscv/mips-p8700.md b/gcc/config/riscv/mips-p8700.md new file mode 100644 index 0000000..ae0ea8d --- /dev/null +++ b/gcc/config/riscv/mips-p8700.md @@ -0,0 +1,167 @@ +;; DFA-based pipeline description for MIPS P8700. +;; +;; Copyright (C) 2025 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "mips_p8700_agen_alq_pipe, mips_p8700_mdu_pipe, mips_p8700_fpu_pipe") + +;; The address generation queue (AGQ) has AL2, CTISTD and LDSTA pipes +(define_cpu_unit "mips_p8700_agq, mips_p8700_al2, mips_p8700_ctistd, mips_p8700_lsu" + "mips_p8700_agen_alq_pipe") + +(define_cpu_unit "mips_p8700_gpmul, mips_p8700_gpdiv" "mips_p8700_mdu_pipe") + +;; The arithmetic-logic-unit queue (ALQ) has ALU pipe +(define_cpu_unit "mips_p8700_alq, mips_p8700_alu" "mips_p8700_agen_alq_pipe") + +;; The floating-point-unit queue (FPQ) has short and long pipes +(define_cpu_unit "mips_p8700_fpu_short, mips_p8700_fpu_long" "mips_p8700_fpu_pipe") + +;; Long FPU pipeline. 
+(define_cpu_unit "mips_p8700_fpu_apu" "mips_p8700_fpu_pipe") + +;; P8700 unsupported insns are mapped to dummies reservations +(define_reservation "mips_p8700_dummies" + "mips_p8700_agq | mips_p8700_al2 | mips_p8700_ctistd | mips_p8700_lsu | + mips_p8700_fpu_short | mips_p8700_fpu_long") + +(define_reservation "mips_p8700_agq_al2" "mips_p8700_agq, mips_p8700_al2") +(define_reservation "mips_p8700_agq_ctistd" "mips_p8700_agq, mips_p8700_ctistd") +(define_reservation "mips_p8700_agq_lsu" "mips_p8700_agq, mips_p8700_lsu") +(define_reservation "mips_p8700_alq_alu" "mips_p8700_alq, mips_p8700_alu") + +;; +;; FPU pipe +;; + +(define_insn_reservation "mips_p8700_fpu_fadd" 4 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "fadd")) + "mips_p8700_fpu_long, mips_p8700_fpu_apu") + +(define_insn_reservation "mips_p8700_fpu_fabs" 2 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "fcmp,fmove")) + "mips_p8700_fpu_short, mips_p8700_fpu_apu") + +(define_insn_reservation "mips_p8700_fpu_fload" 8 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "fpload")) + "mips_p8700_agq_lsu") + +(define_insn_reservation "mips_p8700_fpu_fstore" 1 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "fpstore")) + "mips_p8700_agq_lsu") + +(define_insn_reservation "mips_p8700_fpu_fmadd" 8 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "fmadd")) + "mips_p8700_fpu_long, mips_p8700_fpu_apu") + +(define_insn_reservation "mips_p8700_fpu_fmul" 5 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "fmul")) + "mips_p8700_fpu_long, mips_p8700_fpu_apu") + +(define_insn_reservation "mips_p8700_fpu_div" 17 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "fdiv,fsqrt")) + "mips_p8700_fpu_long, mips_p8700_fpu_apu*17") + +(define_insn_reservation "mips_p8700_fpu_fcvt" 4 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i")) + "mips_p8700_fpu_long, mips_p8700_fpu_apu") + +(define_insn_reservation "mips_p8700_fpu_fmtc" 7 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "mtc")) + "mips_p8700_agq_lsu") + +(define_insn_reservation "mips_p8700_fpu_fmfc" 7 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "mfc")) + "mips_p8700_agq_lsu") + +;; +;; Integer pipe +;; + +(define_insn_reservation "mips_p8700_int_load" 4 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "load")) + "mips_p8700_agq_lsu") + +(define_insn_reservation "mips_p8700_int_store" 3 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "store")) + "mips_p8700_agq_lsu") + +(define_insn_reservation "mips_p8700_int_arith_1" 1 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,logical,move,bitmanip,min,max,minu,maxu,clz,ctz,rotate,atomic,condmove,crypto,mvpair,zicond")) + "mips_p8700_alq_alu | mips_p8700_agq_al2") + +(define_insn_reservation "mips_p8700_int_nop" 0 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "nop")) + "mips_p8700_alq_alu | mips_p8700_agq_al2") + +(define_insn_reservation "mips_p8700_dsp_mult" 4 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "imul,cpop,clmul")) + "mips_p8700_gpmul") + +(define_insn_reservation "mips_p8700_int_div" 8 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "idiv")) + "mips_p8700_gpdiv*5") + +(define_insn_reservation "mips_p8700_int_branch" 1 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "branch,jump,ret,sfb_alu,trap")) + "mips_p8700_agq_ctistd") + +(define_insn_reservation "mips_p8700_int_call" 2 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "call,jalr")) + 
"mips_p8700_agq_ctistd") + +;; mips-p8700 dummies insn and placeholder that had no mapping to p8700 hardware. +(define_insn_reservation "mips_p8700_unknown" 1 + (and (eq_attr "tune" "mips_p8700") + (eq_attr "type" "rdvlenb,rdvl,wrvxrm,wrfrm, + rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts, + vldux,vldox,vstux,vstox,vldff,vldr,vstr, + vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff, + vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax, + vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov, + vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip, + vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp, + vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov, + vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi, + vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof, + vired,viwred,vfredu,vfredo,vfwredu,vfwredo, + vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv, + vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down, + vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll, + vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz, + vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16, + sf_vc,sf_vc_se")) + "mips_p8700_dummies") diff --git a/gcc/config/riscv/multilib-generator b/gcc/config/riscv/multilib-generator index 4828016..6ad1cf0 100755 --- a/gcc/config/riscv/multilib-generator +++ b/gcc/config/riscv/multilib-generator @@ -159,8 +159,8 @@ for cmodel in cmodels: "e.g. rv32imafd-ilp32--" % cfg) sys.exit(1) - # Compact code model only support rv64. - if cmodel == "compact" and arch.startswith("rv32"): + # Large code model only support rv64. + if cmodel == "large" and arch.startswith("rv32"): continue arch = arch_canonicalize (arch, args.misa_spec) diff --git a/gcc/config/riscv/pipeline-checker b/gcc/config/riscv/pipeline-checker new file mode 100755 index 0000000..815698b --- /dev/null +++ b/gcc/config/riscv/pipeline-checker @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 + +# RISC-V pipeline model checker. +# Copyright (C) 2025 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
+ +import re +import sys +import argparse +from pathlib import Path +from typing import List +import pprint + +def remove_line_comments(text: str) -> str: + # Remove ';;' and everything after it on each line + cleaned_lines = [] + for line in text.splitlines(): + comment_index = line.find(';;') + if comment_index != -1: + line = line[:comment_index] + cleaned_lines.append(line) + return '\n'.join(cleaned_lines) + + +def tokenize_sexpr(s: str) -> List[str]: + # Tokenize input string, including support for balanced {...} C blocks + tokens = [] + i = 0 + while i < len(s): + c = s[i] + if c.isspace(): + i += 1 + elif c == '(' or c == ')': + tokens.append(c) + i += 1 + elif c == '"': + # Parse quoted string + j = i + 1 + while j < len(s) and s[j] != '"': + if s[j] == '\\': + j += 1 # Skip escape + j += 1 + tokens.append(s[i:j+1]) + i = j + 1 + elif c == '{': + # Parse balanced C block + depth = 1 + j = i + 1 + while j < len(s) and depth > 0: + if s[j] == '{': + depth += 1 + elif s[j] == '}': + depth -= 1 + j += 1 + tokens.append(s[i:j]) # Include enclosing braces + i = j + else: + # Parse atom + j = i + while j < len(s) and not s[j].isspace() and s[j] not in '()"{}': + j += 1 + tokens.append(s[i:j]) + i = j + return tokens + + +def parse_sexpr(tokens: List[str]) -> any: + # Recursively parse tokenized S-expression + token = tokens.pop(0) + if token == '(': + lst = [] + while tokens[0] != ')': + lst.append(parse_sexpr(tokens)) + tokens.pop(0) # Discard closing parenthesis + return lst + elif token.startswith('"') and token.endswith('"'): + return token[1:-1] # Remove surrounding quotes + elif token.startswith('{') and token.endswith('}'): + return token # Keep C code block as-is + else: + return token + + +def find_define_attr_type(ast: any) -> List[List[str]]: + # Traverse AST to find all (define_attr "type" ...) 
entries + result = [] + if isinstance(ast, list): + if ast and ast[0] == 'define_attr' and len(ast) >= 2 and ast[1] == 'type': + result.append(ast) + for elem in ast: + result.extend(find_define_attr_type(elem)) + return result + + +def parse_md_file(path: Path): + # Read file, remove comments, and parse all top-level S-expressions + with open(path, encoding='utf-8') as f: + raw_content = f.read() + clean_content = remove_line_comments(raw_content) + tokens = tokenize_sexpr(clean_content) + items = [] + while tokens: + items.append(parse_sexpr(tokens)) + return items + +def parsing_str_set(s: str) -> set: + s = s.replace('\\','').split(',') + s = set(map(lambda x: x.strip(), s)) + return s + +def get_avaliable_types(md_file_path: str): + # Main logic: parse input file and print define_attr "type" expressions + ast = parse_md_file(Path(md_file_path)) + + # Get all type from define_attr type + define_attr_types = find_define_attr_type(ast) + types = parsing_str_set (define_attr_types[0][2]) + return types + +def get_consumed_type(entry: List[str]) -> set: + # Extract the consumed type from a define_insn_reservation entry + current_type = entry[0] + if current_type in ['and', 'or']: + return get_consumed_type(entry[1]) | get_consumed_type(entry[2]) + elif current_type == 'eq_attr' and entry[1] == 'type': + return parsing_str_set(entry[2]) + return set() + +def check_pipemodel(md_file_path: str): + # Load the RISCV MD file and check for pipemodel + ast = parse_md_file(Path(md_file_path)) + + consumed_type = set() + + for entry in ast: + entry_type = entry[0] + if entry_type not in ["define_insn_reservation"]: + continue + consumed_type |= get_consumed_type(entry[3]) + return consumed_type + + +def main(): + parser = argparse.ArgumentParser(description='Check GCC pipeline model for instruction type coverage') + parser.add_argument('pipeline_model', help='Pipeline model file to check') + parser.add_argument('--base-md', + help='Base machine description file (default: riscv.md in script directory)', + default=None) + parser.add_argument('-v', '--verbose', + help='Show detailed type information', + action='store_true') + args = parser.parse_args() + + # Set default base-md path if not provided + if args.base_md is None: + script_dir = Path(__file__).parent + base_md_path = script_dir / "riscv.md" + else: + base_md_path = Path(args.base_md) + avaliable_types = get_avaliable_types(str(base_md_path)) + consumed_type = check_pipemodel(args.pipeline_model) + + if args.verbose: + print("Available types:\n", avaliable_types) + print("Consumed types:\n", consumed_type) + + if not avaliable_types.issubset(consumed_type): + print("Error: Some types are not consumed by the pipemodel") + print("Missing types:\n", avaliable_types - consumed_type) + sys.exit(1) + else: + print("All available types are consumed by the pipemodel.") + + +if __name__ == '__main__': + main() diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index f26bafc..1f9a6b5 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -27,6 +27,18 @@ (ior (match_operand 0 "const_arith_operand") (match_operand 0 "register_operand"))) +;; REG or REG+D where D fits in a simm12 and has the low 5 bits +;; off. The REG+D form can be reloaded into a temporary if needed +;; after FP elimination if that exposes an invalid offset. 
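The prefetch_operand predicate added just below accepts either a bare base register or base-plus-offset where the offset fits in a simm12 and has its low five bits clear. A hedged example of source feeding such an address (the builtin is standard GCC; whether a hardware prefetch instruction is actually emitted depends on the enabled extensions and options):

/* Illustrative only: the 64-byte offset is a multiple of 32, so it
   satisfies the (INTVAL (...) & 0x1f) == 0 test in prefetch_operand.  */
void
warm_cache (const char *p)
{
  __builtin_prefetch (p + 64, 0, 3);   /* read, high temporal locality  */
}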
+(define_predicate "prefetch_operand" + (ior (match_operand 0 "register_operand") + (and (match_test "const_arith_operand (op, VOIDmode)") + (match_test "(INTVAL (op) & 0x1f) == 0")) + (and (match_code "plus") + (match_test "register_operand (XEXP (op, 0), word_mode)") + (match_test "const_arith_operand (XEXP (op, 1), VOIDmode)") + (match_test "(INTVAL (XEXP (op, 1)) & 0x1f) == 0")))) + (define_predicate "lui_operand" (and (match_code "const_int") (match_test "LUI_OPERAND (INTVAL (op))"))) @@ -380,14 +392,6 @@ (and (match_code "const_int") (match_test "SINGLE_BIT_MASK_OPERAND (UINTVAL (op))"))) -;; Register, small constant or single bit constant for use in -;; bseti/binvi. -(define_predicate "arith_or_zbs_operand" - (ior (match_operand 0 "const_arith_operand") - (match_operand 0 "register_operand") - (and (match_test "TARGET_ZBS") - (match_operand 0 "single_bit_mask_operand")))) - (define_predicate "not_single_bit_mask_operand" (and (match_code "const_int") (match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))"))) @@ -613,7 +617,7 @@ ;; The scalar operand can be directly broadcast by RVV instructions. (define_predicate "direct_broadcast_operand" - (match_test "riscv_vector::can_be_broadcasted_p (op)")) + (match_test "riscv_vector::can_be_broadcast_p (op)")) ;; A CONST_INT operand that has exactly two bits cleared. (define_predicate "const_nottwobits_operand" @@ -685,3 +689,11 @@ (and (match_operand 0 "register_operand") (match_test "REGNO (op) == RETURN_ADDR_REGNUM || REGNO (op) == T0_REGNUM"))) + +(define_predicate "bitpos_mask_operand" + (and (match_code "const_int") + (match_test "TARGET_64BIT ? INTVAL (op) == 63 : INTVAL (op) == 31"))) + +(define_predicate "reg_or_const_int_operand" + (ior (match_operand 0 "const_int_operand") + (match_operand 0 "register_operand"))) diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc index bb4aceb..3031c29 100644 --- a/gcc/config/riscv/riscv-avlprop.cc +++ b/gcc/config/riscv/riscv-avlprop.cc @@ -508,7 +508,7 @@ pass_avlprop::execute (function *fn) simplify_replace_vlmax_avl (rinsn, prop.second); } - if (rvv_vector_bits == RVV_VECTOR_BITS_ZVL) + if (rvv_vector_bits == RVV_VECTOR_BITS_ZVL && !TARGET_XTHEADVECTOR) { /* Simplify VLMAX AVL into immediate AVL. E.g. Simplify this following case: diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index 7912b10..d2c0af3 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -36,10 +36,10 @@ along with GCC; see the file COPYING3. 
If not see struct pragma_intrinsic_flags { - int intrinsic_target_flags; + int intrinsic_riscv_isa_flags; int intrinsic_riscv_vector_elen_flags; - int intrinsic_riscv_zvl_flags; + int intrinsic_riscv_zvl_subext; int intrinsic_riscv_zvb_subext; int intrinsic_riscv_zvk_subext; }; @@ -47,16 +47,16 @@ struct pragma_intrinsic_flags static void riscv_pragma_intrinsic_flags_pollute (struct pragma_intrinsic_flags *flags) { - flags->intrinsic_target_flags = target_flags; + flags->intrinsic_riscv_isa_flags = riscv_isa_flags; flags->intrinsic_riscv_vector_elen_flags = riscv_vector_elen_flags; - flags->intrinsic_riscv_zvl_flags = riscv_zvl_flags; + flags->intrinsic_riscv_zvl_subext = riscv_zvl_subext; flags->intrinsic_riscv_zvb_subext = riscv_zvb_subext; flags->intrinsic_riscv_zvk_subext = riscv_zvk_subext; - target_flags = target_flags + riscv_isa_flags = riscv_isa_flags | MASK_VECTOR; - riscv_zvl_flags = riscv_zvl_flags + riscv_zvl_subext = riscv_zvl_subext | MASK_ZVL32B | MASK_ZVL64B | MASK_ZVL128B @@ -97,10 +97,10 @@ riscv_pragma_intrinsic_flags_pollute (struct pragma_intrinsic_flags *flags) static void riscv_pragma_intrinsic_flags_restore (struct pragma_intrinsic_flags *flags) { - target_flags = flags->intrinsic_target_flags; + riscv_isa_flags = flags->intrinsic_riscv_isa_flags; riscv_vector_elen_flags = flags->intrinsic_riscv_vector_elen_flags; - riscv_zvl_flags = flags->intrinsic_riscv_zvl_flags; + riscv_zvl_subext = flags->intrinsic_riscv_zvl_subext; riscv_zvb_subext = flags->intrinsic_riscv_zvb_subext; riscv_zvk_subext = flags->intrinsic_riscv_zvk_subext; } @@ -239,26 +239,22 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) size_t max_ext_len = 0; /* Figure out the max length of extension name for reserving buffer. */ - for (const riscv_subset_t *subset = subset_list->begin (); - subset != subset_list->end (); - subset = subset->next) - max_ext_len = MAX (max_ext_len, subset->name.length ()); + for (auto &subset : *subset_list) + max_ext_len = MAX (max_ext_len, subset.name.length ()); char *buf = (char *)alloca (max_ext_len + 10 /* For __riscv_ and '\0'. */); - for (const riscv_subset_t *subset = subset_list->begin (); - subset != subset_list->end (); - subset = subset->next) + for (auto &subset : *subset_list) { - int version_value = riscv_ext_version_value (subset->major_version, - subset->minor_version); + int version_value = riscv_ext_version_value (subset.major_version, + subset.minor_version); /* Special rule for zicsr and zifencei, it's used for ISA spec 2.2 or earlier. 
*/ - if ((subset->name == "zicsr" || subset->name == "zifencei") + if ((subset.name == "zicsr" || subset.name == "zifencei") && version_value == 0) version_value = riscv_ext_version_value (2, 0); - sprintf (buf, "__riscv_%s", subset->name.c_str ()); + sprintf (buf, "__riscv_%s", subset.name.c_str ()); builtin_define_with_int_value (buf, version_value); } } @@ -279,7 +275,8 @@ riscv_pragma_intrinsic (cpp_reader *) const char *name = TREE_STRING_POINTER (x); if (strcmp (name, "vector") == 0 - || strcmp (name, "xtheadvector") == 0) + || strcmp (name, "xtheadvector") == 0 + || strcmp (name, "xsfvcp") == 0) { struct pragma_intrinsic_flags backup_flags; diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index 2918496..98f3470 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -33,6 +33,7 @@ #define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) #endif +RISCV_TUNE("generic", generic, generic_tune_info) RISCV_TUNE("rocket", generic, rocket_tune_info) RISCV_TUNE("sifive-3-series", generic, rocket_tune_info) RISCV_TUNE("sifive-5-series", generic, rocket_tune_info) @@ -41,9 +42,17 @@ RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info) RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info) RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info) RISCV_TUNE("thead-c906", generic, thead_c906_tune_info) +RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c910", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c910v2", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info) +RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info) RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info) +RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info) RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) RISCV_TUNE("size", generic, optimize_size_tune_info) +RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info) #undef RISCV_TUNE @@ -93,6 +102,48 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_" "xtheadmemidx_xtheadmempair_xtheadsync", "thead-c906") +RISCV_CORE("xt-c908", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicsr_" + "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_" + "sstc_svinval_svnapot_svpbmt_xtheadba_xtheadbb_" + "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_" + "xtheadmac_xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c908") +RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_" + "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_" + "zvfh_sstc_svinval_svnapot_svpbmt__xtheadba_" + "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_" + "xtheadfmemidx_xtheadmac_xtheadmemidx_" + "xtheadmempair_xtheadsync_xtheadvdot", + "xt-c908") +RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + "xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c910") +RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_" + "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" + "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + "xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync", + "xt-c910v2") +RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" + "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" + 
"xtheadcondmov_xtheadfmemidx_xtheadmac_" + "xtheadmemidx_xtheadmempair_xtheadsync_" + "xtheadvector", + "xt-c910") +RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_" + "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" + "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_" + "svinval_svnapot_svpbmt_xtheadba_xtheadbb_" + "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_" + "xtheadmac_xtheadmemidx_xtheadmempair_" + "xtheadsync_xtheadvdot", + "xt-c920v2") + RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_" "ziccamoa_ziccif_zicclsm_ziccrse_zicond_zicsr_" "zifencei_zihintntl_zihintpause_zimop_za64rs_" @@ -104,4 +155,20 @@ RISCV_CORE("xiangshan-nanhu", "rv64imafdc_zba_zbb_zbc_zbs_" "zbkb_zbkc_zbkx_zknd_zkne_zknh_zksed_zksh_" "svinval_zicbom_zicboz", "xiangshan-nanhu") + +RISCV_CORE("xiangshan-kunminghu", "rv64imafdcbvh_sdtrig_sha_shcounterenw_" + "shgatpa_shlcofideleg_shtvala_shvsatpa_shvstvala_shvstvecd_" + "smaia_smcsrind_smdbltrp_smmpm_smnpm_smrnmi_smstateen_" + "ssaia_ssccptr_sscofpmf_sscounterenw_sscsrind_ssdbltrp_" + "ssnpm_sspm_ssstateen_ssstrict_sstc_sstvala_sstvecd_" + "ssu64xl_supm_svade_svbare_svinval_svnapot_svpbmt_za64rs_" + "zacas_zawrs_zba_zbb_zbc_zbkb_zbkc_zbkx_zbs_zcb_zcmop_" + "zfa_zfh_zfhmin_zic64b_zicbom_zicbop_zicboz_ziccif_" + "zicclsm_ziccrse_zicntr_zicond_zicsr_zifencei_zihintpause_" + "zihpm_zimop_zkn_zknd_zkne_zknh_zksed_zksh_zkt_zvbb_zvfh_" + "zvfhmin_zvkt_zvl128b_zvl32b_zvl64b", + "xiangshan-kunminghu") + +RISCV_CORE("mips-p8700", "rv64imfd_zicsr_zifencei_zalrsc_zba_zbb", + "mips-p8700") #undef RISCV_CORE diff --git a/gcc/config/riscv/riscv-ext-corev.def b/gcc/config/riscv/riscv-ext-corev.def new file mode 100644 index 0000000..eb97399 --- /dev/null +++ b/gcc/config/riscv/riscv-ext-corev.def @@ -0,0 +1,87 @@ +/* CORE-V extension definition file for RISC-V. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. + +Please run `make riscv-regen` in build folder to make sure updated anything. + +Format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. 
*/ + +DEFINE_RISCV_EXT( + /* NAME */ xcvalu, + /* UPPERCAE_NAME */ XCVALU, + /* FULL_NAME */ "Core-V miscellaneous ALU extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xcv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xcvbi, + /* UPPERCAE_NAME */ XCVBI, + /* FULL_NAME */ "xcvbi extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xcv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xcvelw, + /* UPPERCAE_NAME */ XCVELW, + /* FULL_NAME */ "Core-V event load word extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xcv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xcvmac, + /* UPPERCAE_NAME */ XCVMAC, + /* FULL_NAME */ "Core-V multiply-accumulate extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xcv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xcvsimd, + /* UPPERCAE_NAME */ XCVSIMD, + /* FULL_NAME */ "xcvsimd extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xcv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) diff --git a/gcc/config/riscv/riscv-ext-mips.def b/gcc/config/riscv/riscv-ext-mips.def new file mode 100644 index 0000000..5d7836d --- /dev/null +++ b/gcc/config/riscv/riscv-ext-mips.def @@ -0,0 +1,35 @@ +/* MIPS extension definition file for RISC-V. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. + +Please run `make riscv-regen` in build folder to make sure updated anything. + +Format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */ + +DEFINE_RISCV_EXT ( + /* NAME. */ xmipscmov, + /* UPPERCASE_NAME. */ XMIPSCMOV, + /* FULL_NAME. */ "Mips conditional move extension", + /* DESC. */ "", + /* URL. */ , + /* DEP_EXTS. */ ({}), + /* SUPPORTED_VERSIONS. */ ({{1, 0}}), + /* FLAG_GROUP. */ xmips, + /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS. 
*/ 0) diff --git a/gcc/config/riscv/riscv-ext-sifive.def b/gcc/config/riscv/riscv-ext-sifive.def new file mode 100644 index 0000000..c8d79da --- /dev/null +++ b/gcc/config/riscv/riscv-ext-sifive.def @@ -0,0 +1,87 @@ +/* SiFive extension definition file for RISC-V. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. + +Please run `make riscv-regen` in build folder to make sure updated anything. + +Format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */ + +DEFINE_RISCV_EXT( + /* NAME */ xsfcease, + /* UPPERCAE_NAME */ XSFCEASE, + /* FULL_NAME */ "xsfcease extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xsf, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xsfvcp, + /* UPPERCAE_NAME */ XSFVCP, + /* FULL_NAME */ "xsfvcp extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xsf, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xsfvfnrclipxfqf, + /* UPPERCAE_NAME */ XSFVFNRCLIPXFQF, + /* FULL_NAME */ "xsfvfnrclipxfqf extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xsf, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xsfvqmaccdod, + /* UPPERCAE_NAME */ XSFVQMACCDOD, + /* FULL_NAME */ "xsfvqmaccdod extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xsf, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xsfvqmaccqoq, + /* UPPERCAE_NAME */ XSFVQMACCQOQ, + /* FULL_NAME */ "xsfvqmaccqoq extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xsf, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) diff --git a/gcc/config/riscv/riscv-ext-thead.def b/gcc/config/riscv/riscv-ext-thead.def new file mode 100644 index 0000000..327d2ae --- /dev/null +++ b/gcc/config/riscv/riscv-ext-thead.def @@ -0,0 +1,191 @@ +/* T-head extension definition file for RISC-V. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. + +Please run `make riscv-regen` in build folder to make sure updated anything. + +Format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */ + +DEFINE_RISCV_EXT( + /* NAME */ xtheadba, + /* UPPERCAE_NAME */ XTHEADBA, + /* FULL_NAME */ "T-head address calculation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadbb, + /* UPPERCAE_NAME */ XTHEADBB, + /* FULL_NAME */ "T-head basic bit-manipulation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadbs, + /* UPPERCAE_NAME */ XTHEADBS, + /* FULL_NAME */ "T-head single-bit instructions extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadcmo, + /* UPPERCAE_NAME */ XTHEADCMO, + /* FULL_NAME */ "T-head cache management operations extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadcondmov, + /* UPPERCAE_NAME */ XTHEADCONDMOV, + /* FULL_NAME */ "T-head conditional move extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadfmemidx, + /* UPPERCAE_NAME */ XTHEADFMEMIDX, + /* FULL_NAME */ "T-head indexed memory operations for floating-point registers extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadfmv, + /* UPPERCAE_NAME */ XTHEADFMV, + /* FULL_NAME */ "T-head double floating-point high-bit data transmission extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ 
BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadint, + /* UPPERCAE_NAME */ XTHEADINT, + /* FULL_NAME */ "T-head acceleration interruption extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadmac, + /* UPPERCAE_NAME */ XTHEADMAC, + /* FULL_NAME */ "T-head multiply-accumulate extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadmemidx, + /* UPPERCAE_NAME */ XTHEADMEMIDX, + /* FULL_NAME */ "T-head indexed memory operation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadmempair, + /* UPPERCAE_NAME */ XTHEADMEMPAIR, + /* FULL_NAME */ "T-head two-GPR memory operation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadsync, + /* UPPERCAE_NAME */ XTHEADSYNC, + /* FULL_NAME */ "T-head multi-core synchronization extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ xtheadvector, + /* UPPERCAE_NAME */ XTHEADVECTOR, + /* FULL_NAME */ "xtheadvector extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xthead, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) diff --git a/gcc/config/riscv/riscv-ext-ventana.def b/gcc/config/riscv/riscv-ext-ventana.def new file mode 100644 index 0000000..deed47f --- /dev/null +++ b/gcc/config/riscv/riscv-ext-ventana.def @@ -0,0 +1,35 @@ +/* Ventana extension definition file for RISC-V. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. + +Please run `make riscv-regen` in build folder to make sure updated anything. 
+ +Format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */ + +DEFINE_RISCV_EXT( + /* NAME */ xventanacondops, + /* UPPERCAE_NAME */ XVENTANACONDOPS, + /* FULL_NAME */ "Ventana integer conditional operations extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xventana, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) diff --git a/gcc/config/riscv/riscv-ext.def b/gcc/config/riscv/riscv-ext.def new file mode 100644 index 0000000..6fc6d38 --- /dev/null +++ b/gcc/config/riscv/riscv-ext.def @@ -0,0 +1,2085 @@ +/* RISC-V extension definition file for RISC-V. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. + +Please run `make riscv-regen` in build folder to make sure updated anything. + +Format of DEFINE_RISCV_EXT: + NAME: + The name of the extension, e.g. "i". + UPPERCASE_NAME: + The name of the extension in uppercase, e.g. "ZBA", this used + for generate TARGET_<ext-name> marco and MASK_<ext-name> macro. + For those extension only named with single letter, it should also come with + 'RV', e.g. 'v' should use 'RVV' here. + Some of old extension like 'i' and 'm' are not follow the rule. + FULL_NAME: + The full name of the extension, e.g. "Base integer extension". + DESC: + A short description of the extension, this will used during generating + documentation, GNU Texinfo format can be used this field. + URL: + A URL for the extension. + DEP_EXTS: + A list of dependent extensions, this is a list of strings or + a list of tuples. The first element of the tuple is the name + of the extension and the second element is a function that + takes a subset_list and returns true if the extension should be added as + a dependent extension, `c` and `zca` are examples of this. + SUPPORTED_VERSIONS: + A list of tuples, each tuple contains the major version number, minor + version number and the class of the specification. The version number is a + list of integers, e.g. {2, 0} for version 2.0. The class is + a string, e.g. "ISA_SPEC_CLASS_20191213", the class of the + specification is not required for any new extension. + FLAG_GROUP: + The group of the extension, this is used to group extensions + together. The group is a string, e.g. "base", "zi", "zm", "za", "zf", + "zc", "zb", "zk" and "zi". + This should be auto generated in theory in some day... + BITMASK_GROUP_ID: + The group id of the extension for the __riscv_feature_bits. + this field should sync with riscv-c-api-doc, and keep BITMASK_NOT_YET_ALLOCATED + if not got allocated. + https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#extension-bitmask-definitions + BITMASK_BIT_POSITION: + The bit position of the extension for the __riscv_feature_bits. 
+ this field should sync with riscv-c-api-doc, and keep BITMASK_NOT_YET_ALLOCATED + if not got allocated. + https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#extension-bitmask-definitions + EXTRA_EXTENSION_FLAGS: + Extra flags for the extension, this is a bitmask of the + extra flags. The extra flags are: + - EXT_FLAG_MACRO: Set this flag if this extension is just a macro of set of + extensions, and not define any new instrcutions, new CSRs or new + behaviors, the example is `b` extension is just a macro of `zba`, `zbb` + and `zbs`. +*/ + +DEFINE_RISCV_EXT( + /* NAME */ e, + /* UPPERCASE_NAME */ RVE, + /* FULL_NAME */ "Reduced base integer extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{2, 0}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ i, + /* UPPERCASE_NAME */ RVI, + /* FULL_NAME */ "Base integer extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{2, 1, ISA_SPEC_CLASS_20191213}, + {2, 1, ISA_SPEC_CLASS_20190608}, + {2, 0, ISA_SPEC_CLASS_2P2}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 8, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ m, + /* UPPERCASE_NAME */ MUL, + /* FULL_NAME */ "Integer multiplication and division extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zmmul"}), + /* SUPPORTED_VERSIONS */ ({{2, 0}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 12, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ a, + /* UPPERCASE_NAME */ ATOMIC, + /* FULL_NAME */ "Atomic extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zaamo", "zalrsc"}), + /* SUPPORTED_VERSIONS */ ({{2, 1, ISA_SPEC_CLASS_20191213}, + {2, 0, ISA_SPEC_CLASS_20190608}, + {2, 0, ISA_SPEC_CLASS_2P2}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 0, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ f, + /* UPPERCASE_NAME */ HARD_FLOAT, + /* FULL_NAME */ "Single-precision floating-point extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{2, 2, ISA_SPEC_CLASS_20191213}, + {2, 2, ISA_SPEC_CLASS_20190608}, + {2, 0, ISA_SPEC_CLASS_2P2}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 5, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ d, + /* UPPERCASE_NAME */ DOUBLE_FLOAT, + /* FULL_NAME */ "Double-precision floating-point extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"f", "zicsr"}), + /* SUPPORTED_VERSIONS */ ({{2, 2, ISA_SPEC_CLASS_20191213}, + {2, 2, ISA_SPEC_CLASS_20190608}, + {2, 0, ISA_SPEC_CLASS_2P2}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 3, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ c, + /* UPPERCASE_NAME */ RVC, + /* FULL_NAME */ "Compressed extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca", + {"zcf", + [] (const riscv_subset_list *subset_list) -> bool + { + return subset_list->xlen () == 32 + && subset_list->lookup ("f"); + }}, + {"zcd", + [] (const riscv_subset_list *subset_list) -> bool + { + return subset_list->lookup ("d"); + }}}), + /* SUPPORTED_VERSIONS */ ({{2, 0}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ 0, + /* 
BITMASK_BIT_POSITION*/ 2, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ b, + /* UPPERCASE_NAME */ RVB, + /* FULL_NAME */ "b extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zba", "zbb", "zbs"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ v, + /* UPPERCASE_NAME */ RVV, + /* FULL_NAME */ "Vector extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl128b", "zve64d"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 21, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ h, + /* UPPERCASE_NAME */ RVH, + /* FULL_NAME */ "Hypervisor extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ base, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zic64b, + /* UPPERCASE_NAME */ ZIC64B, + /* FULL_NAME */ "Cache block size isf 64 bytes", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicbom, + /* UPPERCASE_NAME */ ZICBOM, + /* FULL_NAME */ "Cache-block management extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicbop, + /* UPPERCASE_NAME */ ZICBOP, + /* FULL_NAME */ "Cache-block prefetch extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicboz, + /* UPPERCASE_NAME */ ZICBOZ, + /* FULL_NAME */ "Cache-block zero extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 37, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ziccamoa, + /* UPPERCASE_NAME */ ZICCAMOA, + /* FULL_NAME */ "Main memory supports all atomics in A", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ziccif, + /* UPPERCASE_NAME */ ZICCIF, + /* FULL_NAME */ "Main memory supports instruction fetch with atomicity requirement", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicclsm, + /* UPPERCASE_NAME */ ZICCLSM, + /* FULL_NAME */ "Main memory supports misaligned 
loads/stores", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ziccrse, + /* UPPERCASE_NAME */ ZICCRSE, + /* FULL_NAME */ "Main memory supports forward progress on LR/SC sequences", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicfilp, + /* UPPERCASE_NAME */ ZICFILP, + /* FULL_NAME */ "zicfilp extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicfiss, + /* UPPERCASE_NAME */ ZICFISS, + /* FULL_NAME */ "zicfiss extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr", "zimop"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicntr, + /* UPPERCASE_NAME */ ZICNTR, + /* FULL_NAME */ "Standard extension for base counters and timers", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{2, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicond, + /* UPPERCASE_NAME */ ZICOND, + /* FULL_NAME */ "Integer conditional operations extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 38, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zicsr, + /* UPPERCASE_NAME */ ZICSR, + /* FULL_NAME */ "Control and status register access extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{2, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zifencei, + /* UPPERCASE_NAME */ ZIFENCEI, + /* FULL_NAME */ "Instruction-fetch fence extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{2, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zihintntl, + /* UPPERCASE_NAME */ ZIHINTNTL, + /* FULL_NAME */ "Non-temporal locality hints extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 39, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zihintpause, + /* UPPERCASE_NAME */ ZIHINTPAUSE, + /* FULL_NAME */ "Pause hint extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{2, 0}}), + /* FLAG_GROUP */ zi, 
+ /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 40, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zihpm, + /* UPPERCASE_NAME */ ZIHPM, + /* FULL_NAME */ "Standard extension for hardware performance counters", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{2, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zimop, + /* UPPERCASE_NAME */ ZIMOP, + /* FULL_NAME */ "zimop extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 1, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zilsd, + /* UPPERCASE_NAME */ ZILSD, + /* FULL_NAME */ "Load/Store pair instructions extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zi, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 1, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zmmul, + /* UPPERCASE_NAME */ ZMMUL, + /* FULL_NAME */ "Integer multiplication extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ za128rs, + /* UPPERCASE_NAME */ ZA128RS, + /* FULL_NAME */ "Reservation set size of 128 bytes", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ za, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ za64rs, + /* UPPERCASE_NAME */ ZA64RS, + /* FULL_NAME */ "Reservation set size of 64 bytes", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ za, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zaamo, + /* UPPERCASE_NAME */ ZAAMO, + /* FULL_NAME */ "zaamo extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ za, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zabha, + /* UPPERCASE_NAME */ ZABHA, + /* FULL_NAME */ "zabha extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zaamo"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ za, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zacas, + /* UPPERCASE_NAME */ ZACAS, + /* FULL_NAME */ "zacas extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zaamo"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ za, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 26, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zalrsc, + /* UPPERCASE_NAME */ ZALRSC, + /* FULL_NAME */ "zalrsc extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ 
({{1, 0}}), + /* FLAG_GROUP */ za, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zawrs, + /* UPPERCASE_NAME */ ZAWRS, + /* FULL_NAME */ "Wait-on-reservation-set extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zalrsc"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ za, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 7, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zama16b, + /* UPPERCASE_NAME */ ZAMA16B, + /* FULL_NAME */ "Zama16b extension", + /* DESC */ "Misaligned loads, stores, and AMOs to main memory regions that do" + " not cross a naturally aligned 16-byte boundary are atomic.", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ za, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zfa, + /* UPPERCASE_NAME */ ZFA, + /* FULL_NAME */ "Additional floating-point extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"f"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zf, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 34, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zfbfmin, + /* UPPERCASE_NAME */ ZFBFMIN, + /* FULL_NAME */ "zfbfmin extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zfhmin"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zf, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zfh, + /* UPPERCASE_NAME */ ZFH, + /* FULL_NAME */ "Half-precision floating-point extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zfhmin"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zf, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 35, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zfhmin, + /* UPPERCASE_NAME */ ZFHMIN, + /* FULL_NAME */ "Minimal half-precision floating-point extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"f"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zf, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 36, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zfinx, + /* UPPERCASE_NAME */ ZFINX, + /* FULL_NAME */ "Single-precision floating-point in integer registers extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zinx, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zdinx, + /* UPPERCASE_NAME */ ZDINX, + /* FULL_NAME */ "Double-precision floating-point in integer registers extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zfinx", "zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zinx, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zca, + /* UPPERCASE_NAME */ ZCA, + /* FULL_NAME */ "Integer compressed instruction extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({{"c", +[] (const riscv_subset_list *subset_list) -> bool +{ + /* For RV32 Zca implies C for one 
of these combinations of + extensions: Zca, F_Zca_Zcf and FD_Zca_Zcf_Zcd. */ + if (subset_list->xlen () == 32) + { + if (subset_list->lookup ("d")) + return subset_list->lookup ("zcf") && subset_list->lookup ("zcd"); + + if (subset_list->lookup ("f")) + return subset_list->lookup ("zcf"); + + return true; + } + + /* For RV64 Zca implies C for one of these combinations of + extensions: Zca and FD_Zca_Zcd (Zcf is not available + for RV64). */ + if (subset_list->xlen () == 64) + { + if (subset_list->lookup ("d")) + return subset_list->lookup ("zcd"); + + return true; + } + + /* Do nothing for future RV128 specification. Behaviour + for this case is not yet well defined. */ + return false; + +}}}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 2, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zcb, + /* UPPERCASE_NAME */ ZCB, + /* FULL_NAME */ "Simple compressed instruction extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 3, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zcd, + /* UPPERCASE_NAME */ ZCD, + /* FULL_NAME */ "Compressed double-precision floating point loads and stores extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 4, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zce, + /* UPPERCASE_NAME */ ZCE, + /* FULL_NAME */ "Compressed instruction extensions for embedded processors", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca", "zcb", "zcmp", "zcmt", + {"zcf", + [] (const riscv_subset_list *subset_list) -> bool + { + return subset_list->xlen () == 32 + && subset_list->lookup ("f"); + }}}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zcf, + /* UPPERCASE_NAME */ ZCF, + /* FULL_NAME */ "Compressed single-precision floating point loads and stores extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 5, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zcmop, + /* UPPERCASE_NAME */ ZCMOP, + /* FULL_NAME */ "zcmop extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 6, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zcmp, + /* UPPERCASE_NAME */ ZCMP, + /* FULL_NAME */ "Compressed push pop extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zcmt, + /* UPPERCASE_NAME */ ZCMT, + /* FULL_NAME */ "Table jump instruction extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca", "zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ 
BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zclsd, + /* UPPERCASE_NAME */ ZCLSD, + /* FULL_NAME */ "Compressed load/store pair instructions extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zca", "zilsd"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zc, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zba, + /* UPPERCASE_NAME */ ZBA, + /* FULL_NAME */ "Address calculation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 27, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zbb, + /* UPPERCASE_NAME */ ZBB, + /* FULL_NAME */ "Basic bit manipulation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 28, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zbc, + /* UPPERCASE_NAME */ ZBC, + /* FULL_NAME */ "Carry-less multiplication extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 29, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zbkb, + /* UPPERCASE_NAME */ ZBKB, + /* FULL_NAME */ "Cryptography bit-manipulation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 30, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zbkc, + /* UPPERCASE_NAME */ ZBKC, + /* FULL_NAME */ "Cryptography carry-less multiply extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 31, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zbkx, + /* UPPERCASE_NAME */ ZBKX, + /* FULL_NAME */ "Cryptography crossbar permutation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 32, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zbs, + /* UPPERCASE_NAME */ ZBS, + /* FULL_NAME */ "Single-bit operation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 33, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zk, + /* UPPERCASE_NAME */ ZK, + /* FULL_NAME */ "Standard scalar cryptography extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zkn", "zkr", "zkt"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zkn, + /* UPPERCASE_NAME */ ZKN, + /* FULL_NAME */ "NIST algorithm suite extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ 
BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zknd, + /* UPPERCASE_NAME */ ZKND, + /* FULL_NAME */ "AES Decryption extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 41, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zkne, + /* UPPERCASE_NAME */ ZKNE, + /* FULL_NAME */ "AES Encryption extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 42, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zknh, + /* UPPERCASE_NAME */ ZKNH, + /* FULL_NAME */ "Hash function extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 43, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zkr, + /* UPPERCASE_NAME */ ZKR, + /* FULL_NAME */ "Entropy source extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zks, + /* UPPERCASE_NAME */ ZKS, + /* FULL_NAME */ "ShangMi algorithm suite extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zbkb", "zbkc", "zbkx", "zksed", "zksh"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zksed, + /* UPPERCASE_NAME */ ZKSED, + /* FULL_NAME */ "SM4 block cipher extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 44, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zksh, + /* UPPERCASE_NAME */ ZKSH, + /* FULL_NAME */ "SM3 hash function extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 45, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zkt, + /* UPPERCASE_NAME */ ZKT, + /* FULL_NAME */ "Data independent execution latency extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 46, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ztso, + /* UPPERCASE_NAME */ ZTSO, + /* FULL_NAME */ "Total store ordering extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zt, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 47, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvbb, + /* UPPERCASE_NAME */ ZVBB, + /* FULL_NAME */ "Vector basic bit-manipulation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvkb"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 48, + /* 
EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvbc, + /* UPPERCASE_NAME */ ZVBC, + /* FULL_NAME */ "Vector carryless multiplication extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve64x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvb, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 49, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zve32f, + /* UPPERCASE_NAME */ ZVE32F, + /* FULL_NAME */ "Vector extensions for embedded processors", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"f", "zve32x", "zvl32b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zve, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 61, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zve32x, + /* UPPERCASE_NAME */ ZVE32X, + /* FULL_NAME */ "Vector extensions for embedded processors", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr", "zvl32b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zve, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 60, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zve64d, + /* UPPERCASE_NAME */ ZVE64D, + /* FULL_NAME */ "Vector extensions for embedded processors", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"d", "zve64f", "zvl64b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zve, + /* BITMASK_GROUP_ID */ 1, + /* BITMASK_BIT_POSITION*/ 0, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zve64f, + /* UPPERCASE_NAME */ ZVE64F, + /* FULL_NAME */ "Vector extensions for embedded processors", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"f", "zve32f", "zve64x", "zvl64b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zve, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 63, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zve64x, + /* UPPERCASE_NAME */ ZVE64X, + /* FULL_NAME */ "Vector extensions for embedded processors", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x", "zvl64b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zve, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 62, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvfbfmin, + /* UPPERCASE_NAME */ ZVFBFMIN, + /* FULL_NAME */ "Vector BF16 converts extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32f"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvf, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvfbfwma, + /* UPPERCASE_NAME */ ZVFBFWMA, + /* FULL_NAME */ "zvfbfwma extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvfbfmin", "zfbfmin"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvf, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvfh, + /* UPPERCASE_NAME */ ZVFH, + /* FULL_NAME */ "Vector half-precision floating-point extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32f", "zfhmin"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvf, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 50, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvfhmin, + /* UPPERCASE_NAME */ ZVFHMIN, + /* FULL_NAME */ "Vector minimal half-precision floating-point extension", + /* DESC */ "", + /* URL */ 
, + /* DEP_EXTS */ ({"zve32f"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvf, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 51, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvkb, + /* UPPERCASE_NAME */ ZVKB, + /* FULL_NAME */ "Vector cryptography bit-manipulation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 52, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvkg, + /* UPPERCASE_NAME */ ZVKG, + /* FULL_NAME */ "Vector GCM/GMAC extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 53, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvkn, + /* UPPERCASE_NAME */ ZVKN, + /* FULL_NAME */ "Vector NIST Algorithm Suite extension", + /* DESC */ "@samp{zvkn} will expand to", + /* URL */ , + /* DEP_EXTS */ ({"zvkned", "zvknhb", "zvkb", "zvkt"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zvknc, + /* UPPERCASE_NAME */ ZVKNC, + /* FULL_NAME */ "Vector NIST Algorithm Suite with carryless multiply extension, @samp{zvknc}", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvkn", "zvbc"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zvkned, + /* UPPERCASE_NAME */ ZVKNED, + /* FULL_NAME */ "Vector AES block cipher extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 54, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvkng, + /* UPPERCASE_NAME */ ZVKNG, + /* FULL_NAME */ "Vector NIST Algorithm Suite with GCM extension, @samp{zvkng} will expand", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvkn", "zvkg"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zvknha, + /* UPPERCASE_NAME */ ZVKNHA, + /* FULL_NAME */ "Vector SHA-2 secure hash extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 55, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvknhb, + /* UPPERCASE_NAME */ ZVKNHB, + /* FULL_NAME */ "Vector SHA-2 secure hash extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve64x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 56, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvks, + /* UPPERCASE_NAME */ ZVKS, + /* FULL_NAME */ "Vector ShangMi algorithm suite extension, @samp{zvks} will expand", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvksed", "zvksh", "zvkb", "zvkt"}), + /* SUPPORTED_VERSIONS */ ({{1, 
0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zvksc, + /* UPPERCASE_NAME */ ZVKSC, + /* FULL_NAME */ "Vector ShangMi algorithm suite with carryless multiplication extension,", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvks", "zvbc"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zvksed, + /* UPPERCASE_NAME */ ZVKSED, + /* FULL_NAME */ "Vector SM4 Block Cipher extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 57, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvksg, + /* UPPERCASE_NAME */ ZVKSG, + /* FULL_NAME */ "Vector ShangMi algorithm suite with GCM extension, @samp{zvksg} will expand", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvks", "zvkg"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO) + +DEFINE_RISCV_EXT( + /* NAME */ zvksh, + /* UPPERCASE_NAME */ ZVKSH, + /* FULL_NAME */ "Vector SM3 Secure Hash extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 58, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvkt, + /* UPPERCASE_NAME */ ZVKT, + /* FULL_NAME */ "Vector data independent execution latency extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvk, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 59, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl1024b, + /* UPPERCASE_NAME */ ZVL1024B, + /* FULL_NAME */ "Minimum vector length standard extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl512b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl128b, + /* UPPERCASE_NAME */ ZVL128B, + /* FULL_NAME */ "Minimum vector length standard extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl64b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl16384b, + /* UPPERCASE_NAME */ ZVL16384B, + /* FULL_NAME */ "zvl16384b extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl8192b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl2048b, + /* UPPERCASE_NAME */ ZVL2048B, + /* FULL_NAME */ "Minimum vector length standard extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl1024b"}), + /* SUPPORTED_VERSIONS */ 
({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl256b, + /* UPPERCASE_NAME */ ZVL256B, + /* FULL_NAME */ "Minimum vector length standard extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl128b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl32768b, + /* UPPERCASE_NAME */ ZVL32768B, + /* FULL_NAME */ "zvl32768b extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl16384b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl32b, + /* UPPERCASE_NAME */ ZVL32B, + /* FULL_NAME */ "Minimum vector length standard extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl4096b, + /* UPPERCASE_NAME */ ZVL4096B, + /* FULL_NAME */ "Minimum vector length standard extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl2048b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl512b, + /* UPPERCASE_NAME */ ZVL512B, + /* FULL_NAME */ "Minimum vector length standard extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl256b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl64b, + /* UPPERCASE_NAME */ ZVL64B, + /* FULL_NAME */ "Minimum vector length standard extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl32b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl65536b, + /* UPPERCASE_NAME */ ZVL65536B, + /* FULL_NAME */ "zvl65536b extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl32768b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zvl8192b, + /* UPPERCASE_NAME */ ZVL8192B, + /* FULL_NAME */ "zvl8192b extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zvl4096b"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zvl, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zhinx, + /* UPPERCASE_NAME */ ZHINX, + /* FULL_NAME */ "Half-precision floating-point in integer registers extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ 
({"zhinxmin"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zinx, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ zhinxmin, + /* UPPERCASE_NAME */ ZHINXMIN, + /* FULL_NAME */ "Minimal half-precision floating-point in integer registers extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zfinx"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ zinx, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sdtrig, + /* UPPERCASE_NAME */ SDTRIG, + /* FULL_NAME */ "sdtrig extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sd, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sha, + /* UPPERCASE_NAME */ SHA, + /* FULL_NAME */ "The augmented hypervisor extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h", "shcounterenw", "shgatpa", "shtvala", "shvstvala", "shvstvecd", "shvsatpa", "ssstateen"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shcounterenw, + /* UPPERCASE_NAME */ SHCOUNTERENW, + /* FULL_NAME */ "Support writeable enables for any supported counter", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h", "zihpm"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shgatpa, + /* UPPERCASE_NAME */ SHGATPA, + /* FULL_NAME */ "SvNNx4 mode supported for all modes supported by satp", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h", "ssstateen"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shlcofideleg, + /* UPPERCASE_NAME */ SHLCOFIDELEG, + /* FULL_NAME */ "Delegating LCOFI interrupts to VS-mode", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shtvala, + /* UPPERCASE_NAME */ SHTVALA, + /* FULL_NAME */ "The htval register provides all needed values", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shvstvala, + /* UPPERCASE_NAME */ SHVSTVALA, + /* FULL_NAME */ "The vstval register provides all needed values", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + 
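[Editorial aside, not part of the patch: each DEFINE_RISCV_EXT entry in this file is an X-macro record whose eleven fields follow the order visible in the comments (NAME, UPPERCASE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS). The sketch below shows, with a hypothetical consumer, how such records can be turned into a lookup table by redefining the macro and including the .def file; the struct and table names are made up, and this is not how the actual generator (gen-riscv-ext-opt.cc) is implemented.]

/* Hypothetical consumer sketch -- not part of the patch.
   BITMASK_NOT_YET_ALLOCATED is defined to -1 by riscv-opts.h in this
   series; define it here so the sketch stands alone.  */
#ifndef BITMASK_NOT_YET_ALLOCATED
#define BITMASK_NOT_YET_ALLOCATED -1
#endif

struct ext_info
{
  const char *name;		/* NAME, e.g. "zbb".  */
  const char *full_name;	/* FULL_NAME.  */
  int bitmask_group_id;		/* BITMASK_GROUP_ID or BITMASK_NOT_YET_ALLOCATED.  */
  int bitmask_bit_position;	/* BITMASK_BIT_POSITION or BITMASK_NOT_YET_ALLOCATED.  */
};

#define DEFINE_RISCV_EXT(NAME, UPPERCASE_NAME, FULL_NAME, DESC, URL,	\
			 DEP_EXTS, SUPPORTED_VERSIONS, FLAG_GROUP,	\
			 BITMASK_GROUP_ID, BITMASK_BIT_POSITION,	\
			 EXTRA_EXTENSION_FLAGS)				\
  { #NAME, FULL_NAME, BITMASK_GROUP_ID, BITMASK_BIT_POSITION },

static const struct ext_info ext_info_table[] = {
#include "riscv-ext.def"
};
#undef DEFINE_RISCV_EXT

The DEP_EXTS and SUPPORTED_VERSIONS arguments (brace lists wrapped in parentheses so their commas survive macro expansion) are simply ignored by this sketch; a dependency-resolving consumer would expand them instead.
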
+DEFINE_RISCV_EXT( + /* NAME */ shvstvecd, + /* UPPERCASE_NAME */ SHVSTVECD, + /* FULL_NAME */ "The vstvec register supports Direct mode", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shvsatpa, + /* UPPERCASE_NAME */ SHVSATPA, + /* FULL_NAME */ "The vsatp register supports all modes supported by satp", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smaia, + /* UPPERCASE_NAME */ SMAIA, + /* FULL_NAME */ "Advanced interrupt architecture extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"ssaia"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smcntrpmf, + /* UPPERCASE_NAME */ SMCNTRPMF, + /* FULL_NAME */ "Cycle and instret privilege mode filtering", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smcsrind, + /* UPPERCASE_NAME */ SMCSRIND, + /* FULL_NAME */ "Machine-Level Indirect CSR Access", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr", "sscsrind"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smepmp, + /* UPPERCASE_NAME */ SMEPMP, + /* FULL_NAME */ "PMP Enhancements for memory access and execution prevention on Machine mode", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smmpm, + /* UPPERCASE_NAME */ SMMPM, + /* FULL_NAME */ "smmpm extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smnpm, + /* UPPERCASE_NAME */ SMNPM, + /* FULL_NAME */ "smnpm extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smrnmi, + /* UPPERCASE_NAME */ SMRNMI, + /* FULL_NAME */ "Resumable non-maskable interrupts", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, 
+ /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smstateen, + /* UPPERCASE_NAME */ SMSTATEEN, + /* FULL_NAME */ "State enable extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"ssstateen"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ smdbltrp, + /* UPPERCASE_NAME */ SMDBLTRP, + /* FULL_NAME */ "Double Trap Extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sm, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ssaia, + /* UPPERCASE_NAME */ SSAIA, + /* FULL_NAME */ "Advanced interrupt architecture extension for supervisor-mode", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ssccptr, + /* UPPERCASE_NAME */ SSCCPTR, + /* FULL_NAME */ "Main memory supports page table reads", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sscofpmf, + /* UPPERCASE_NAME */ SSCOFPMF, + /* FULL_NAME */ "Count overflow & filtering extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sscounterenw, + /* UPPERCASE_NAME */ SSCOUNTERENW, + /* FULL_NAME */ "Support writeable enables for any supported counter", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sscsrind, + /* UPPERCASE_NAME */ SSCSRIND, + /* FULL_NAME */ "Supervisor-Level Indirect CSR Access", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ssnpm, + /* UPPERCASE_NAME */ SSNPM, + /* FULL_NAME */ "ssnpm extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sspm, + /* UPPERCASE_NAME */ SSPM, + /* FULL_NAME */ "sspm extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* 
EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ssstateen, + /* UPPERCASE_NAME */ SSSTATEEN, + /* FULL_NAME */ "State-enable extension for supervisor-mode", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sstc, + /* UPPERCASE_NAME */ SSTC, + /* FULL_NAME */ "Supervisor-mode timer interrupts extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sstvala, + /* UPPERCASE_NAME */ SSTVALA, + /* FULL_NAME */ "Stval provides all needed values", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ sstvecd, + /* UPPERCASE_NAME */ SSTVECD, + /* FULL_NAME */ "Stvec supports Direct mode", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ssstrict, + /* UPPERCASE_NAME */ SSSTRICT, + /* FULL_NAME */ "ssstrict extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ssdbltrp, + /* UPPERCASE_NAME */ SSDBLTRP, + /* FULL_NAME */ "Double Trap Extensions", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ ssu64xl, + /* UPPERCASE_NAME */ SSU64XL, + /* FULL_NAME */ "UXLEN=64 must be supported", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ ss, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ supm, + /* UPPERCASE_NAME */ SUPM, + /* FULL_NAME */ "supm extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ su, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ svinval, + /* UPPERCASE_NAME */ SVINVAL, + /* FULL_NAME */ "Fine-grained address-translation cache invalidation extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* 
NAME */ svnapot, + /* UPPERCASE_NAME */ SVNAPOT, + /* FULL_NAME */ "NAPOT translation contiguity extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ svpbmt, + /* UPPERCASE_NAME */ SVPBMT, + /* FULL_NAME */ "Page-based memory types extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ svvptc, + /* UPPERCASE_NAME */ SVVPTC, + /* FULL_NAME */ "svvptc extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ svadu, + /* UPPERCASE_NAME */ SVADU, + /* FULL_NAME */ "Hardware Updating of A/D Bits extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ svade, + /* UPPERCASE_NAME */ SVADE, + /* FULL_NAME */ "Cause exception when hardware updating of A/D bits is disabled", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ svbare, + /* UPPERCASE_NAME */ SVBARE, + /* FULL_NAME */ "Satp mode bare is supported", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zicsr"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sv, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +#include "riscv-ext-corev.def" +#include "riscv-ext-sifive.def" +#include "riscv-ext-thead.def" +#include "riscv-ext-ventana.def" +#include "riscv-ext-mips.def" diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt new file mode 100644 index 0000000..26d6e68 --- /dev/null +++ b/gcc/config/riscv/riscv-ext.opt @@ -0,0 +1,451 @@ +; Target options for the RISC-V port of the compiler +; +; Copyright (C) 2025 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. 
+; This file is generated automatically using +; gcc/config/riscv/gen-riscv-ext-opt.cc from: +; gcc/config/riscv/riscv-ext.def + +; Please *DO NOT* edit manually. +TargetVariable +int riscv_base_subext + +TargetVariable +int riscv_sd_subext + +TargetVariable +int riscv_sh_subext + +TargetVariable +int riscv_sm_subext + +TargetVariable +int riscv_ss_subext + +TargetVariable +int riscv_su_subext + +TargetVariable +int riscv_sv_subext + +TargetVariable +int riscv_xcv_subext + +TargetVariable +int riscv_xmips_subext + +TargetVariable +int riscv_xsf_subext + +TargetVariable +int riscv_xthead_subext + +TargetVariable +int riscv_xventana_subext + +TargetVariable +int riscv_za_subext + +TargetVariable +int riscv_zb_subext + +TargetVariable +int riscv_zc_subext + +TargetVariable +int riscv_zf_subext + +TargetVariable +int riscv_zi_subext + +TargetVariable +int riscv_zinx_subext + +TargetVariable +int riscv_zk_subext + +TargetVariable +int riscv_zm_subext + +TargetVariable +int riscv_zt_subext + +TargetVariable +int riscv_zvb_subext + +TargetVariable +int riscv_zve_subext + +TargetVariable +int riscv_zvf_subext + +TargetVariable +int riscv_zvk_subext + +TargetVariable +int riscv_zvl_subext + +Mask(RVE) Var(riscv_base_subext) + +Mask(RVI) Var(riscv_base_subext) + +Mask(MUL) Var(riscv_base_subext) + +Mask(ATOMIC) Var(riscv_base_subext) + +Mask(HARD_FLOAT) Var(riscv_base_subext) + +Mask(DOUBLE_FLOAT) Var(riscv_base_subext) + +Mask(RVC) Var(riscv_base_subext) + +Mask(RVB) Var(riscv_base_subext) + +Mask(RVV) Var(riscv_base_subext) + +Mask(RVH) Var(riscv_base_subext) + +Mask(ZIC64B) Var(riscv_zi_subext) + +Mask(ZICBOM) Var(riscv_zi_subext) + +Mask(ZICBOP) Var(riscv_zi_subext) + +Mask(ZICBOZ) Var(riscv_zi_subext) + +Mask(ZICCAMOA) Var(riscv_zi_subext) + +Mask(ZICCIF) Var(riscv_zi_subext) + +Mask(ZICCLSM) Var(riscv_zi_subext) + +Mask(ZICCRSE) Var(riscv_zi_subext) + +Mask(ZICFILP) Var(riscv_zi_subext) + +Mask(ZICFISS) Var(riscv_zi_subext) + +Mask(ZICNTR) Var(riscv_zi_subext) + +Mask(ZICOND) Var(riscv_zi_subext) + +Mask(ZICSR) Var(riscv_zi_subext) + +Mask(ZIFENCEI) Var(riscv_zi_subext) + +Mask(ZIHINTNTL) Var(riscv_zi_subext) + +Mask(ZIHINTPAUSE) Var(riscv_zi_subext) + +Mask(ZIHPM) Var(riscv_zi_subext) + +Mask(ZIMOP) Var(riscv_zi_subext) + +Mask(ZILSD) Var(riscv_zi_subext) + +Mask(ZMMUL) Var(riscv_zm_subext) + +Mask(ZA128RS) Var(riscv_za_subext) + +Mask(ZA64RS) Var(riscv_za_subext) + +Mask(ZAAMO) Var(riscv_za_subext) + +Mask(ZABHA) Var(riscv_za_subext) + +Mask(ZACAS) Var(riscv_za_subext) + +Mask(ZALRSC) Var(riscv_za_subext) + +Mask(ZAWRS) Var(riscv_za_subext) + +Mask(ZAMA16B) Var(riscv_za_subext) + +Mask(ZFA) Var(riscv_zf_subext) + +Mask(ZFBFMIN) Var(riscv_zf_subext) + +Mask(ZFH) Var(riscv_zf_subext) + +Mask(ZFHMIN) Var(riscv_zf_subext) + +Mask(ZFINX) Var(riscv_zinx_subext) + +Mask(ZDINX) Var(riscv_zinx_subext) + +Mask(ZCA) Var(riscv_zc_subext) + +Mask(ZCB) Var(riscv_zc_subext) + +Mask(ZCD) Var(riscv_zc_subext) + +Mask(ZCE) Var(riscv_zc_subext) + +Mask(ZCF) Var(riscv_zc_subext) + +Mask(ZCMOP) Var(riscv_zc_subext) + +Mask(ZCMP) Var(riscv_zc_subext) + +Mask(ZCMT) Var(riscv_zc_subext) + +Mask(ZCLSD) Var(riscv_zc_subext) + +Mask(ZBA) Var(riscv_zb_subext) + +Mask(ZBB) Var(riscv_zb_subext) + +Mask(ZBC) Var(riscv_zb_subext) + +Mask(ZBKB) Var(riscv_zb_subext) + +Mask(ZBKC) Var(riscv_zb_subext) + +Mask(ZBKX) Var(riscv_zb_subext) + +Mask(ZBS) Var(riscv_zb_subext) + +Mask(ZK) Var(riscv_zk_subext) + +Mask(ZKN) Var(riscv_zk_subext) + +Mask(ZKND) Var(riscv_zk_subext) + +Mask(ZKNE) Var(riscv_zk_subext) + +Mask(ZKNH) 
Var(riscv_zk_subext) + +Mask(ZKR) Var(riscv_zk_subext) + +Mask(ZKS) Var(riscv_zk_subext) + +Mask(ZKSED) Var(riscv_zk_subext) + +Mask(ZKSH) Var(riscv_zk_subext) + +Mask(ZKT) Var(riscv_zk_subext) + +Mask(ZTSO) Var(riscv_zt_subext) + +Mask(ZVBB) Var(riscv_zvb_subext) + +Mask(ZVBC) Var(riscv_zvb_subext) + +Mask(ZVE32F) Var(riscv_zve_subext) + +Mask(ZVE32X) Var(riscv_zve_subext) + +Mask(ZVE64D) Var(riscv_zve_subext) + +Mask(ZVE64F) Var(riscv_zve_subext) + +Mask(ZVE64X) Var(riscv_zve_subext) + +Mask(ZVFBFMIN) Var(riscv_zvf_subext) + +Mask(ZVFBFWMA) Var(riscv_zvf_subext) + +Mask(ZVFH) Var(riscv_zvf_subext) + +Mask(ZVFHMIN) Var(riscv_zvf_subext) + +Mask(ZVKB) Var(riscv_zvk_subext) + +Mask(ZVKG) Var(riscv_zvk_subext) + +Mask(ZVKN) Var(riscv_zvk_subext) + +Mask(ZVKNC) Var(riscv_zvk_subext) + +Mask(ZVKNED) Var(riscv_zvk_subext) + +Mask(ZVKNG) Var(riscv_zvk_subext) + +Mask(ZVKNHA) Var(riscv_zvk_subext) + +Mask(ZVKNHB) Var(riscv_zvk_subext) + +Mask(ZVKS) Var(riscv_zvk_subext) + +Mask(ZVKSC) Var(riscv_zvk_subext) + +Mask(ZVKSED) Var(riscv_zvk_subext) + +Mask(ZVKSG) Var(riscv_zvk_subext) + +Mask(ZVKSH) Var(riscv_zvk_subext) + +Mask(ZVKT) Var(riscv_zvk_subext) + +Mask(ZVL1024B) Var(riscv_zvl_subext) + +Mask(ZVL128B) Var(riscv_zvl_subext) + +Mask(ZVL16384B) Var(riscv_zvl_subext) + +Mask(ZVL2048B) Var(riscv_zvl_subext) + +Mask(ZVL256B) Var(riscv_zvl_subext) + +Mask(ZVL32768B) Var(riscv_zvl_subext) + +Mask(ZVL32B) Var(riscv_zvl_subext) + +Mask(ZVL4096B) Var(riscv_zvl_subext) + +Mask(ZVL512B) Var(riscv_zvl_subext) + +Mask(ZVL64B) Var(riscv_zvl_subext) + +Mask(ZVL65536B) Var(riscv_zvl_subext) + +Mask(ZVL8192B) Var(riscv_zvl_subext) + +Mask(ZHINX) Var(riscv_zinx_subext) + +Mask(ZHINXMIN) Var(riscv_zinx_subext) + +Mask(SDTRIG) Var(riscv_sd_subext) + +Mask(SHA) Var(riscv_sh_subext) + +Mask(SHCOUNTERENW) Var(riscv_sh_subext) + +Mask(SHGATPA) Var(riscv_sh_subext) + +Mask(SHLCOFIDELEG) Var(riscv_sh_subext) + +Mask(SHTVALA) Var(riscv_sh_subext) + +Mask(SHVSTVALA) Var(riscv_sh_subext) + +Mask(SHVSTVECD) Var(riscv_sh_subext) + +Mask(SHVSATPA) Var(riscv_sh_subext) + +Mask(SMAIA) Var(riscv_sm_subext) + +Mask(SMCNTRPMF) Var(riscv_sm_subext) + +Mask(SMCSRIND) Var(riscv_sm_subext) + +Mask(SMEPMP) Var(riscv_sm_subext) + +Mask(SMMPM) Var(riscv_sm_subext) + +Mask(SMNPM) Var(riscv_sm_subext) + +Mask(SMRNMI) Var(riscv_sm_subext) + +Mask(SMSTATEEN) Var(riscv_sm_subext) + +Mask(SMDBLTRP) Var(riscv_sm_subext) + +Mask(SSAIA) Var(riscv_ss_subext) + +Mask(SSCCPTR) Var(riscv_ss_subext) + +Mask(SSCOFPMF) Var(riscv_ss_subext) + +Mask(SSCOUNTERENW) Var(riscv_ss_subext) + +Mask(SSCSRIND) Var(riscv_ss_subext) + +Mask(SSNPM) Var(riscv_ss_subext) + +Mask(SSPM) Var(riscv_ss_subext) + +Mask(SSSTATEEN) Var(riscv_ss_subext) + +Mask(SSTC) Var(riscv_ss_subext) + +Mask(SSTVALA) Var(riscv_ss_subext) + +Mask(SSTVECD) Var(riscv_ss_subext) + +Mask(SSSTRICT) Var(riscv_ss_subext) + +Mask(SSDBLTRP) Var(riscv_ss_subext) + +Mask(SSU64XL) Var(riscv_ss_subext) + +Mask(SUPM) Var(riscv_su_subext) + +Mask(SVINVAL) Var(riscv_sv_subext) + +Mask(SVNAPOT) Var(riscv_sv_subext) + +Mask(SVPBMT) Var(riscv_sv_subext) + +Mask(SVVPTC) Var(riscv_sv_subext) + +Mask(SVADU) Var(riscv_sv_subext) + +Mask(SVADE) Var(riscv_sv_subext) + +Mask(SVBARE) Var(riscv_sv_subext) + +Mask(XCVALU) Var(riscv_xcv_subext) + +Mask(XCVBI) Var(riscv_xcv_subext) + +Mask(XCVELW) Var(riscv_xcv_subext) + +Mask(XCVMAC) Var(riscv_xcv_subext) + +Mask(XCVSIMD) Var(riscv_xcv_subext) + +Mask(XSFCEASE) Var(riscv_xsf_subext) + +Mask(XSFVCP) Var(riscv_xsf_subext) + +Mask(XSFVFNRCLIPXFQF) Var(riscv_xsf_subext) + 
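[Editorial aside, not part of the patch: the riscv_zvl_subext masks declared in this group are what the TARGET_MIN_VLEN change in riscv-opts.h, further down in this diff, operates on. Because every zvlNb extension pulls in the next smaller one through DEP_EXTS in riscv-ext.def, enabling zvlNb sets one bit per implied zvl extension, so the popcount of the group determines the minimum VLEN regardless of which bit positions the masks occupy. The following is a standalone, hypothetical illustration of that arithmetic; only the formula mirrors riscv-opts.h, while the function name and example value are made up.]

#include <stdio.h>

/* Mirror of the TARGET_MIN_VLEN formula: zero means "no zvl constraint",
   otherwise k set bits in the zvl group mean VLEN >= 32 << (k - 1).  */
static int
min_vlen_from_zvl_bits (unsigned zvl_subext)
{
  return zvl_subext == 0 ? 0 : 32 << (__builtin_popcount (zvl_subext) - 1);
}

int
main (void)
{
  /* E.g. enabling zvl128b also enables zvl64b and zvl32b via DEP_EXTS,
     so three bits of riscv_zvl_subext end up set and the minimum VLEN
     is 32 << 2 == 128.  The concrete bit positions do not matter, only
     the count of set bits.  */
  printf ("%d\n", min_vlen_from_zvl_bits (0x7u));	/* prints 128 */
  return 0;
}
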
+Mask(XSFVQMACCDOD) Var(riscv_xsf_subext) + +Mask(XSFVQMACCQOQ) Var(riscv_xsf_subext) + +Mask(XTHEADBA) Var(riscv_xthead_subext) + +Mask(XTHEADBB) Var(riscv_xthead_subext) + +Mask(XTHEADBS) Var(riscv_xthead_subext) + +Mask(XTHEADCMO) Var(riscv_xthead_subext) + +Mask(XTHEADCONDMOV) Var(riscv_xthead_subext) + +Mask(XTHEADFMEMIDX) Var(riscv_xthead_subext) + +Mask(XTHEADFMV) Var(riscv_xthead_subext) + +Mask(XTHEADINT) Var(riscv_xthead_subext) + +Mask(XTHEADMAC) Var(riscv_xthead_subext) + +Mask(XTHEADMEMIDX) Var(riscv_xthead_subext) + +Mask(XTHEADMEMPAIR) Var(riscv_xthead_subext) + +Mask(XTHEADSYNC) Var(riscv_xthead_subext) + +Mask(XTHEADVECTOR) Var(riscv_xthead_subext) + +Mask(XVENTANACONDOPS) Var(riscv_xventana_subext) + +Mask(XMIPSCMOV) Var(riscv_xmips_subext) diff --git a/gcc/config/riscv/riscv-ext.opt.urls b/gcc/config/riscv/riscv-ext.opt.urls new file mode 100644 index 0000000..c4f4710 --- /dev/null +++ b/gcc/config/riscv/riscv-ext.opt.urls @@ -0,0 +1,2 @@ +; Autogenerated by regenerate-opt-urls.py from gcc/config/riscv/riscv-ext.opt and generated HTML + diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 26fe228..e1a820b 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -58,7 +58,8 @@ enum riscv_microarchitecture_type { sifive_p400, sifive_p600, xiangshan, - generic_ooo + generic_ooo, + mips_p8700, }; extern enum riscv_microarchitecture_type riscv_microarchitecture; @@ -136,16 +137,16 @@ enum rvv_vector_bits_enum { /* Bit of riscv_zvl_flags will set continually, N-1 bit will set if N-bit is set, e.g. MASK_ZVL64B has set then MASK_ZVL32B is set, so we can use popcount to calculate the minimal VLEN. */ -#define TARGET_MIN_VLEN \ - ((riscv_zvl_flags == 0) \ - ? 0 \ - : 32 << (__builtin_popcount (riscv_zvl_flags) - 1)) +#define TARGET_MIN_VLEN \ + ((riscv_zvl_subext == 0) \ + ? 0 \ + : 32 << (__builtin_popcount (riscv_zvl_subext) - 1)) /* Same as TARGET_MIN_VLEN, but take an OPTS as gcc_options. */ #define TARGET_MIN_VLEN_OPTS(opts) \ - ((opts->x_riscv_zvl_flags == 0) \ + ((opts->x_riscv_zvl_subext == 0) \ ? 0 \ - : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1)) + : 32 << (__builtin_popcount (opts->x_riscv_zvl_subext) - 1)) /* The maximum LMUL according to user configuration. */ #define TARGET_MAX_LMUL \ @@ -162,4 +163,15 @@ enum riscv_tls_type { #define TARGET_VECTOR_AUTOVEC_SEGMENT \ (TARGET_VECTOR && riscv_mautovec_segment) +#define GPR2VR_COST_UNPROVIDED -1 +#define FPR2VR_COST_UNPROVIDED -1 + +/* Extra extension flags, used for carry extra info for a RISC-V extension. */ +enum +{ + EXT_FLAG_MACRO = 1 << 0, +}; + +#define BITMASK_NOT_YET_ALLOCATED -1 + #endif /* ! 
GCC_RISCV_OPTS_H */ diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def index 7e6a2a0..bc803c4 100644 --- a/gcc/config/riscv/riscv-passes.def +++ b/gcc/config/riscv/riscv-passes.def @@ -21,3 +21,5 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs); INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop); INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl); INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad); +INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst); + diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 2bedd87..a41c4c2 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -137,9 +137,12 @@ extern void riscv_expand_usadd (rtx, rtx, rtx); extern void riscv_expand_ssadd (rtx, rtx, rtx); extern void riscv_expand_ussub (rtx, rtx, rtx); extern void riscv_expand_sssub (rtx, rtx, rtx); +extern void riscv_expand_usmul (rtx, rtx, rtx); extern void riscv_expand_ustrunc (rtx, rtx); extern void riscv_expand_sstrunc (rtx, rtx); extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t); +extern bool synthesize_ior_xor (rtx_code, rtx [3]); +extern bool synthesize_and (rtx [3]); #ifdef RTX_CODE extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0); @@ -201,6 +204,8 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt); rtl_opt_pass * make_pass_avlprop (gcc::context *ctxt); rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt); rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt); +rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt); + /* Routines implemented in riscv-string.c. */ extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx); @@ -599,6 +604,7 @@ void emit_vlmax_vsetvl (machine_mode, rtx); void emit_hard_vlmax_vsetvl (machine_mode, rtx); void emit_vlmax_insn (unsigned, unsigned, rtx *); void emit_nonvlmax_insn (unsigned, unsigned, rtx *, rtx); +void emit_avltype_insn (unsigned, unsigned, rtx *, avl_type, rtx = nullptr); void emit_vlmax_insn_lra (unsigned, unsigned, rtx *, rtx); enum vlmul_type get_vlmul (machine_mode); rtx get_vlmax_rtx (machine_mode); @@ -664,6 +670,8 @@ void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode, machine_mode); void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode, machine_mode); +void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode); +void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); @@ -753,7 +761,7 @@ uint8_t get_sew (rtx_insn *); enum vlmul_type get_vlmul (rtx_insn *); int count_regno_occurrences (rtx_insn *, unsigned int); bool imm_avl_p (machine_mode); -bool can_be_broadcasted_p (rtx); +bool can_be_broadcast_p (rtx); bool gather_scatter_valid_offset_p (machine_mode); HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int); bool whole_reg_to_reg_move_p (rtx *, machine_mode, int); @@ -806,6 +814,7 @@ extern const char *th_output_move (rtx, rtx); extern bool th_print_operand_address (FILE *, machine_mode, rtx); #endif +extern bool strided_load_broadcast_p (void); extern bool riscv_use_divmod_expander (void); void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); extern bool @@ -834,6 +843,8 @@ struct riscv_tune_info { const struct riscv_tune_info * riscv_parse_tune (const char *, bool); const cpu_vector_cost *get_vector_costs 
(); +int get_gr2vr_cost (); +int get_fr2vr_cost (); enum { diff --git a/gcc/config/riscv/riscv-selftests.cc b/gcc/config/riscv/riscv-selftests.cc index 34d01ac..9ca1ffe 100644 --- a/gcc/config/riscv/riscv-selftests.cc +++ b/gcc/config/riscv/riscv-selftests.cc @@ -342,9 +342,13 @@ run_broadcast_selftests (void) expand_vector_broadcast (mode, mem); \ insn = get_last_insn (); \ src = SET_SRC (PATTERN (insn)); \ - ASSERT_TRUE (MEM_P (XEXP (src, 0))); \ - ASSERT_TRUE ( \ - rtx_equal_p (src, gen_rtx_VEC_DUPLICATE (mode, XEXP (src, 0)))); \ + if (strided_load_broadcast_p ()) \ + { \ + ASSERT_TRUE (MEM_P (XEXP (src, 0))); \ + ASSERT_TRUE ( \ + rtx_equal_p (src, \ + gen_rtx_VEC_DUPLICATE (mode, XEXP (src, 0)))); \ + } \ end_sequence (); \ /* Test vmv.v.x or vfmv.v.f. */ \ start_sequence (); \ diff --git a/gcc/config/riscv/riscv-shorten-memrefs.cc b/gcc/config/riscv/riscv-shorten-memrefs.cc index 60f330e..2e3d9f6 100644 --- a/gcc/config/riscv/riscv-shorten-memrefs.cc +++ b/gcc/config/riscv/riscv-shorten-memrefs.cc @@ -189,8 +189,7 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb) } } } - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_insn_before (seq, insn); } } diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h index 559e708..a35537d 100644 --- a/gcc/config/riscv/riscv-subset.h +++ b/gcc/config/riscv/riscv-subset.h @@ -82,6 +82,8 @@ private: const char *parse_single_multiletter_ext (const char *, const char *, const char *, bool); + std::string parse_profiles (const char*); + void handle_implied_ext (const char *); bool check_implied_ext (); void handle_combine_ext (); @@ -107,9 +109,6 @@ public: static riscv_subset_list *parse (const char *, location_t); const char *parse_single_ext (const char *, bool exact_single_p = true); - const riscv_subset_t *begin () const {return m_head;}; - const riscv_subset_t *end () const {return NULL;}; - int match_score (riscv_subset_list *) const; void set_loc (location_t); @@ -117,6 +116,65 @@ public: void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; } void finalize (); + + class iterator + { + public: + explicit iterator(riscv_subset_t *node) : m_node(node) {} + + riscv_subset_t &operator*() const { return *m_node; } + riscv_subset_t *operator->() const { return m_node; } + + iterator &operator++() + { + if (m_node) + m_node = m_node->next; + return *this; + } + + bool operator!=(const iterator &other) const + { + return m_node != other.m_node; + } + + bool operator==(const iterator &other) const + { + return m_node == other.m_node; + } + + private: + riscv_subset_t *m_node; + }; + + iterator begin() { return iterator(m_head); } + iterator end() { return iterator(nullptr); } + + class const_iterator + { + public: + explicit const_iterator(const riscv_subset_t *node) : m_node(node) {} + + const riscv_subset_t &operator*() const { return *m_node; } + const riscv_subset_t *operator->() const { return m_node; } + + const_iterator &operator++() + { + if (m_node) + m_node = m_node->next; + return *this; + } + + bool operator!=(const const_iterator &other) const + { + return m_node != other.m_node; + } + + private: + const riscv_subset_t *m_node; + }; + + const_iterator begin() const { return const_iterator(m_head); } + const_iterator end() const { return const_iterator(nullptr); } }; extern const riscv_subset_list *riscv_cmdline_subset_list (void); @@ -127,6 +185,5 @@ extern bool riscv_minimal_hwprobe_feature_bits (const char *, location_t); extern bool riscv_ext_is_subset 
(struct cl_target_option *, struct cl_target_option *); -extern int riscv_x_target_flags_isa_mask (void); #endif /* ! GCC_RISCV_SUBSET_H */ diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 1d96865..8ad3025 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -257,11 +257,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const { std::string local_arch = m_subset_list->to_string (true); const char* local_arch_str = local_arch.c_str (); - struct cl_target_option *default_opts - = TREE_TARGET_OPTION (target_option_default_node); - if (opts->x_riscv_arch_string != default_opts->x_riscv_arch_string) - free (CONST_CAST (void *, (const void *) opts->x_riscv_arch_string)); - opts->x_riscv_arch_string = xstrdup (local_arch_str); + opts->x_riscv_arch_string = ggc_strdup (local_arch_str); riscv_set_arch_by_subset_list (m_subset_list, opts); } diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index aae2d27..242ac08 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -408,7 +408,7 @@ emit_vlmax_insn_lra (unsigned icode, unsigned insn_flags, rtx *ops, rtx vl) gcc_assert (!can_create_pseudo_p ()); machine_mode mode = GET_MODE (ops[0]); - if (imm_avl_p (mode)) + if (imm_avl_p (mode) && !TARGET_XTHEADVECTOR) { /* Even though VL is a real hardreg already allocated since it is post-RA now, we still gain benefits that we emit @@ -437,6 +437,26 @@ emit_nonvlmax_insn (unsigned icode, unsigned insn_flags, rtx *ops, rtx vl) e.emit_insn ((enum insn_code) icode, ops); } +/* Emit either a VLMAX insn or a non-VLMAX insn depending on TYPE. For a + non-VLMAX insn, the length must be specified in VL. */ + +void +emit_avltype_insn (unsigned icode, unsigned insn_flags, rtx *ops, + avl_type type, rtx vl) +{ + if (type != avl_type::VLMAX && vl != NULL_RTX) + { + insn_expander<RVV_INSN_OPERANDS_MAX> e (insn_flags, false); + e.set_vl (vl); + e.emit_insn ((enum insn_code) icode, ops); + } + else + { + insn_expander<RVV_INSN_OPERANDS_MAX> e (insn_flags, true); + e.emit_insn ((enum insn_code) icode, ops); + } +} + /* Return true if the vector duplicated by a super element which is the fusion of consecutive elements. @@ -1171,461 +1191,511 @@ expand_vector_init_trailing_same_elem (rtx target, } static void -expand_const_vector (rtx target, rtx src) +expand_const_vec_duplicate (rtx target, rtx src, rtx elt) { machine_mode mode = GET_MODE (target); rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); - rtx elt; - if (const_vec_duplicate_p (src, &elt)) + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + gcc_assert (rtx_equal_p (elt, const0_rtx) + || rtx_equal_p (elt, const1_rtx)); + + rtx ops[] = {result, src}; + emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops); + } + else if (valid_vec_immediate_p (src)) { - if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) - { - gcc_assert (rtx_equal_p (elt, const0_rtx) - || rtx_equal_p (elt, const1_rtx)); - rtx ops[] = {result, src}; - emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops); - } /* Element in range -16 ~ 15 integer or 0.0 floating-point, we use vmv.v.i instruction. 
*/ - else if (valid_vec_immediate_p (src)) + rtx ops[] = {result, src}; + emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops); + } + else + { + /* Emit vec_duplicate<mode> split pattern before RA so that + we could have a better optimization opportunity in LICM + which will hoist vmv.v.x outside the loop and in fwprop && combine + which will transform 'vv' into 'vx' instruction. + + The reason we don't emit vec_duplicate<mode> split pattern during + RA since the split stage after RA is a too late stage to generate + RVV instruction which need an additional register (We can't + allocate a new register after RA) for VL operand of vsetvl + instruction (vsetvl a5, zero). */ + if (lra_in_progress) { - rtx ops[] = {result, src}; - emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops); + rtx ops[] = {result, elt}; + emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops); } else { - /* Emit vec_duplicate<mode> split pattern before RA so that - we could have a better optimization opportunity in LICM - which will hoist vmv.v.x outside the loop and in fwprop && combine - which will transform 'vv' into 'vx' instruction. - - The reason we don't emit vec_duplicate<mode> split pattern during - RA since the split stage after RA is a too late stage to generate - RVV instruction which need an additional register (We can't - allocate a new register after RA) for VL operand of vsetvl - instruction (vsetvl a5, zero). */ - if (lra_in_progress) - { - rtx ops[] = {result, elt}; - emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops); - } - else - { - struct expand_operand ops[2]; - enum insn_code icode = optab_handler (vec_duplicate_optab, mode); - gcc_assert (icode != CODE_FOR_nothing); - create_output_operand (&ops[0], result, mode); - create_input_operand (&ops[1], elt, GET_MODE_INNER (mode)); - expand_insn (icode, 2, ops); - result = ops[0].value; - } + struct expand_operand ops[2]; + enum insn_code icode = optab_handler (vec_duplicate_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + create_output_operand (&ops[0], result, mode); + create_input_operand (&ops[1], elt, GET_MODE_INNER (mode)); + expand_insn (icode, 2, ops); + result = ops[0].value; } - - if (result != target) - emit_move_insn (target, result); - return; } - /* Support scalable const series vector. */ - rtx base, step; - if (const_vec_series_p (src, &base, &step)) - { - expand_vec_series (result, base, step); + if (result != target) + emit_move_insn (target, result); +} - if (result != target) - emit_move_insn (target, result); - return; +static void +expand_const_vec_series (rtx target, rtx base, rtx step) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + + expand_vec_series (result, base, step); + + if (result != target) + emit_move_insn (target, result); +} + + +/* We handle the case that we can find a vector container to hold + element bitsize = NPATTERNS * ele_bitsize. + + NPATTERNS = 8, element width = 8 + v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } + In this case, we can combine NPATTERNS element into a larger + element. Use element width = 64 and broadcast a vector with + all element equal to 0x0706050403020100. */ + +static void +expand_const_vector_duplicate_repeating (rtx target, rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? 
target : gen_reg_rtx (mode); + rtx ele = builder->get_merged_repeating_sequence (); + rtx dup; + + if (lra_in_progress) + { + dup = gen_reg_rtx (builder->new_mode ()); + rtx ops[] = {dup, ele}; + emit_vlmax_insn (code_for_pred_broadcast (builder->new_mode ()), + UNARY_OP, ops); } + else + dup = expand_vector_broadcast (builder->new_mode (), ele); - /* Handle variable-length vector. */ - unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src); - unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); - rvv_builder builder (mode, npatterns, nelts_per_pattern); - for (unsigned int i = 0; i < nelts_per_pattern; i++) + emit_move_insn (result, gen_lowpart (mode, dup)); + + if (result != target) + emit_move_insn (target, result); +} + +/* We handle the case that we can't find a vector container to hold + element bitsize = NPATTERNS * ele_bitsize. + + NPATTERNS = 8, element width = 16 + v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } + Since NPATTERNS * element width = 128, we can't find a container + to hold it. + + In this case, we use NPATTERNS merge operations to generate such + vector. */ + +static void +expand_const_vector_duplicate_default (rtx target, rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + unsigned int nbits = builder->npatterns () - 1; + + /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ + rtx vid = gen_reg_rtx (builder->int_mode ()); + rtx op[] = {vid}; + emit_vlmax_insn (code_for_pred_series (builder->int_mode ()), NULLARY_OP, op); + + /* Generate vid_repeat = { 0, 1, ... nbits, ... } */ + rtx vid_repeat = gen_reg_rtx (builder->int_mode ()); + rtx and_ops[] = {vid_repeat, vid, + gen_int_mode (nbits, builder->inner_int_mode ())}; + emit_vlmax_insn (code_for_pred_scalar (AND, builder->int_mode ()), BINARY_OP, + and_ops); + + rtx tmp1 = gen_reg_rtx (builder->mode ()); + rtx dup_ops[] = {tmp1, builder->elt (0)}; + emit_vlmax_insn (code_for_pred_broadcast (builder->mode ()), UNARY_OP, + dup_ops); + + for (unsigned int i = 1; i < builder->npatterns (); i++) { - for (unsigned int j = 0; j < npatterns; j++) - builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j)); + /* Generate mask according to i. */ + rtx mask = gen_reg_rtx (builder->mask_mode ()); + rtx const_vec = gen_const_vector_dup (builder->int_mode (), i); + expand_vec_cmp (mask, EQ, vid_repeat, const_vec); + + /* Merge scalar to each i. */ + rtx tmp2 = gen_reg_rtx (builder->mode ()); + rtx merge_ops[] = {tmp2, tmp1, builder->elt (i), mask}; + insn_code icode = code_for_pred_merge_scalar (builder->mode ()); + emit_vlmax_insn (icode, MERGE_OP, merge_ops); + tmp1 = tmp2; } - builder.finalize (); - if (CONST_VECTOR_DUPLICATE_P (src)) + emit_move_insn (result, tmp1); + + if (result != target) + emit_move_insn (target, result); +} + +/* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1 + E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... } + NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... } + The elements within NPATTERNS are not necessary regular. */ +static void +expand_const_vector_duplicate (rtx target, rvv_builder *builder) +{ + if (builder->can_duplicate_repeating_sequence_p ()) + return expand_const_vector_duplicate_repeating (target, builder); + else + return expand_const_vector_duplicate_default (target, builder); +} + +static void +expand_const_vector_single_step_npatterns (rtx target, rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? 
target : gen_reg_rtx (mode); + + /* Describe the case by choosing NPATTERNS = 4 as an example. */ + insn_code icode; + + /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ + rtx vid = gen_reg_rtx (builder->mode ()); + rtx vid_ops[] = {vid}; + icode = code_for_pred_series (builder->mode ()); + emit_vlmax_insn (icode, NULLARY_OP, vid_ops); + + if (builder->npatterns_all_equal_p ()) { - /* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1 - E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... } - NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... } - The elements within NPATTERNS are not necessary regular. */ - if (builder.can_duplicate_repeating_sequence_p ()) + /* Generate the variable-length vector following this rule: + { a, a, a + step, a + step, a + step * 2, a + step * 2, ...} + E.g. { 0, 0, 8, 8, 16, 16, ... } */ + + /* We want to create a pattern where value[idx] = floor (idx / + NPATTERNS). As NPATTERNS is always a power of two we can + rewrite this as = idx & -NPATTERNS. */ + /* Step 2: VID AND -NPATTERNS: + { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... } */ + rtx imm = gen_int_mode (-builder->npatterns (), builder->inner_mode ()); + rtx tmp1 = gen_reg_rtx (builder->mode ()); + rtx and_ops[] = {tmp1, vid, imm}; + icode = code_for_pred_scalar (AND, builder->mode ()); + emit_vlmax_insn (icode, BINARY_OP, and_ops); + + /* Step 3: Convert to step size 1. */ + rtx tmp2 = gen_reg_rtx (builder->mode ()); + /* log2 (npatterns) to get the shift amount to convert + Eg. { 0, 0, 0, 0, 4, 4, ... } + into { 0, 0, 0, 0, 1, 1, ... }. */ + HOST_WIDE_INT shift_amt = exact_log2 (builder->npatterns ()); + rtx shift = gen_int_mode (shift_amt, builder->inner_mode ()); + rtx shift_ops[] = {tmp2, tmp1, shift}; + icode = code_for_pred_scalar (ASHIFTRT, builder->mode ()); + emit_vlmax_insn (icode, BINARY_OP, shift_ops); + + /* Step 4: Multiply to step size n. */ + HOST_WIDE_INT step_size = + INTVAL (builder->elt (builder->npatterns ())) + - INTVAL (builder->elt (0)); + rtx tmp3 = gen_reg_rtx (builder->mode ()); + if (pow2p_hwi (step_size)) { - /* We handle the case that we can find a vector container to hold - element bitsize = NPATTERNS * ele_bitsize. - - NPATTERNS = 8, element width = 8 - v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } - In this case, we can combine NPATTERNS element into a larger - element. Use element width = 64 and broadcast a vector with - all element equal to 0x0706050403020100. */ - rtx ele = builder.get_merged_repeating_sequence (); - rtx dup; - if (lra_in_progress) - { - dup = gen_reg_rtx (builder.new_mode ()); - rtx ops[] = {dup, ele}; - emit_vlmax_insn (code_for_pred_broadcast - (builder.new_mode ()), UNARY_OP, ops); - } - else - dup = expand_vector_broadcast (builder.new_mode (), ele); - emit_move_insn (result, gen_lowpart (mode, dup)); + /* Power of 2 can be handled with a left shift. */ + HOST_WIDE_INT shift = exact_log2 (step_size); + rtx shift_amount = gen_int_mode (shift, Pmode); + insn_code icode = code_for_pred_scalar (ASHIFT, mode); + rtx ops[] = {tmp3, tmp2, shift_amount}; + emit_vlmax_insn (icode, BINARY_OP, ops); } else { - /* We handle the case that we can't find a vector container to hold - element bitsize = NPATTERNS * ele_bitsize. - - NPATTERNS = 8, element width = 16 - v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } - Since NPATTERNS * element width = 128, we can't find a container - to hold it. - - In this case, we use NPATTERNS merge operations to generate such - vector. 
*/ - unsigned int nbits = npatterns - 1; - - /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ - rtx vid = gen_reg_rtx (builder.int_mode ()); - rtx op[] = {vid}; - emit_vlmax_insn (code_for_pred_series (builder.int_mode ()), - NULLARY_OP, op); - - /* Generate vid_repeat = { 0, 1, ... nbits, ... } */ - rtx vid_repeat = gen_reg_rtx (builder.int_mode ()); - rtx and_ops[] = {vid_repeat, vid, - gen_int_mode (nbits, builder.inner_int_mode ())}; - emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()), - BINARY_OP, and_ops); - - rtx tmp1 = gen_reg_rtx (builder.mode ()); - rtx dup_ops[] = {tmp1, builder.elt (0)}; - emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), UNARY_OP, - dup_ops); - for (unsigned int i = 1; i < builder.npatterns (); i++) - { - /* Generate mask according to i. */ - rtx mask = gen_reg_rtx (builder.mask_mode ()); - rtx const_vec = gen_const_vector_dup (builder.int_mode (), i); - expand_vec_cmp (mask, EQ, vid_repeat, const_vec); - - /* Merge scalar to each i. */ - rtx tmp2 = gen_reg_rtx (builder.mode ()); - rtx merge_ops[] = {tmp2, tmp1, builder.elt (i), mask}; - insn_code icode = code_for_pred_merge_scalar (builder.mode ()); - emit_vlmax_insn (icode, MERGE_OP, merge_ops); - tmp1 = tmp2; - } - emit_move_insn (result, tmp1); + rtx mult_amt = gen_int_mode (step_size, builder->inner_mode ()); + insn_code icode = code_for_pred_scalar (MULT, builder->mode ()); + rtx ops[] = {tmp3, tmp2, mult_amt}; + emit_vlmax_insn (icode, BINARY_OP, ops); + } + + /* Step 5: Add starting value to all elements. */ + HOST_WIDE_INT init_val = INTVAL (builder->elt (0)); + if (init_val == 0) + emit_move_insn (result, tmp3); + else + { + rtx dup = gen_const_vector_dup (builder->mode (), init_val); + rtx add_ops[] = {result, tmp3, dup}; + icode = code_for_pred (PLUS, builder->mode ()); + emit_vlmax_insn (icode, BINARY_OP, add_ops); } } - else if (CONST_VECTOR_STEPPED_P (src)) + else { - gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); - if (builder.single_step_npatterns_p ()) + /* Generate the variable-length vector following this rule: + { a, b, a + step, b + step, a + step*2, b + step*2, ... } */ + if (builder->npatterns_vid_diff_repeated_p ()) { - /* Describe the case by choosing NPATTERNS = 4 as an example. */ - insn_code icode; - - /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ - rtx vid = gen_reg_rtx (builder.mode ()); - rtx vid_ops[] = {vid}; - icode = code_for_pred_series (builder.mode ()); - emit_vlmax_insn (icode, NULLARY_OP, vid_ops); - - if (builder.npatterns_all_equal_p ()) + /* Case 1: For example as below: + {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... } + We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is + repeated as below after minus vid. + {3, 1, -1, -3, 3, 1, -1, -3...} + Then we can simplify the diff code gen to at most + npatterns(). */ + rvv_builder v (builder->mode (), builder->npatterns (), 1); + + /* Step 1: Generate diff = TARGET - VID. */ + for (unsigned int i = 0; i < v.npatterns (); ++i) { - /* Generate the variable-length vector following this rule: - { a, a, a + step, a + step, a + step * 2, a + step * 2, ...} - E.g. { 0, 0, 8, 8, 16, 16, ... } */ - - /* We want to create a pattern where value[idx] = floor (idx / - NPATTERNS). As NPATTERNS is always a power of two we can - rewrite this as = idx & -NPATTERNS. */ - /* Step 2: VID AND -NPATTERNS: - { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... 
} - */ - rtx imm - = gen_int_mode (-builder.npatterns (), builder.inner_mode ()); - rtx tmp1 = gen_reg_rtx (builder.mode ()); - rtx and_ops[] = {tmp1, vid, imm}; - icode = code_for_pred_scalar (AND, builder.mode ()); - emit_vlmax_insn (icode, BINARY_OP, and_ops); - - /* Step 3: Convert to step size 1. */ - rtx tmp2 = gen_reg_rtx (builder.mode ()); - /* log2 (npatterns) to get the shift amount to convert - Eg. { 0, 0, 0, 0, 4, 4, ... } - into { 0, 0, 0, 0, 1, 1, ... }. */ - HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ()) ; - rtx shift = gen_int_mode (shift_amt, builder.inner_mode ()); - rtx shift_ops[] = {tmp2, tmp1, shift}; - icode = code_for_pred_scalar (ASHIFTRT, builder.mode ()); - emit_vlmax_insn (icode, BINARY_OP, shift_ops); - - /* Step 4: Multiply to step size n. */ - HOST_WIDE_INT step_size = - INTVAL (builder.elt (builder.npatterns ())) - - INTVAL (builder.elt (0)); - rtx tmp3 = gen_reg_rtx (builder.mode ()); - if (pow2p_hwi (step_size)) - { - /* Power of 2 can be handled with a left shift. */ - HOST_WIDE_INT shift = exact_log2 (step_size); - rtx shift_amount = gen_int_mode (shift, Pmode); - insn_code icode = code_for_pred_scalar (ASHIFT, mode); - rtx ops[] = {tmp3, tmp2, shift_amount}; - emit_vlmax_insn (icode, BINARY_OP, ops); - } - else - { - rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ()); - insn_code icode = code_for_pred_scalar (MULT, builder.mode ()); - rtx ops[] = {tmp3, tmp2, mult_amt}; - emit_vlmax_insn (icode, BINARY_OP, ops); - } - - /* Step 5: Add starting value to all elements. */ - HOST_WIDE_INT init_val = INTVAL (builder.elt (0)); - if (init_val == 0) - emit_move_insn (result, tmp3); - else - { - rtx dup = gen_const_vector_dup (builder.mode (), init_val); - rtx add_ops[] = {result, tmp3, dup}; - icode = code_for_pred (PLUS, builder.mode ()); - emit_vlmax_insn (icode, BINARY_OP, add_ops); - } + poly_int64 diff = rtx_to_poly_int64 (builder->elt (i)) - i; + v.quick_push (gen_int_mode (diff, v.inner_mode ())); } - else - { - /* Generate the variable-length vector following this rule: - { a, b, a + step, b + step, a + step*2, b + step*2, ... } */ - if (builder.npatterns_vid_diff_repeated_p ()) - { - /* Case 1: For example as below: - {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... } - We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is - repeated as below after minus vid. - {3, 1, -1, -3, 3, 1, -1, -3...} - Then we can simplify the diff code gen to at most - npatterns(). */ - rvv_builder v (builder.mode (), builder.npatterns (), 1); - - /* Step 1: Generate diff = TARGET - VID. */ - for (unsigned int i = 0; i < v.npatterns (); ++i) - { - poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i; - v.quick_push (gen_int_mode (diff, v.inner_mode ())); - } - - /* Step 2: Generate result = VID + diff. */ - rtx vec = v.build (); - rtx add_ops[] = {result, vid, vec}; - emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()), - BINARY_OP, add_ops); - } - else - { - /* Case 2: For example as below: - { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... } - */ - rvv_builder v (builder.mode (), builder.npatterns (), 1); - - /* Step 1: Generate { a, b, a, b, ... } */ - for (unsigned int i = 0; i < v.npatterns (); ++i) - v.quick_push (builder.elt (i)); - rtx new_base = v.build (); - - /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS). 
*/ - rtx shift_count - = gen_int_mode (exact_log2 (builder.npatterns ()), - builder.inner_mode ()); - rtx tmp1 = gen_reg_rtx (builder.mode ()); - rtx shift_ops[] = {tmp1, vid, shift_count}; - emit_vlmax_insn (code_for_pred_scalar - (LSHIFTRT, builder.mode ()), BINARY_OP, - shift_ops); - - /* Step 3: Generate tmp2 = tmp1 * step. */ - rtx tmp2 = gen_reg_rtx (builder.mode ()); - rtx step - = simplify_binary_operation (MINUS, builder.inner_mode (), - builder.elt (v.npatterns()), - builder.elt (0)); - expand_vec_series (tmp2, const0_rtx, step, tmp1); - - /* Step 4: Generate result = tmp2 + new_base. */ - rtx add_ops[] = {result, tmp2, new_base}; - emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()), - BINARY_OP, add_ops); - } - } + /* Step 2: Generate result = VID + diff. */ + rtx vec = v.build (); + rtx add_ops[] = {result, vid, vec}; + emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP, + add_ops); } - else if (builder.interleaved_stepped_npatterns_p ()) + else { - rtx base1 = builder.elt (0); - rtx base2 = builder.elt (1); - poly_int64 step1 - = rtx_to_poly_int64 (builder.elt (builder.npatterns ())) - - rtx_to_poly_int64 (base1); - poly_int64 step2 - = rtx_to_poly_int64 (builder.elt (builder.npatterns () + 1)) - - rtx_to_poly_int64 (base2); + /* Case 2: For example as below: + { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... } + */ + rvv_builder v (builder->mode (), builder->npatterns (), 1); + + /* Step 1: Generate { a, b, a, b, ... } */ + for (unsigned int i = 0; i < v.npatterns (); ++i) + v.quick_push (builder->elt (i)); + rtx new_base = v.build (); + + /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS). */ + rtx shift_count = gen_int_mode (exact_log2 (builder->npatterns ()), + builder->inner_mode ()); + rtx tmp1 = gen_reg_rtx (builder->mode ()); + rtx shift_ops[] = {tmp1, vid, shift_count}; + emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder->mode ()), + BINARY_OP, shift_ops); + + /* Step 3: Generate tmp2 = tmp1 * step. */ + rtx tmp2 = gen_reg_rtx (builder->mode ()); + rtx step + = simplify_binary_operation (MINUS, builder->inner_mode (), + builder->elt (v.npatterns()), + builder->elt (0)); + expand_vec_series (tmp2, const0_rtx, step, tmp1); + + /* Step 4: Generate result = tmp2 + new_base. */ + rtx add_ops[] = {result, tmp2, new_base}; + emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP, + add_ops); + } + } - /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW - integer vector mode to generate such vector efficiently. + if (result != target) + emit_move_insn (target, result); +} - E.g. EEW = 16, { 2, 0, 4, 0, ... } +static void +expand_const_vector_interleaved_stepped_npatterns (rtx target, rtx src, + rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + rtx base1 = builder->elt (0); + rtx base2 = builder->elt (1); - can be interpreted into: + poly_int64 step1 = rtx_to_poly_int64 (builder->elt (builder->npatterns ())) + - rtx_to_poly_int64 (base1); + poly_int64 step2 = + rtx_to_poly_int64 (builder->elt (builder->npatterns () + 1)) + - rtx_to_poly_int64 (base2); - EEW = 32, { 2, 4, ... }. + /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW + integer vector mode to generate such vector efficiently. - Both the series1 and series2 may overflow before taking the IOR - to generate the final result.
However, only series1 matters - because the series2 will shift before IOR, thus the overflow - bits will never pollute the final result. + E.g. EEW = 16, { 2, 0, 4, 0, ... } - For now we forbid the negative steps and overflow, and they - will fall back to the default merge way to generate the - const_vector. */ + can be interpreted into: - unsigned int new_smode_bitsize = builder.inner_bits_size () * 2; - scalar_int_mode new_smode; - machine_mode new_mode; - poly_uint64 new_nunits - = exact_div (GET_MODE_NUNITS (builder.mode ()), 2); + EEW = 32, { 2, 4, ... }. - poly_int64 base1_poly = rtx_to_poly_int64 (base1); - bool overflow_smode_p = false; + Both the series1 and series2 may overflow before taking the IOR + to generate the final result. However, only series1 matters + because the series2 will shift before IOR, thus the overflow + bits will never pollute the final result. - if (!step1.is_constant ()) - overflow_smode_p = true; - else - { - int elem_count = XVECLEN (src, 0); - uint64_t step1_val = step1.to_constant (); - uint64_t base1_val = base1_poly.to_constant (); - uint64_t elem_val = base1_val + (elem_count - 1) * step1_val; + For now we forbid the negative steps and overflow, and they + will fall back to the default merge way to generate the + const_vector. */ - if ((elem_val >> builder.inner_bits_size ()) != 0) - overflow_smode_p = true; - } + unsigned int new_smode_bitsize = builder->inner_bits_size () * 2; + scalar_int_mode new_smode; + machine_mode new_mode; + poly_uint64 new_nunits = exact_div (GET_MODE_NUNITS (builder->mode ()), 2); + + poly_int64 base1_poly = rtx_to_poly_int64 (base1); + bool overflow_smode_p = false; + + if (!step1.is_constant ()) + overflow_smode_p = true; + else + { + int elem_count = XVECLEN (src, 0); + uint64_t step1_val = step1.to_constant (); + uint64_t base1_val = base1_poly.to_constant (); + uint64_t elem_val = base1_val + (elem_count - 1) * step1_val; + + if ((elem_val >> builder->inner_bits_size ()) != 0) + overflow_smode_p = true; + } - if (known_ge (step1, 0) && known_ge (step2, 0) - && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode) - && get_vector_mode (new_smode, new_nunits).exists (&new_mode) - && !overflow_smode_p) + if (known_ge (step1, 0) && known_ge (step2, 0) + && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode) + && get_vector_mode (new_smode, new_nunits).exists (&new_mode) + && !overflow_smode_p) + { + rtx tmp1 = gen_reg_rtx (new_mode); + base1 = gen_int_mode (base1_poly, new_smode); + expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode)); + + if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0)) + /* { 1, 0, 2, 0, ... }. */ + emit_move_insn (result, gen_lowpart (mode, tmp1)); + else if (known_eq (step2, 0)) + { + /* { 1, 1, 2, 1, ... }. */ + rtx scalar = expand_simple_binop ( + Xmode, ASHIFT, gen_int_mode (rtx_to_poly_int64 (base2), Xmode), + gen_int_mode (builder->inner_bits_size (), Xmode), NULL_RTX, false, + OPTAB_DIRECT); + scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0); + rtx tmp2 = gen_reg_rtx (new_mode); + rtx ior_ops[] = {tmp2, tmp1, scalar}; + emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode), BINARY_OP, + ior_ops); + emit_move_insn (result, gen_lowpart (mode, tmp2)); + } + else + { + /* { 1, 3, 2, 6, ... }. 
*/ + rtx tmp2 = gen_reg_rtx (new_mode); + base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode); + expand_vec_series (tmp2, base2, gen_int_mode (step2, new_smode)); + rtx shifted_tmp2; + rtx shift = gen_int_mode (builder->inner_bits_size (), Xmode); + if (lra_in_progress) { - rtx tmp1 = gen_reg_rtx (new_mode); - base1 = gen_int_mode (base1_poly, new_smode); - expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode)); - - if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0)) - /* { 1, 0, 2, 0, ... }. */ - emit_move_insn (result, gen_lowpart (mode, tmp1)); - else if (known_eq (step2, 0)) - { - /* { 1, 1, 2, 1, ... }. */ - rtx scalar = expand_simple_binop ( - Xmode, ASHIFT, - gen_int_mode (rtx_to_poly_int64 (base2), Xmode), - gen_int_mode (builder.inner_bits_size (), Xmode), - NULL_RTX, false, OPTAB_DIRECT); - scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0); - rtx tmp2 = gen_reg_rtx (new_mode); - rtx ior_ops[] = {tmp2, tmp1, scalar}; - emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode), - BINARY_OP, ior_ops); - emit_move_insn (result, gen_lowpart (mode, tmp2)); - } - else - { - /* { 1, 3, 2, 6, ... }. */ - rtx tmp2 = gen_reg_rtx (new_mode); - base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode); - expand_vec_series (tmp2, base2, - gen_int_mode (step2, new_smode)); - rtx shifted_tmp2; - rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode); - if (lra_in_progress) - { - shifted_tmp2 = gen_reg_rtx (new_mode); - rtx shift_ops[] = {shifted_tmp2, tmp2, shift}; - emit_vlmax_insn (code_for_pred_scalar - (ASHIFT, new_mode), BINARY_OP, - shift_ops); - } - else - shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2, - shift, NULL_RTX, false, - OPTAB_DIRECT); - rtx tmp3 = gen_reg_rtx (new_mode); - rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2}; - emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, - ior_ops); - emit_move_insn (result, gen_lowpart (mode, tmp3)); - } + shifted_tmp2 = gen_reg_rtx (new_mode); + rtx shift_ops[] = {shifted_tmp2, tmp2, shift}; + emit_vlmax_insn (code_for_pred_scalar (ASHIFT, new_mode), + BINARY_OP, shift_ops); } else - { - rtx vid = gen_reg_rtx (mode); - expand_vec_series (vid, const0_rtx, const1_rtx); - /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */ - rtx shifted_vid; - if (lra_in_progress) - { - shifted_vid = gen_reg_rtx (mode); - rtx shift = gen_int_mode (1, Xmode); - rtx shift_ops[] = {shifted_vid, vid, shift}; - emit_vlmax_insn (code_for_pred_scalar - (ASHIFT, mode), BINARY_OP, - shift_ops); - } - else - shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid, - const1_rtx, NULL_RTX, - false, OPTAB_DIRECT); - rtx tmp1 = gen_reg_rtx (mode); - rtx tmp2 = gen_reg_rtx (mode); - expand_vec_series (tmp1, base1, - gen_int_mode (step1, builder.inner_mode ()), - shifted_vid); - expand_vec_series (tmp2, base2, - gen_int_mode (step2, builder.inner_mode ()), - shifted_vid); - - /* Transform into { 0, 1, 0, 1, 0, 1, ... }. 
*/ - rtx and_vid = gen_reg_rtx (mode); - rtx and_ops[] = {and_vid, vid, const1_rtx}; - emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, - and_ops); - rtx mask = gen_reg_rtx (builder.mask_mode ()); - expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode)); - - rtx ops[] = {result, tmp1, tmp2, mask}; - emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops); - } + shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2, shift, + NULL_RTX, false, OPTAB_DIRECT); + rtx tmp3 = gen_reg_rtx (new_mode); + rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2}; + emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, ior_ops); + emit_move_insn (result, gen_lowpart (mode, tmp3)); } - else - /* TODO: We will enable more variable-length vector in the future. */ - gcc_unreachable (); } else - gcc_unreachable (); + { + rtx vid = gen_reg_rtx (mode); + expand_vec_series (vid, const0_rtx, const1_rtx); + /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */ + rtx shifted_vid; + if (lra_in_progress) + { + shifted_vid = gen_reg_rtx (mode); + rtx shift = gen_int_mode (1, Xmode); + rtx shift_ops[] = {shifted_vid, vid, shift}; + emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, mode), BINARY_OP, + shift_ops); + } + else + shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx, + NULL_RTX, false, OPTAB_DIRECT); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + expand_vec_series (tmp1, base1, + gen_int_mode (step1, builder->inner_mode ()), + shifted_vid); + expand_vec_series (tmp2, base2, + gen_int_mode (step2, builder->inner_mode ()), + shifted_vid); + + /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */ + rtx and_vid = gen_reg_rtx (mode); + rtx and_ops[] = {and_vid, vid, const1_rtx}; + emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops); + rtx mask = gen_reg_rtx (builder->mask_mode ()); + expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode)); + + rtx ops[] = {result, tmp1, tmp2, mask}; + emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops); + } if (result != target) emit_move_insn (target, result); } +static void +expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder) +{ + gcc_assert (GET_MODE_CLASS (GET_MODE (target)) == MODE_VECTOR_INT); + + if (builder->single_step_npatterns_p ()) + return expand_const_vector_single_step_npatterns (target, builder); + else if (builder->interleaved_stepped_npatterns_p ()) + return expand_const_vector_interleaved_stepped_npatterns (target, src, + builder); + + /* TODO: We will enable more variable-length vector in the future. */ + gcc_unreachable (); +} + +static void +expand_const_vector (rtx target, rtx src) +{ + rtx elt; + if (const_vec_duplicate_p (src, &elt)) + return expand_const_vec_duplicate (target, src, elt); + + /* Support scalable const series vector. */ + rtx base, step; + if (const_vec_series_p (src, &base, &step)) + return expand_const_vec_series(target, base, step); + + /* Handle variable-length vector. 
*/ + unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src); + unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); + rvv_builder builder (GET_MODE (target), npatterns, nelts_per_pattern); + + for (unsigned int i = 0; i < nelts_per_pattern; i++) + { + for (unsigned int j = 0; j < npatterns; j++) + builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j)); + } + + builder.finalize (); + + if (CONST_VECTOR_DUPLICATE_P (src)) + return expand_const_vector_duplicate (target, &builder); + else if (CONST_VECTOR_STEPPED_P (src)) + return expand_const_vector_stepped (target, src, &builder); + + gcc_unreachable (); +} + /* Get the frm mode with given CONST_INT rtx, the default mode is FRM_DYN. */ enum floating_point_rounding_mode @@ -2094,21 +2164,40 @@ sew64_scalar_helper (rtx *operands, rtx *scalar_op, rtx vl, return false; } + bool avoid_strided_broadcast = false; if (CONST_INT_P (*scalar_op)) { if (maybe_gt (GET_MODE_SIZE (scalar_mode), GET_MODE_SIZE (Pmode))) - *scalar_op = force_const_mem (scalar_mode, *scalar_op); + { + if (strided_load_broadcast_p ()) + *scalar_op = force_const_mem (scalar_mode, *scalar_op); + else + avoid_strided_broadcast = true; + } else *scalar_op = force_reg (scalar_mode, *scalar_op); } rtx tmp = gen_reg_rtx (vector_mode); - rtx ops[] = {tmp, *scalar_op}; - if (type == VLMAX) - emit_vlmax_insn (code_for_pred_broadcast (vector_mode), UNARY_OP, ops); + if (!avoid_strided_broadcast) + { + rtx ops[] = {tmp, *scalar_op}; + emit_avltype_insn (code_for_pred_broadcast (vector_mode), UNARY_OP, ops, + type, vl); + } else - emit_nonvlmax_insn (code_for_pred_broadcast (vector_mode), UNARY_OP, ops, - vl); + { + /* Load scalar as V1DI and broadcast via vrgather.vi. */ + rtx tmp1 = gen_reg_rtx (V1DImode); + emit_move_insn (tmp1, lowpart_subreg (V1DImode, *scalar_op, + scalar_mode)); + tmp1 = lowpart_subreg (vector_mode, tmp1, V1DImode); + + rtx ops[] = {tmp, tmp1, CONST0_RTX (Pmode)}; + emit_vlmax_insn (code_for_pred_gather_scalar (vector_mode), + BINARY_OP, ops); + } + emit_vector_func (operands, tmp); return true; @@ -2771,6 +2860,28 @@ autovectorize_vector_modes (vector_modes *modes, bool) i++; size = base_size / (1U << i); } + + /* If the user specified the exact mode to use look if it is available and + remove all other ones before returning. */ + if (riscv_autovec_mode) + { + auto_vector_modes ms; + ms.safe_splice (*modes); + modes->truncate (0); + + for (machine_mode mode : ms) + { + if (!strcmp (GET_MODE_NAME (mode), riscv_autovec_mode)) + { + modes->safe_push (mode); + return 0; + } + } + + /* Nothing found, fall back to regular handling. */ + modes->safe_splice (ms); + } + /* Enable LOOP_VINFO comparison in COST model. */ return VECT_COMPARE_COSTS; } @@ -4651,7 +4762,7 @@ prepare_ternary_operands (rtx *ops) ops[4], ops[1], ops[6], ops[7], ops[9])); ops[5] = ops[4] = ops[0]; } - else + else if (VECTOR_MODE_P (GET_MODE (ops[2]))) { /* Swap the multiplication ops if the fallback value is the second of the two. */ @@ -4661,8 +4772,10 @@ prepare_ternary_operands (rtx *ops) /* TODO: ??? Maybe we could support splitting FMA (a, 4, b) into PLUS (ASHIFT (a, 2), b) according to uarchs. */ } - gcc_assert (rtx_equal_p (ops[5], RVV_VUNDEF (mode)) - || rtx_equal_p (ops[5], ops[2]) || rtx_equal_p (ops[5], ops[4])); + gcc_assert ( + rtx_equal_p (ops[5], RVV_VUNDEF (mode)) || rtx_equal_p (ops[5], ops[2]) + || (!VECTOR_MODE_P (GET_MODE (ops[2])) && rtx_equal_p (ops[5], ops[3])) + || rtx_equal_p (ops[5], ops[4])); } /* Expand VEC_MASK_LEN_{LOAD_LANES,STORE_LANES}. 
*/ @@ -5448,6 +5561,81 @@ expand_vec_oct_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode, expand_vec_double_sstrunc (op_0, quad_rtx, quad_mode); } +/* Expand the binary vx combine with the format like v2 = vop(vec_dup(x), v1). + Aka the first op comes from the vec_duplicate, and the second op is + the vector reg. */ + +void +expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, + rtx_code code, machine_mode mode) +{ + enum insn_code icode; + + switch (code) + { + case PLUS: + case AND: + case IOR: + case XOR: + case MULT: + case SMAX: + case UMAX: + case SMIN: + case UMIN: + case US_PLUS: + case SS_PLUS: + icode = code_for_pred_scalar (code, mode); + break; + case MINUS: + icode = code_for_pred_sub_reverse_scalar (mode); + break; + default: + gcc_unreachable (); + } + + rtx ops[] = {op_0, op_1, op_2}; + emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops); +} + +/* Expand the binary vx combine with the format like v2 = vop(v1, vec_dup(x)). + Aka the second op comes from the vec_duplicate, and the first op is + the vector reg. */ + +void +expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2, + rtx_code code, machine_mode mode) +{ + enum insn_code icode; + + switch (code) + { + case MINUS: + case AND: + case IOR: + case XOR: + case MULT: + case DIV: + case UDIV: + case MOD: + case UMOD: + case SMAX: + case UMAX: + case SMIN: + case UMIN: + case US_PLUS: + case US_MINUS: + case SS_PLUS: + case SS_MINUS: + icode = code_for_pred_scalar (code, mode); + break; + default: + gcc_unreachable (); + } + + rtx ops[] = {op_0, op_1, op_2}; + emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops); +} + /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as well. */ void @@ -5620,9 +5808,9 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) return count; } -/* Return true if the OP can be directly broadcasted. */ +/* Return true if the OP can be directly broadcast. */ bool -can_be_broadcasted_p (rtx op) +can_be_broadcast_p (rtx op) { machine_mode mode = GET_MODE (op); /* We don't allow RA (register allocation) reload generate @@ -5634,7 +5822,8 @@ can_be_broadcasted_p (rtx op) return false; if (satisfies_constraint_K (op) || register_operand (op, mode) - || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode))) + || (strided_load_broadcast_p () && satisfies_constraint_Wdm (op)) + || rtx_equal_p (op, CONST0_RTX (mode))) return true; return can_create_pseudo_p () && nonmemory_operand (op, mode); diff --git a/gcc/config/riscv/riscv-vect-permconst.cc b/gcc/config/riscv/riscv-vect-permconst.cc new file mode 100644 index 0000000..087f26a --- /dev/null +++ b/gcc/config/riscv/riscv-vect-permconst.cc @@ -0,0 +1,318 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or(at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#define IN_TARGET_CODE 1 +#define INCLUDE_ALGORITHM +#define INCLUDE_FUNCTIONAL +#define INCLUDE_MEMORY + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "backend.h" +#include "rtl.h" +#include "target.h" +#include "tree-pass.h" +#include "df.h" +#include "rtl-ssa.h" +#include "cfgcleanup.h" +#include "insn-attr.h" +#include "tm-constrs.h" +#include "insn-opinit.h" +#include "cfgrtl.h" + +/* So the basic idea of this pass is to identify loads of permutation + constants from the constant pool which could instead be trivially + derived from some earlier vector permutation constant. This will + replace a memory load from the constant pool with a vadd.vi + instruction. + + Conceptually this is much like the related_values optimization in + CSE, reload_cse_move2add or using SLSR to optimize constant synthesis. + If we wanted to make this generic I would suggest putting it into CSE + and providing target hooks to determine if particular permutation + constants could be derived from earlier permutation constants. */ + +const pass_data pass_data_vect_permconst = { + RTL_PASS, /* type */ + "vect_permconst", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +/* Entry in the hash table. We "normalize" the permutation constant + by adjusting all entries by the value in the first element. This + allows simple hashing to discover permutation constants that differ + by a single constant across all their elements and may be derived + from each other with a vadd.vi. */ + +struct vector_permconst_entry +{ + /* The CONST_VECTOR in normalized form (first entry is zero). */ + /* We could avoid copying the vector with a more customized hash + routine which took care of normalization. */ + rtx normalized_vec; + + /* The destination register holding the CONST_VECTOR. When the optimization + applies this will be used as a source operand in the vadd.vi. */ + rtx dest; + + /* The insn generating DEST, the only reason we need this is because we + do not invalidate entries which implies we have to verify that DEST + is unchanged between INSN and the point where we want to use DEST + to derive a new permutation constant. */ + rtx_insn *insn; + + /* The bias of this entry used for normalization. If this value is added + to each element in NORMALIZED_VEC we would have the original permutation + constant. */ + HOST_WIDE_INT bias; +}; + +struct const_vector_hasher : nofree_ptr_hash <vector_permconst_entry> +{ + static inline hashval_t hash (const vector_permconst_entry *); + static inline bool equal (const vector_permconst_entry *, + const vector_permconst_entry *); +}; + +inline bool +const_vector_hasher::equal (const vector_permconst_entry *vpe1, + const vector_permconst_entry *vpe2) +{ + /* Do the cheap tests first, namely that the mode and number of entries + match between the two entries. */ + if (GET_MODE (vpe1->normalized_vec) != GET_MODE (vpe2->normalized_vec)) + return false; + + if (CONST_VECTOR_NUNITS (vpe1->normalized_vec).to_constant () + != CONST_VECTOR_NUNITS (vpe2->normalized_vec).to_constant ()) + return false; + + /* Check the value of each entry in the vector. We violate structure + sharing rules inside this pass, so while pointer equality would normally + be OK, it isn't here.
*/ + for (int i = 0; + i < CONST_VECTOR_NUNITS (vpe1->normalized_vec).to_constant (); + i++) + if (!rtx_equal_p (CONST_VECTOR_ELT (vpe1->normalized_vec, i), + CONST_VECTOR_ELT (vpe2->normalized_vec, i))) + return false; + + return true; +} + +inline hashval_t +const_vector_hasher::hash (const vector_permconst_entry *vpe) +{ + int do_not_record; + return hash_rtx (vpe->normalized_vec, Pmode, &do_not_record, NULL, false); +} + + +class vector_permconst : public rtl_opt_pass +{ +public: + vector_permconst (gcc::context *ctxt) + : rtl_opt_pass (pass_data_vect_permconst, ctxt) {} + + /* opt_pass methods: */ + virtual bool gate (function *) final override + { + return TARGET_VECTOR && optimize > 0; + } + virtual unsigned int execute (function *) final override; + +private: + void process_bb (basic_block); + hash_table<const_vector_hasher> *vector_permconst_table; +}; // class pass_vector_permconst + +/* Try to optimize vector permutation constants in BB. */ +void +vector_permconst::process_bb (basic_block bb) +{ + vector_permconst_table = new hash_table<const_vector_hasher> (11); + + /* Walk the insns in BB searching for vector loads from the constant pool + which can be satisfied by adjusting an earlier load with trivial + arithmetic. */ + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + { + if (!INSN_P (insn)) + continue; + + rtx set = single_set (insn); + if (!set) + continue; + + rtx dest = SET_DEST (set); + if (GET_MODE_CLASS (GET_MODE (dest)) != MODE_VECTOR_INT) + continue; + + rtx src = SET_SRC (set); + if (!MEM_P (src)) + continue; + + /* A load from the constant pool should have a REG_EQUAL + note with the vector constant in the note. */ + rtx note = find_reg_equal_equiv_note (insn); + if (!note + || REG_NOTE_KIND (note) != REG_EQUAL + || GET_CODE (XEXP (note, 0)) != CONST_VECTOR) + continue; + + if (!CONST_VECTOR_NUNITS (XEXP (note, 0)).is_constant ()) + continue; + + /* XXX Do we need to consider other forms of constants? */ + + /* We want to be selective about what gets past this point since + we make a copy of the vector and possibly enter it into the + hash table. So reject cases that are not likely a permutation + constant. ie, negative bias and large biases. We arbitrarily + use 16k as the largest vector size in bits we try to optimize. + + It may seem like a bias outside the range of vadd.vi should + be rejected, but what really matters is the difference of + biases across the two permutation constants. */ + rtx cvec = XEXP (note, 0); + HOST_WIDE_INT bias = INTVAL (CONST_VECTOR_ELT (cvec, 0)); + if (bias < 0 || bias > 16384 / 8) + continue; + + /* We need to verify that each element would be a valid value + in the inner mode after applying the bias. */ + machine_mode inner = GET_MODE_INNER (GET_MODE (cvec)); + HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant (); + int i; + for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) + { + HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias; + if (val != sext_hwi (val, precision)) + break; + } + + /* If the loop terminated early, then we found a case where the + adjusted constant would not fit, so we can't record the constant + for this case (it's unlikely to be useful anyway). */ + if (i != CONST_VECTOR_NUNITS (cvec).to_constant ()) + continue; + + /* At this point we have a load of a constant integer vector from the + constant pool. That constant integer vector is hopefully a + permutation constant. We need to make a copy of the vector and + normalize it to zero.
+ + XXX This violates structure sharing conventions. */ + rtvec_def *nvec = rtvec_alloc (CONST_VECTOR_NUNITS (cvec).to_constant ()); + + for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) + nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias); + + rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec); + + /* Now that we have a normalized vector, look it up in the hash table, + inserting it if it wasn't already in the table. */ + struct vector_permconst_entry tmp; + tmp.normalized_vec = copy; + struct vector_permconst_entry **slot + = vector_permconst_table->find_slot (&tmp, INSERT); + if (*slot == NULL) + { + /* This constant was not in the table, so initialize the hash table + entry. */ + *slot = XNEW (vector_permconst_entry); + (*slot)->normalized_vec = copy; + (*slot)->dest = dest; + (*slot)->bias = bias; + (*slot)->insn = insn; + } + else + { + /* A hit in the hash table. We may be able to optimize this case. + + If the difference in biases fits in the immediate range for + vadd.vi, then we may optimize. */ + HOST_WIDE_INT adjustment = bias - (*slot)->bias; + if (IN_RANGE (adjustment, -16, 15)) + { + /* We also need to make sure the destination register was not + modified. I've chosen to test for that at optimization time + rather than invalidate entries in the table. This could be + changed to use REG_TICK like schemes or true invalidation if + this proves too compile-time costly. */ + if (!reg_set_between_p ((*slot)->dest, (*slot)->insn, insn)) + { + /* Instead of loading from the constant pool, adjust the + output of the earlier insn into our destination. */ + rtx x = gen_const_vec_duplicate (GET_MODE (copy), + GEN_INT (adjustment)); + rtx plus = gen_rtx_PLUS (GET_MODE (copy), (*slot)->dest, x); + rtx set = gen_rtx_SET (dest, plus); + rtx_insn *new_insn = emit_insn_after (set, insn); + /* XXX Should we copy over the REG_EQUAL note first? */ + delete_insn (insn); + insn = new_insn; + } + } + + /* We always keep the hash table entry pointing to the most recent + INSN that could generate the normalized entry. We can adjust + in the future if data says it's useful to do so. This just + keeps things simple for now. + + For example, we might want to keep multiple entries if they + have different biases. */ + (*slot)->dest = dest; + (*slot)->bias = bias; + (*slot)->insn = insn; + } + } + + /* We construct and tear down the table for each block. This may + be overly expensive. */ + vector_permconst_table->empty (); +} + +/* Main entry point for this pass. */ +unsigned int +vector_permconst::execute (function *fn) +{ + /* Handle each block independently. While this should work nicely on EBBs, + let's wait for real world cases where it matters before adding that + complexity.
*/ + basic_block bb; + FOR_EACH_BB_FN (bb, fn) + process_bb (bb); + + return 0; +} + +rtl_opt_pass * +make_pass_vector_permconst (gcc::context *ctxt) +{ + return new vector_permconst (ctxt); +} diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index fc21b20..9832eb9 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -908,6 +908,8 @@ struct vset_def : public build_base { poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0)); poly_int64 inner_size = GET_MODE_SIZE (c.arg_mode (2)); + if (maybe_eq (inner_size, 0)) + return false; unsigned int nvecs = exact_div (outer_size, inner_size).to_constant (); return c.require_immediate (1, 0, nvecs - 1); } @@ -920,6 +922,8 @@ struct vget_def : public misc_def { poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0)); poly_int64 inner_size = GET_MODE_SIZE (c.ret_mode ()); + if (maybe_eq (inner_size, 0)) + return false; unsigned int nvecs = exact_div (outer_size, inner_size).to_constant (); return c.require_immediate (1, 0, nvecs - 1); } @@ -1343,6 +1347,52 @@ struct sf_vfnrclip_def : public build_base } }; +/* sf_vcix_se_def class. */ +struct sf_vcix_se_def : public build_base +{ + char *get_name (function_builder &b, const function_instance &instance, + bool overloaded_p) const override + { + /* Return nullptr if it is overloaded. */ + if (overloaded_p) + return nullptr; + + b.append_base_name (instance.base_name); + + /* vop --> vop<op>_se_<type>. */ + if (!overloaded_p) + { + b.append_name (operand_suffixes[instance.op_info->op]); + b.append_name ("_se"); + b.append_name (type_suffixes[instance.type.index].vector); + } + return b.finish_name (); + } +}; + +/* sf_vcix_def class. */ +struct sf_vcix_def : public build_base +{ + char *get_name (function_builder &b, const function_instance &instance, + bool overloaded_p) const override + { + /* Return nullptr if it is overloaded. */ + if (overloaded_p) + return nullptr; + + b.append_base_name (instance.base_name); + + /* vop --> vop_<type>. */ + if (!overloaded_p) + { + b.append_name (operand_suffixes[instance.op_info->op]); + b.append_name (type_suffixes[instance.type.index].vector); + } + return b.finish_name (); + } +}; + + SHAPE(vsetvl, vsetvl) SHAPE(vsetvl, vsetvlmax) SHAPE(loadstore, loadstore) @@ -1379,4 +1429,6 @@ SHAPE(crypto_vi, crypto_vi) SHAPE(crypto_vv_no_op_type, crypto_vv_no_op_type) SHAPE (sf_vqmacc, sf_vqmacc) SHAPE (sf_vfnrclip, sf_vfnrclip) +SHAPE(sf_vcix_se, sf_vcix_se) +SHAPE(sf_vcix, sf_vcix) } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.h b/gcc/config/riscv/riscv-vector-builtins-shapes.h index 858799b..2f2636e 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.h +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.h @@ -62,6 +62,8 @@ extern const function_shape *const crypto_vv_no_op_type; /* Sifive vendor extension. */ extern const function_shape *const sf_vqmacc; extern const function_shape *const sf_vfnrclip; +extern const function_shape *const sf_vcix_se; +extern const function_shape *const sf_vcix; } } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def b/gcc/config/riscv/riscv-vector-builtins-types.def index 857b637..ade6644 100644 --- a/gcc/config/riscv/riscv-vector-builtins-types.def +++ b/gcc/config/riscv/riscv-vector-builtins-types.def @@ -369,6 +369,18 @@ along with GCC; see the file COPYING3. 
If not see #define DEF_RVV_XFQF_OPS(TYPE, REQUIRE) #endif +/* Use "DEF_RVV_X2_U_OPS" macro include unsigned integer which will + be iterated and registered as intrinsic functions. */ +#ifndef DEF_RVV_X2_U_OPS +#define DEF_RVV_X2_U_OPS(TYPE, REQUIRE) +#endif + +/* Use "DEF_RVV_X2_WU_OPS" macro include widen unsigned integer which will + be iterated and registered as intrinsic functions. */ +#ifndef DEF_RVV_X2_WU_OPS +#define DEF_RVV_X2_WU_OPS(TYPE, REQUIRE) +#endif + DEF_RVV_I_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_I_OPS (vint8mf4_t, 0) DEF_RVV_I_OPS (vint8mf2_t, 0) @@ -1463,6 +1475,32 @@ DEF_RVV_XFQF_OPS (vint8mf2_t, 0) DEF_RVV_XFQF_OPS (vint8m1_t, 0) DEF_RVV_XFQF_OPS (vint8m2_t, 0) +DEF_RVV_X2_U_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X2_U_OPS (vuint8mf4_t, 0) +DEF_RVV_X2_U_OPS (vuint8mf2_t, 0) +DEF_RVV_X2_U_OPS (vuint8m1_t, 0) +DEF_RVV_X2_U_OPS (vuint8m2_t, 0) +DEF_RVV_X2_U_OPS (vuint8m4_t, 0) +DEF_RVV_X2_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X2_U_OPS (vuint16mf2_t, 0) +DEF_RVV_X2_U_OPS (vuint16m1_t, 0) +DEF_RVV_X2_U_OPS (vuint16m2_t, 0) +DEF_RVV_X2_U_OPS (vuint16m4_t, 0) +DEF_RVV_X2_U_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X2_U_OPS (vuint32m1_t, 0) +DEF_RVV_X2_U_OPS (vuint32m2_t, 0) +DEF_RVV_X2_U_OPS (vuint32m4_t, 0) + +DEF_RVV_X2_WU_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X2_WU_OPS (vuint16mf2_t, 0) +DEF_RVV_X2_WU_OPS (vuint16m1_t, 0) +DEF_RVV_X2_WU_OPS (vuint16m2_t, 0) +DEF_RVV_X2_WU_OPS (vuint16m4_t, 0) +DEF_RVV_X2_WU_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X2_WU_OPS (vuint32m1_t, 0) +DEF_RVV_X2_WU_OPS (vuint32m2_t, 0) +DEF_RVV_X2_WU_OPS (vuint32m4_t, 0) + #undef DEF_RVV_I_OPS #undef DEF_RVV_U_OPS #undef DEF_RVV_F_OPS @@ -1519,3 +1557,5 @@ DEF_RVV_XFQF_OPS (vint8m2_t, 0) #undef DEF_RVV_F32_OPS #undef DEF_RVV_QMACC_OPS #undef DEF_RVV_XFQF_OPS +#undef DEF_RVV_X2_U_OPS +#undef DEF_RVV_X2_WU_OPS diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index d2fe849..8810af0 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -544,6 +544,20 @@ static const rvv_type_info crypto_sew64_ops[] = { #include "riscv-vector-builtins-types.def" {NUM_VECTOR_TYPES, 0}}; +/* A list of signed integer will be registered for Sifive Xsfvcp intrinsic*/ +/* functions. */ +static const rvv_type_info x2_u_ops[] = { +#define DEF_RVV_X2_U_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE}, +#include "riscv-vector-builtins-types.def" + {NUM_VECTOR_TYPES, 0}}; + +/* A list of signed integer will be registered for Sifive Xsfvcp intrinsic*/ +/* functions. */ +static const rvv_type_info x2_wu_ops[] = { +#define DEF_RVV_X2_WU_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE}, +#include "riscv-vector-builtins-types.def" + {NUM_VECTOR_TYPES, 0}}; + /* A list of signed integer will be registered for intrinsic * functions. */ static const rvv_type_info qmacc_ops[] = { @@ -805,7 +819,7 @@ static CONSTEXPR const rvv_arg_type_info bf_wwxv_args[] static CONSTEXPR const rvv_arg_type_info m_args[] = {rvv_arg_type_info (RVV_BASE_mask), rvv_arg_type_info_end}; -/* A list of args for vector_type func (scalar_type) function. */ +/* A list of args for vector_type func (scalar_type/sf.vc) function. 
*/ static CONSTEXPR const rvv_arg_type_info x_args[] = {rvv_arg_type_info (RVV_BASE_scalar), rvv_arg_type_info_end}; @@ -1055,6 +1069,161 @@ static CONSTEXPR const rvv_arg_type_info scalar_ptr_size_args[] rvv_arg_type_info (RVV_BASE_size), rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info_end}; +/* A list of args for vector_type func (sf.vc.x) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_x_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.v.x) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_v_x_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.i) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_i_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + + +/* A list of args for vector_type func (sf.vc.i) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_v_i_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.vv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_vv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.v.vv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_v_vv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.xv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_xv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.v.xv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_v_xv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.iv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_iv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.v.iv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_v_iv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.fv) function. 
*/ +static CONSTEXPR const rvv_arg_type_info sf_vc_fv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar_float), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.v.fv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_v_fv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar_float), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.vvv/sf.vc.v.vvv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_vvv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.xvv/sf.vc.v.xvv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_xvv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.ivv/sf.vc.v.ivv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_ivv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_unsigned_vector), + rvv_arg_type_info (RVV_BASE_unsigned_vector), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.fvv/sf.vc.v.fvv) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_fvv_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar_float), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.vvw/sf.vc.v.vvw) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_vvw_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_x2_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.xvw/sf.vc.v.xvw) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_xvw_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_x2_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.ivw/sf.vc.v.ivw) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_ivw_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_x2_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info_end}; + +/* A list of args for vector_type func (sf.vc.fvw/sf.vc.v.fvw) function. */ +static CONSTEXPR const rvv_arg_type_info sf_vc_fvw_args[] + = {rvv_arg_type_info (RVV_BASE_scalar), + rvv_arg_type_info (RVV_BASE_x2_vector), + rvv_arg_type_info (RVV_BASE_vector), + rvv_arg_type_info (RVV_BASE_scalar_float), + rvv_arg_type_info_end}; + /* A list of none preds that will be registered for intrinsic functions. 
*/ static CONSTEXPR const predication_type_index none_preds[] = {PRED_TYPE_none, NUM_PRED_TYPES}; @@ -3006,6 +3175,174 @@ static CONSTEXPR const rvv_op_info u_vvvv_crypto_sew64_ops rvv_arg_type_info (RVV_BASE_vector), /* Return type */ vvv_args /* Args */}; +static CONSTEXPR const rvv_op_info sf_vc_x_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_x, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_x_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_x_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_v_x, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_v_x_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_i_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_i, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_i_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_i_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_v_i, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_v_i_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_vv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_vv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_vv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_vv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_v_vv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_v_vv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_xv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_xv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_xv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_xv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_v_xv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_v_xv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_iv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_iv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_iv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_iv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_v_iv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_v_iv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_fv_ops + = {wextu_ops, /* Types */ + OP_TYPE_fv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_fv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_fv_ops + = {wextu_ops, /* Types */ + OP_TYPE_v_fv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_v_fv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_vvv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_vvv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_vvv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_vvv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_v_vvv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_vvv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_xvv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_xvv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_xvv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_xvv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_v_xvv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_xvv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_ivv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_ivv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* 
Return type */ + sf_vc_ivv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_ivv_ops + = {full_v_u_ops, /* Types */ + OP_TYPE_v_ivv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_ivv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_fvv_ops + = {wextu_ops, /* Types */ + OP_TYPE_fvv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_fvv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_fvv_ops + = {wextu_ops, /* Types */ + OP_TYPE_v_fvv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + sf_vc_fvv_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_vvw_ops + = {x2_u_ops, /* Types */ + OP_TYPE_vvw, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_vvw_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_vvw_ops + = {x2_u_ops, /* Types */ + OP_TYPE_v_vvw, /* Suffix */ + rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */ + sf_vc_vvw_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_xvw_ops + = {x2_u_ops, /* Types */ + OP_TYPE_xvw, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_xvw_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_xvw_ops + = {x2_u_ops, /* Types */ + OP_TYPE_v_xvw, /* Suffix */ + rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */ + sf_vc_xvw_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_ivw_ops + = {x2_u_ops, /* Types */ + OP_TYPE_ivw, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_ivw_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_ivw_ops + = {x2_u_ops, /* Types */ + OP_TYPE_v_ivw, /* Suffix */ + rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */ + sf_vc_ivw_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_fvw_ops + = {x2_wu_ops, /* Types */ + OP_TYPE_fvw, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + sf_vc_fvw_args /* Args */}; + +static CONSTEXPR const rvv_op_info sf_vc_v_fvw_ops + = {x2_wu_ops, /* Types */ + OP_TYPE_v_fvw, /* Suffix */ + rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */ + sf_vc_fvw_args /* Args */}; + /* A list of all RVV base function types. 
*/ static CONSTEXPR const function_type_info function_types[] = { #define DEF_RVV_TYPE_INDEX( \ @@ -3022,7 +3359,7 @@ static CONSTEXPR const function_type_info function_types[] = { SIGNED_EEW16_LMUL1_INTERPRET, SIGNED_EEW32_LMUL1_INTERPRET, \ SIGNED_EEW64_LMUL1_INTERPRET, UNSIGNED_EEW8_LMUL1_INTERPRET, \ UNSIGNED_EEW16_LMUL1_INTERPRET, UNSIGNED_EEW32_LMUL1_INTERPRET, \ - UNSIGNED_EEW64_LMUL1_INTERPRET, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT, \ + UNSIGNED_EEW64_LMUL1_INTERPRET, X2, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT,\ X16_VLMUL_EXT, X32_VLMUL_EXT, X64_VLMUL_EXT, TUPLE_SUBPART) \ { \ VECTOR_TYPE_##VECTOR, \ @@ -3087,6 +3424,7 @@ static CONSTEXPR const function_type_info function_types[] = { VECTOR_TYPE_##UNSIGNED_EEW16_LMUL1_INTERPRET, \ VECTOR_TYPE_##UNSIGNED_EEW32_LMUL1_INTERPRET, \ VECTOR_TYPE_##UNSIGNED_EEW64_LMUL1_INTERPRET, \ + VECTOR_TYPE_##X2, \ VECTOR_TYPE_##X2_VLMUL_EXT, \ VECTOR_TYPE_##X4_VLMUL_EXT, \ VECTOR_TYPE_##X8_VLMUL_EXT, \ @@ -3504,26 +3842,26 @@ check_required_extensions (const function_instance &instance) required_extensions |= RVV_REQUIRE_RV64BIT; } - uint64_t riscv_isa_flags = 0; + uint64_t isa_flags = 0; if (TARGET_VECTOR_ELEN_BF_16) - riscv_isa_flags |= RVV_REQUIRE_ELEN_BF_16; + isa_flags |= RVV_REQUIRE_ELEN_BF_16; if (TARGET_VECTOR_ELEN_FP_16) - riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_16; + isa_flags |= RVV_REQUIRE_ELEN_FP_16; if (TARGET_VECTOR_ELEN_FP_32) - riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_32; + isa_flags |= RVV_REQUIRE_ELEN_FP_32; if (TARGET_VECTOR_ELEN_FP_64) - riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_64; + isa_flags |= RVV_REQUIRE_ELEN_FP_64; if (TARGET_VECTOR_ELEN_64) - riscv_isa_flags |= RVV_REQUIRE_ELEN_64; + isa_flags |= RVV_REQUIRE_ELEN_64; if (TARGET_64BIT) - riscv_isa_flags |= RVV_REQUIRE_RV64BIT; + isa_flags |= RVV_REQUIRE_RV64BIT; if (TARGET_FULL_V) - riscv_isa_flags |= RVV_REQUIRE_FULL_V; + isa_flags |= RVV_REQUIRE_FULL_V; if (TARGET_MIN_VLEN > 32) - riscv_isa_flags |= RVV_REQUIRE_MIN_VLEN_64; + isa_flags |= RVV_REQUIRE_MIN_VLEN_64; - uint64_t missing_extensions = required_extensions & ~riscv_isa_flags; + uint64_t missing_extensions = required_extensions & ~isa_flags; if (missing_extensions != 0) return false; return true; @@ -3600,6 +3938,24 @@ rvv_arg_type_info::get_xfqf_float_type (vector_type_index type_idx) const return NULL_TREE; } +tree +rvv_arg_type_info::get_scalar_float_type (vector_type_index type_idx) const +{ + /* Convert vint types to their corresponding scalar float types. + Note: + - According to riscv-vector-builtins-types.def, the index of an unsigned + type is always one greater than its corresponding signed type. + - Conversion for vint8 types is not required. */ + if (type_idx >= VECTOR_TYPE_vint16mf4_t && type_idx <= VECTOR_TYPE_vuint16m8_t) + return builtin_types[VECTOR_TYPE_vfloat16m1_t].scalar; + else if (type_idx >= VECTOR_TYPE_vint32mf2_t && type_idx <= VECTOR_TYPE_vuint32m8_t) + return builtin_types[VECTOR_TYPE_vfloat32m1_t].scalar; + else if (type_idx >= VECTOR_TYPE_vint64m1_t && type_idx <= VECTOR_TYPE_vuint64m8_t) + return builtin_types[VECTOR_TYPE_vfloat64m1_t].scalar; + else + return NULL_TREE; +} + vector_type_index rvv_arg_type_info::get_function_type_index (vector_type_index type_idx) const { @@ -3758,7 +4114,7 @@ function_instance::modifies_global_state_p () const return true; /* Handle direct modifications of global state. 
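   Custom coprocessor instructions (for example the sf.vc.* builtins guarded
   by XSFVCP) can have side effects the compiler cannot analyse, which is why
   CP_USE_COPROCESSORS is treated here as modifying global state as well.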
*/ - return flags & (CP_WRITE_MEMORY | CP_WRITE_CSR); + return flags & (CP_WRITE_MEMORY | CP_WRITE_CSR | CP_USE_COPROCESSORS); } /* Return true if calls to the function could raise a signal. */ @@ -4621,6 +4977,12 @@ registered_function::overloaded_hash () const for (unsigned int i = 0; i < argument_types.length (); i++) { type = argument_types[i]; + + /* If we're passed something entirely unreasonable, just ignore here. + We'll warn later anyway. */ + if (TREE_CODE_CLASS (TREE_CODE (type)) != tcc_type) + continue; + unsigned_p = POINTER_TYPE_P (type) ? TYPE_UNSIGNED (TREE_TYPE (type)) : TYPE_UNSIGNED (type); mode_p = POINTER_TYPE_P (type) ? TYPE_MODE (TREE_TYPE (type)) @@ -4724,7 +5086,11 @@ bool verify_type_context (location_t loc, type_context_kind context, const_tree type, bool silent_p) { - if (!sizeless_type_p (type)) + const_tree tmp = type; + if (omp_type_context (context) && POINTER_TYPE_P (type)) + tmp = strip_pointer_types (tmp); + + if (!sizeless_type_p (tmp)) return true; switch (context) @@ -4796,6 +5162,34 @@ verify_type_context (location_t loc, type_context_kind context, const_tree type, error_at (loc, "capture by copy of RVV type %qT", type); return false; + + case TCTX_OMP_MAP: + if (!silent_p) + error_at (loc, "RVV type %qT not allowed in %<map%> clause", type); + return false; + + case TCTX_OMP_MAP_IMP_REF: + if (!silent_p) + error ("cannot reference %qT object types in %<target%> region", type); + return false; + + case TCTX_OMP_PRIVATE: + if (!silent_p) + error_at (loc, "RVV type %qT not allowed in" + " %<target%> %<private%> clause", type); + return false; + + case TCTX_OMP_FIRSTPRIVATE: + if (!silent_p) + error_at (loc, "RVV type %qT not allowed in" + " %<target%> %<firstprivate%> clause", type); + return false; + + case TCTX_OMP_DEVICE_ADDR: + if (!silent_p) + error_at (loc, "RVV type %qT not allowed in" + " %<target%> device clauses", type); + return false; } gcc_unreachable (); diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def index 3a62869..be3fb1a 100644 --- a/gcc/config/riscv/riscv-vector-builtins.def +++ b/gcc/config/riscv/riscv-vector-builtins.def @@ -82,7 +82,7 @@ along with GCC; see the file COPYING3. 
If not see SIGNED_EEW16_LMUL1_INTERPRET, SIGNED_EEW32_LMUL1_INTERPRET, \ SIGNED_EEW64_LMUL1_INTERPRET, UNSIGNED_EEW8_LMUL1_INTERPRET, \ UNSIGNED_EEW16_LMUL1_INTERPRET, UNSIGNED_EEW32_LMUL1_INTERPRET, \ - UNSIGNED_EEW64_LMUL1_INTERPRET, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT, \ + UNSIGNED_EEW64_LMUL1_INTERPRET, X2, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT,\ X16_VLMUL_EXT, X32_VLMUL_EXT, X64_VLMUL_EXT, TUPLE_SUBPART) #endif @@ -637,6 +637,32 @@ DEF_RVV_OP_TYPE (xu_w) DEF_RVV_OP_TYPE (s) DEF_RVV_OP_TYPE (4x8x4) DEF_RVV_OP_TYPE (2x8x2) +DEF_RVV_OP_TYPE (v_x) +DEF_RVV_OP_TYPE (i) +DEF_RVV_OP_TYPE (v_i) +DEF_RVV_OP_TYPE (xv) +DEF_RVV_OP_TYPE (iv) +DEF_RVV_OP_TYPE (fv) +DEF_RVV_OP_TYPE (vvv) +DEF_RVV_OP_TYPE (xvv) +DEF_RVV_OP_TYPE (ivv) +DEF_RVV_OP_TYPE (fvv) +DEF_RVV_OP_TYPE (vvw) +DEF_RVV_OP_TYPE (xvw) +DEF_RVV_OP_TYPE (ivw) +DEF_RVV_OP_TYPE (fvw) +DEF_RVV_OP_TYPE (v_vv) +DEF_RVV_OP_TYPE (v_xv) +DEF_RVV_OP_TYPE (v_iv) +DEF_RVV_OP_TYPE (v_fv) +DEF_RVV_OP_TYPE (v_vvv) +DEF_RVV_OP_TYPE (v_xvv) +DEF_RVV_OP_TYPE (v_ivv) +DEF_RVV_OP_TYPE (v_fvv) +DEF_RVV_OP_TYPE (v_vvw) +DEF_RVV_OP_TYPE (v_xvw) +DEF_RVV_OP_TYPE (v_ivw) +DEF_RVV_OP_TYPE (v_fvw) DEF_RVV_PRED_TYPE (ta) DEF_RVV_PRED_TYPE (tu) @@ -720,6 +746,7 @@ DEF_RVV_BASE_TYPE (unsigned_eew8_lmul1_interpret, get_vector_type (type_idx)) DEF_RVV_BASE_TYPE (unsigned_eew16_lmul1_interpret, get_vector_type (type_idx)) DEF_RVV_BASE_TYPE (unsigned_eew32_lmul1_interpret, get_vector_type (type_idx)) DEF_RVV_BASE_TYPE (unsigned_eew64_lmul1_interpret, get_vector_type (type_idx)) +DEF_RVV_BASE_TYPE (x2_vector, get_vector_type (type_idx)) DEF_RVV_BASE_TYPE (vlmul_ext_x2, get_vector_type (type_idx)) DEF_RVV_BASE_TYPE (vlmul_ext_x4, get_vector_type (type_idx)) DEF_RVV_BASE_TYPE (vlmul_ext_x8, get_vector_type (type_idx)) @@ -729,6 +756,7 @@ DEF_RVV_BASE_TYPE (vlmul_ext_x64, get_vector_type (type_idx)) DEF_RVV_BASE_TYPE (size_ptr, build_pointer_type (size_type_node)) DEF_RVV_BASE_TYPE (tuple_subpart, get_tuple_subpart_type (type_idx)) DEF_RVV_BASE_TYPE (xfqf_float, get_xfqf_float_type (type_idx)) +DEF_RVV_BASE_TYPE (scalar_float, get_scalar_float_type (type_idx)) DEF_RVV_VXRM_ENUM (RNU, VXRM_RNU) DEF_RVV_VXRM_ENUM (RNE, VXRM_RNE) diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index 42ba905..1f2587a 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -130,6 +130,7 @@ enum required_ext XSFVQMACCQOQ_EXT, /* XSFVQMACCQOQ extension */ XSFVQMACCDOD_EXT, /* XSFVQMACCDOD extension */ XSFVFNRCLIPXFQF_EXT, /* XSFVFNRCLIPXFQF extension */ + XSFVCP_EXT, /* XSFVCP extension*/ /* Please update below to isa_name func when add or remove enum type(s). */ }; @@ -169,6 +170,8 @@ static inline const char * required_ext_to_isa_name (enum required_ext required) return "xsfvqmaccdod"; case XSFVFNRCLIPXFQF_EXT: return "xsfvfnrclipxfqf"; + case XSFVCP_EXT: + return "xsfvcp"; default: gcc_unreachable (); } @@ -212,6 +215,8 @@ static inline bool required_extensions_specified (enum required_ext required) return TARGET_XSFVQMACCDOD; case XSFVFNRCLIPXFQF_EXT: return TARGET_XSFVFNRCLIPXFQF; + case XSFVCP_EXT: + return TARGET_XSFVCP; default: gcc_unreachable (); } @@ -297,6 +302,7 @@ struct rvv_arg_type_info tree get_tree_type (vector_type_index) const; tree get_tuple_subpart_type (vector_type_index) const; tree get_xfqf_float_type (vector_type_index) const; + tree get_scalar_float_type (vector_type_index) const; }; /* Static information for each operand. 
*/ @@ -325,43 +331,7 @@ struct function_group_info /* Return true if required extension is enabled */ bool match (required_ext ext_value) const { - switch (ext_value) - { - case VECTOR_EXT: - return TARGET_VECTOR; - case ZVBB_EXT: - return TARGET_ZVBB; - case ZVBB_OR_ZVKB_EXT: - return (TARGET_ZVBB || TARGET_ZVKB); - case ZVBC_EXT: - return TARGET_ZVBC; - case ZVKG_EXT: - return TARGET_ZVKG; - case ZVKNED_EXT: - return TARGET_ZVKNED; - case ZVKNHA_OR_ZVKNHB_EXT: - return (TARGET_ZVKNHA || TARGET_ZVKNHB); - case ZVKNHB_EXT: - return TARGET_ZVKNHB; - case ZVKSED_EXT: - return TARGET_ZVKSED; - case ZVKSH_EXT: - return TARGET_ZVKSH; - case XTHEADVECTOR_EXT: - return TARGET_XTHEADVECTOR; - case ZVFBFMIN_EXT: - return TARGET_ZVFBFMIN; - case ZVFBFWMA_EXT: - return TARGET_ZVFBFWMA; - case XSFVQMACCQOQ_EXT: - return TARGET_XSFVQMACCQOQ; - case XSFVQMACCDOD_EXT: - return TARGET_XSFVQMACCDOD; - case XSFVFNRCLIPXFQF_EXT: - return TARGET_XSFVFNRCLIPXFQF; - default: - gcc_unreachable (); - } + return required_extensions_specified (ext_value); } /* The base name, as a string. */ const char *base_name; diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 167375c..4d8170d 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -205,9 +205,7 @@ compute_local_program_points ( if (!is_gimple_assign_or_call (gsi_stmt (si))) continue; stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); - enum stmt_vec_info_type type - = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info)); - if (type != undef_vec_info_type) + if (STMT_VINFO_RELEVANT_P (stmt_info)) { stmt_point info = {point, gsi_stmt (si), stmt_info}; program_points.safe_push (info); @@ -626,7 +624,7 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode) int regno_alignment = riscv_get_v_regno_alignment (loop_vinfo->vector_mode); if (riscv_v_ext_vls_mode_p (loop_vinfo->vector_mode)) return regno_alignment; - else if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U)) + else { int estimated_vf = vect_vf_for_cost (loop_vinfo); int estimated_lmul = estimated_vf * GET_MODE_BITSIZE (mode).to_constant () @@ -636,25 +634,6 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode) else return estimated_lmul; } - else - { - /* Estimate the VLA SLP LMUL. */ - if (regno_alignment > RVV_M1) - return regno_alignment; - else if (mode != QImode - || LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo).is_constant ()) - { - int ratio; - if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR, - GET_MODE_SIZE (loop_vinfo->vector_mode), &ratio)) - { - if (ratio == 1) - return RVV_M4; - else if (ratio == 2) - return RVV_M2; - } - } - } return 0; } @@ -1120,8 +1099,8 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, switch (kind) { case scalar_to_vec: - stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR - : costs->regmove->GR2VR); + stmt_cost + += (FLOAT_TYPE_P (vectype) ? get_fr2vr_cost () : get_gr2vr_cost ()); break; case vec_to_scalar: stmt_cost += (FLOAT_TYPE_P (vectype) ? 
costs->regmove->VR2FR diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 0ac2538..4fe0ae6 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -100,31 +100,28 @@ using namespace riscv_vector; static void bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b) { - unsigned int set_size = dst->size; - edge e; - unsigned ix; - - for (ix = 0; ix < EDGE_COUNT (b->preds); ix++) + /* Handle case with no predecessors (including ENTRY block). */ + if (EDGE_COUNT (b->preds) == 0) { - e = EDGE_PRED (b, ix); - bitmap_copy (dst, src[e->src->index]); - break; + bitmap_clear (dst); + return; } - if (ix == EDGE_COUNT (b->preds)) - bitmap_clear (dst); - else - for (ix++; ix < EDGE_COUNT (b->preds); ix++) - { - unsigned int i; - SBITMAP_ELT_TYPE *p, *r; - - e = EDGE_PRED (b, ix); - p = src[e->src->index]->elms; - r = dst->elms; - for (i = 0; i < set_size; i++) - *r++ |= *p++; - } + edge e; + edge_iterator ei; + /* Union remaining predecessors' bitmaps. */ + FOR_EACH_EDGE (e, ei, b->preds) + { + /* Initialize with first predecessor's bitmap. */ + if (ei.index == 0) + { + bitmap_copy (dst, src[e->src->index]); + continue; + } + + /* Perform bitmap OR operation element-wise. */ + bitmap_ior (dst, dst, src[e->src->index]); + } } /* Compute the reaching definition in and out based on the gen and KILL @@ -685,7 +682,7 @@ invalid_opt_bb_p (basic_block cfg_bb) /* We only do LCM optimizations on blocks that are post dominated by EXIT block, that is, we don't do LCM optimizations on infinite loop. */ FOR_EACH_EDGE (e, ei, cfg_bb->succs) - if (e->flags & EDGE_FAKE) + if ((e->flags & EDGE_FAKE) || (e->flags & EDGE_ABNORMAL)) return true; return false; @@ -2698,6 +2695,7 @@ pre_vsetvl::compute_lcm_local_properties () m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); + bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun)); @@ -2749,6 +2747,10 @@ pre_vsetvl::compute_lcm_local_properties () if (invalid_opt_bb_p (bb->cfg_bb ())) { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n --- skipping bb %u due to weird edge", + bb->index ()); + bitmap_clear (m_antloc[bb_index]); bitmap_clear (m_transp[bb_index]); } @@ -3022,6 +3024,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter) continue; } + /* We cannot lift a vsetvl into the source block if the block is + not transparent WRT to it. + This is too restrictive for blocks where a register's use only + feeds into vsetvls and no regular insns. One example is the + test rvv/vsetvl/avl_single-68.c which is currently XFAILed for + that reason. + In order to support this case we'd need to check the vsetvl's + AVL operand's uses in the source block and make sure they are + only used in other vsetvls. 
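   As a rough illustration (register name made up): if the candidate vsetvl
   takes its AVL from a5 and the source block also sets or uses a5 in ordinary
   insns, the block is not transparent for that vsetvl, and placing the vsetvl
   earlier could observe a different a5 than the original program point did.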
*/ + if (!bitmap_bit_p (m_transp[eg->src->index], expr_index)) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, @@ -3402,8 +3416,7 @@ pre_vsetvl::emit_vsetvl () } start_sequence (); insert_vsetvl_insn (EMIT_DIRECT, footer_info); - rtx_insn *rinsn = get_insns (); - end_sequence (); + rtx_insn *rinsn = end_sequence (); default_rtl_profile (); insert_insn_on_edge (rinsn, eg); need_commit = true; @@ -3434,8 +3447,7 @@ pre_vsetvl::emit_vsetvl () start_sequence (); insert_vsetvl_insn (EMIT_DIRECT, info); - rtx_insn *rinsn = get_insns (); - end_sequence (); + rtx_insn *rinsn = end_sequence (); default_rtl_profile (); /* We should not get an abnormal edge here. */ diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 38f3ae7..1275b03 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -87,6 +87,10 @@ along with GCC; see the file COPYING3. If not see #include "riscv-vector-costs.h" #include "riscv-subset.h" +/* Target variants that support full conditional move. */ +#define TARGET_COND_MOV \ + (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV || TARGET_XMIPSCMOV) + /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */ #define UNSPEC_ADDRESS_P(X) \ (GET_CODE (X) == UNSPEC \ @@ -107,6 +111,8 @@ along with GCC; see the file COPYING3. If not see /* True the mode switching has static frm, or false. */ #define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p) +#define CFUN_IN_CALL(c) ((c)->machine->mode_sw_info.cfun_call) + /* True if we can use the instructions in the XTheadInt extension to handle interrupts, or false. */ #define TH_INT_INTERRUPT(c) \ @@ -176,10 +182,13 @@ struct GTY(()) mode_switching_info { mode instruction in the function or not. */ bool static_frm_p; + bool cfun_call; + mode_switching_info () { dynamic_frm = NULL_RTX; static_frm_p = false; + cfun_call = false; } }; @@ -278,6 +287,10 @@ enum riscv_fusion_pairs RISCV_FUSE_AUIPC_LD = (1 << 7), RISCV_FUSE_LDPREINCREMENT = (1 << 8), RISCV_FUSE_ALIGNED_STD = (1 << 9), + RISCV_FUSE_CACHE_ALIGNED_STD = (1 << 10), + RISCV_FUSE_BFEXT = (1 << 11), + RISCV_FUSE_EXPANDED_LD = (1 << 12), + RISCV_FUSE_B_ALUI = (1 << 13), }; /* Costs of various operations on the different architectures. */ @@ -297,6 +310,8 @@ struct riscv_tune_param bool vector_unaligned_access; bool use_divmod_expansion; bool overlap_op_by_pieces; + bool use_zero_stride_load; + bool speculative_sched_vsetvl; unsigned int fusible_ops; const struct cpu_vector_cost *vec_costs; const char *function_align; @@ -444,6 +459,30 @@ static const struct cpu_vector_cost generic_vector_cost = { &rvv_regmove_vector_cost, /* regmove */ }; +/* Costs to use when optimizing for generic. 
*/ +static const struct riscv_tune_param generic_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */ + {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */ + 1, /* issue_rate */ + 4, /* branch_cost */ + 5, /* memory_cost */ + 8, /* fmv_cost */ + true, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + false, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ +}; + /* Costs to use when optimizing for rocket. */ static const struct riscv_tune_param rocket_tune_info = { {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ @@ -459,6 +498,8 @@ static const struct riscv_tune_param rocket_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -481,6 +522,8 @@ static const struct riscv_tune_param sifive_7_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -503,6 +546,8 @@ static const struct riscv_tune_param sifive_p400_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ &generic_vector_cost, /* vector cost */ NULL, /* function_align */ @@ -525,6 +570,8 @@ static const struct riscv_tune_param sifive_p600_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ &generic_vector_cost, /* vector cost */ NULL, /* function_align */ @@ -547,6 +594,8 @@ static const struct riscv_tune_param thead_c906_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -569,6 +618,8 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -591,6 +642,8 @@ static const struct riscv_tune_param generic_ooo_tune_info = { true, /* vector_unaligned_access */ false, /* use_divmod_expansion */ true, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ &generic_vector_cost, /* vector cost */ NULL, /* function_align */ @@ -613,6 +666,8 @@ 
static const struct riscv_tune_param tt_ascalon_d8_tune_info = { true, /* vector_unaligned_access */ true, /* use_divmod_expansion */ true, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ &generic_vector_cost, /* vector cost */ NULL, /* function_align */ @@ -635,6 +690,8 @@ static const struct riscv_tune_param optimize_size_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -642,6 +699,30 @@ static const struct riscv_tune_param optimize_size_tune_info = { NULL, /* loop_align */ }; +/* Costs to use when optimizing for MIPS P8700 */ +static const struct riscv_tune_param mips_p8700_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_add */ + {COSTS_N_INSNS (5), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* fp_div */ + {COSTS_N_INSNS (5), COSTS_N_INSNS (5)}, /* int_mul */ + {COSTS_N_INSNS (8), COSTS_N_INSNS (8)}, /* int_div */ + 4, /* issue_rate */ + 8, /* branch_cost */ + 4, /* memory_cost */ + 8, /* fmv_cost */ + true, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + true, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ +}; + static bool riscv_avoid_shrink_wrapping_separate (); static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *); static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *); @@ -776,6 +857,16 @@ void riscv_frame_info::reset(void) arg_pointer_offset = 0; } +/* Check if the mode is twice the size of the XLEN mode. */ + +static bool +riscv_2x_xlen_mode_p (machine_mode mode) +{ + poly_int64 mode_size = GET_MODE_SIZE (mode); + return mode_size.is_constant () + && (mode_size.to_constant () == UNITS_PER_WORD * 2); +} + /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */ static unsigned int @@ -1007,16 +1098,16 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS], /* Now iterate over the bits we want to clear until the cost is too high or we're done. */ nval = value ^ HOST_WIDE_INT_C (-1); - nval &= HOST_WIDE_INT_C (~0x7fffffff); + nval &= ~HOST_WIDE_INT_C (0x7fffffff); while (nval && alt_cost < cost) { HOST_WIDE_INT bit = ctz_hwi (nval); alt_codes[alt_cost].code = AND; - alt_codes[alt_cost].value = ~(1UL << bit); + alt_codes[alt_cost].value = ~(HOST_WIDE_INT_UC (1) << bit); alt_codes[alt_cost].use_uw = false; alt_codes[alt_cost].save_temporary = false; alt_cost++; - nval &= ~(1UL << bit); + nval &= ~(HOST_WIDE_INT_UC (1) << bit); } if (nval == 0 && alt_cost <= cost) @@ -2856,9 +2947,7 @@ riscv_call_tls_get_addr (rtx sym, rtx result) gen_int_mode (RISCV_CC_BASE, SImode))); RTL_CONST_CALL_P (insn) = 1; use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0); - insn = get_insns (); - - end_sequence (); + insn = end_sequence (); return insn; } @@ -3742,6 +3831,24 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) return true; } + if (TARGET_ZILSD && riscv_2x_xlen_mode_p (mode) + && ((REG_P (dest) && MEM_P (src)) || (MEM_P (dest) && REG_P (src))) + && can_create_pseudo_p ()) + { + rtx reg = REG_P (dest) ? 
dest : src; + unsigned regno = REGNO (reg); + /* ZILSD requires an even-odd register pair, let RA to + fix the constraint if the reg is hard reg and not even reg. */ + if ((regno < FIRST_PSEUDO_REGISTER) + && (regno % 2) != 0) + { + rtx tmp = gen_reg_rtx (GET_MODE (reg)); + emit_move_insn (tmp, src); + emit_move_insn (dest, tmp); + return true; + } + } + /* RISC-V GCC may generate non-legitimate address due to we provide some pattern for optimize access PIC local symbol and it's make GCC generate unrecognizable instruction during optimizing. */ @@ -3812,7 +3919,7 @@ static int riscv_binary_cost (rtx x, int single_insns, int double_insns) { if (!riscv_v_ext_mode_p (GET_MODE (x)) - && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2) + && riscv_2x_xlen_mode_p (GET_MODE (x))) return COSTS_N_INSNS (double_insns); return COSTS_N_INSNS (single_insns); } @@ -3851,6 +3958,27 @@ riscv_extend_cost (rtx op, bool unsigned_p) return COSTS_N_INSNS (2); } +/* Return the cost of the vector binary rtx like add, minus, mult. + The cost of scalar2vr_cost will be appended if there one of the + op comes from the VEC_DUPLICATE. */ + +static int +get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost) +{ + gcc_assert (riscv_v_ext_mode_p (GET_MODE (x))); + + rtx op_0 = XEXP (x, 0); + rtx op_1 = XEXP (x, 1); + + if (GET_CODE (op_0) == VEC_DUPLICATE + || GET_CODE (op_1) == VEC_DUPLICATE) + return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); + else if (GET_CODE (op_0) == NEG && GET_CODE (op_1) == VEC_DUPLICATE) + return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); + else + return COSTS_N_INSNS (1); +} + /* Implement TARGET_RTX_COSTS. */ #define SINGLE_SHIFT_COST 1 @@ -3863,7 +3991,75 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN Cost Model need to be well analyzed and supported in the future. */ if (riscv_v_ext_mode_p (mode)) { - *total = COSTS_N_INSNS (1); + int gr2vr_cost = get_gr2vr_cost (); + int fr2vr_cost = get_fr2vr_cost (); + int scalar2vr_cost = FLOAT_MODE_P (GET_MODE_INNER (mode)) + ? fr2vr_cost : gr2vr_cost; + + switch (outer_code) + { + case SET: + { + switch (GET_CODE (x)) + { + case VEC_DUPLICATE: + *total = gr2vr_cost * COSTS_N_INSNS (1); + break; + case IF_THEN_ELSE: + { + rtx op = XEXP (x, 1); + + switch (GET_CODE (op)) + { + case DIV: + case UDIV: + case MOD: + case UMOD: + case US_PLUS: + case US_MINUS: + case SS_PLUS: + case SS_MINUS: + *total = get_vector_binary_rtx_cost (op, scalar2vr_cost); + break; + default: + *total = COSTS_N_INSNS (1); + break; + } + } + break; + case PLUS: + case MINUS: + case AND: + case IOR: + case XOR: + case MULT: + case SMAX: + case UMAX: + case SMIN: + case UMIN: + { + rtx op; + rtx op_0 = XEXP (x, 0); + rtx op_1 = XEXP (x, 1); + + if (GET_CODE (op = op_0) == MULT + || GET_CODE (op = op_1) == MULT) + *total = get_vector_binary_rtx_cost (op, scalar2vr_cost); + else + *total = get_vector_binary_rtx_cost (x, scalar2vr_cost); + } + break; + default: + *total = COSTS_N_INSNS (1); + break; + } + } + break; + default: + *total = COSTS_N_INSNS (1); + break; + } + return true; } @@ -3883,10 +4079,41 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN *total = COSTS_N_INSNS (1); return true; } + + /* Register move for XLEN * 2. */ + if (TARGET_ZILSD + && register_operand (SET_SRC (x), GET_MODE (SET_SRC (x))) + && riscv_2x_xlen_mode_p (mode)) + { + /* We still need two instruction for move with ZILSD, + but let minus one cost to let subreg split don't. + TODO: Add riscv_tune_param for this. 
*/ + *total = COSTS_N_INSNS (2) - 1; + return true; + } + + /* Load for XLEN * 2. */ + if (TARGET_ZILSD && MEM_P (SET_SRC (x)) + && riscv_2x_xlen_mode_p (mode)) + { + /* TODO: Add riscv_tune_param for this. */ + *total = COSTS_N_INSNS (1); + return true; + } + riscv_rtx_costs (SET_SRC (x), mode, SET, opno, total, speed); return true; } + /* Store for XLEN * 2. */ + if (TARGET_ZILSD && MEM_P (SET_DEST (x)) && REG_P (SET_SRC (x)) + && riscv_2x_xlen_mode_p (mode)) + { + /* TODO: Add riscv_tune_param for this. */ + *total = COSTS_N_INSNS (1); + return true; + } + /* Otherwise return FALSE indicating we should recurse into both the SET_DEST and SET_SRC combining the cost of both. */ return false; @@ -3946,7 +4173,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN return false; case IF_THEN_ELSE: - if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV) + if (TARGET_COND_MOV && reg_or_0_operand (XEXP (x, 1), mode) && sfb_alu_operand (XEXP (x, 2), mode) && comparison_operator (XEXP (x, 0), VOIDmode)) @@ -4486,16 +4713,14 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq, rtx dest = SET_DEST (x); - /* Do something similar for the moves that are likely to + /* Do something similar for the moves that are likely to turn into NOP moves by the time the register allocator is - done. These are also side effects of how our sCC expanders - work. We'll want to check and update LAST_DEST here too. */ - if (last_dest - && REG_P (dest) + done. We don't require src to be something set in this + sequence, just a promoted SUBREG. */ + if (REG_P (dest) && GET_MODE (dest) == SImode && SUBREG_P (src) - && SUBREG_PROMOTED_VAR_P (src) - && REGNO (SUBREG_REG (src)) == REGNO (last_dest)) + && SUBREG_PROMOTED_VAR_P (src)) { riscv_if_info.original_cost += COSTS_N_INSNS (1); riscv_if_info.max_seq_cost += COSTS_N_INSNS (1); @@ -4544,6 +4769,19 @@ riscv_split_64bit_move_p (rtx dest, rtx src) if (TARGET_64BIT) return false; + /* Zilsd provides load/store with even-odd register pair. */ + if (TARGET_ZILSD + && (((REG_P (dest) && MEM_P (src)) + || (MEM_P (dest) && REG_P (src))))) + { + rtx reg = REG_P (dest) ? dest : src; + unsigned regno = REGNO (reg); + /* GCC may still generating some load/store with odd-even reg pair + because the ABI handling, but that's fine, just split that later. */ + if (GP_REG_P (regno)) + return (regno < FIRST_PSEUDO_REGISTER) && ((regno % 2) != 0); + } + /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */ if (satisfies_constraint_zfli (src)) return false; @@ -5253,40 +5491,137 @@ riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1) emit_jump_insn (gen_condjump (condition, label)); } +/* canonicalization of the comparands. */ +void +canonicalize_comparands (rtx_code code, rtx *op0, rtx *op1) +{ + /* An integer comparison must be comparing WORD_MODE objects. + Extend the comparison arguments as necessary. */ + if ((INTEGRAL_MODE_P (GET_MODE (*op0)) && GET_MODE (*op0) != word_mode) + || (INTEGRAL_MODE_P (GET_MODE (*op1)) && GET_MODE (*op1) != word_mode)) + riscv_extend_comparands (code, op0, op1); + + /* We might have been handed back a SUBREG. Just to make things + easy, force it into a REG. */ + if (!REG_P (*op0) && !CONST_INT_P (*op0)) + *op0 = force_reg (word_mode, *op0); + if (!REG_P (*op1) && !CONST_INT_P (*op1)) + *op1 = force_reg (word_mode, *op1); +} + +/* Emit target specific conditional move like TARGET_XMIPSCMOV etc. 
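   The emitted form is a plain conditional-move set; roughly (modes and
   operand names illustrative only):
     (set (reg dest)
          (if_then_else (ne (reg tmp) (const_int 0))
                        (reg cons)
                        (reg alt)))
   where tmp holds the result of the equality or order test built below.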
*/ +bool +riscv_target_conditional_move (rtx dest, rtx op0, rtx op1, rtx_code code, + rtx cons, rtx alt) +{ + machine_mode dst_mode = GET_MODE (dest); + rtx target; + + /* force the operands to the register. */ + cons = force_reg (dst_mode, cons); + alt = force_reg (dst_mode, alt); + + if (TARGET_XMIPSCMOV) + { + if (code == EQ || code == NE) + { + op0 = riscv_zero_if_equal (op0, op1); + op1 = const0_rtx; + } + else + { + target = gen_reg_rtx (GET_MODE (op0)); + riscv_emit_int_order_test (code, 0, target, op0, op1); + op0 = target; + op1 = const0_rtx; + code = NE; + } + riscv_emit_int_compare (&code, &op0, &op1); + rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); + emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (dst_mode, + cond, cons, alt))); + return true; + } + /* TARGET_SFB_ALU || TARGET_XTHEADCONDMOV. */ + else + { + riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU); + rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); + emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (dst_mode, cond, + cons, alt))); + return true; + } +} + /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST. Return 0 if expansion failed. */ bool riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt) { - machine_mode mode = GET_MODE (dest); + machine_mode dst_mode = GET_MODE (dest); + machine_mode cond_mode = GET_MODE (dest); rtx_code code = GET_CODE (op); rtx op0 = XEXP (op, 0); rtx op1 = XEXP (op, 1); + /* General note. This is called from the conditional move + expander. That simplifies the cases we need to worry about + as we know the destination will have the same mode as the + true/false arms. Furthermore we know that mode will be + DI/SI for rv64 or SI for rv32. */ + + /* For some tests, we can easily construct a 0, -1 value + which can then be used to synthesize more efficient + sequences that don't use zicond. */ + if ((code == LT || code == GE) + && (REG_P (op0) || SUBREG_P (op0)) + && op1 == CONST0_RTX (GET_MODE (op0))) + { + /* The code to expand signed division by a power of 2 uses a + conditional add by 2^n-1 idiom. It can be more efficiently + synthesized without zicond using srai+srli+add. + + But we don't see the constants here. Just a conditional move + with registers as the true/false values. So this is a little + over-aggressive and can result in a few missed if-conversions. */ + if ((REG_P (cons) || SUBREG_P (cons)) + && (REG_P (alt) || SUBREG_P (alt))) + return false; + + /* If one value is a nonzero constant and the other value is + not a constant, then avoid zicond as more efficient sequences + using the splatted sign bit are often possible. */ + if (CONST_INT_P (alt) + && alt != CONST0_RTX (dst_mode) + && !CONST_INT_P (cons)) + return false; + + if (CONST_INT_P (cons) + && cons != CONST0_RTX (dst_mode) + && !CONST_INT_P (alt)) + return false; + + /* If we need more special cases, add them here. */ + } + + if (((TARGET_ZICOND_LIKE - || (arith_operand (cons, mode) && arith_operand (alt, mode))) - && (GET_MODE_CLASS (mode) == MODE_INT)) - || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV) + || (arith_operand (cons, dst_mode) && arith_operand (alt, dst_mode))) + && GET_MODE_CLASS (dst_mode) == MODE_INT + && GET_MODE_CLASS (cond_mode) == MODE_INT) + || TARGET_COND_MOV) { machine_mode mode0 = GET_MODE (op0); - machine_mode mode1 = GET_MODE (op1); - - /* An integer comparison must be comparing WORD_MODE objects. We - must enforce that so that we don't strip away a sign_extension - thinking it is unnecessary. 
We might consider using - riscv_extend_operands if they are not already properly extended. */ - if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode) - || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode)) - return false; - /* In the fallback generic case use MODE rather than WORD_MODE for - the output of the SCC instruction, to match the mode of the NEG + canonicalize_comparands (code,&op0,&op1); + + /* In the fallback generic case use DST_MODE rather than WORD_MODE + for the output of the SCC instruction, to match the mode of the NEG operation below. The output of SCC is 0 or 1 boolean, so it is valid for input in any scalar integer mode. */ - rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE - || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV) - ? word_mode : mode); + rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE || TARGET_COND_MOV) + ? word_mode : dst_mode); bool invert = false; /* Canonicalize the comparison. It must be an equality comparison @@ -5315,41 +5650,28 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt) else return false; - op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx); + op = gen_rtx_fmt_ee (invert ? EQ : NE, cond_mode, tmp, const0_rtx); /* We've generated a new comparison. Update the local variables. */ code = GET_CODE (op); op0 = XEXP (op, 0); op1 = XEXP (op, 1); } - else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV) + else if (!TARGET_ZICOND_LIKE && !TARGET_COND_MOV) riscv_expand_int_scc (tmp, code, op0, op1, &invert); - if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV) - { - riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU); - rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); + if (TARGET_COND_MOV) + return riscv_target_conditional_move (dest, op0, op1, code, cons, alt); - /* The expander is a bit loose in its specification of the true - arm of the conditional move. That allows us to support more - cases for extensions which are more general than SFB. But - does mean we need to force CONS into a register at this point. */ - cons = force_reg (mode, cons); - /* With XTheadCondMov we need to force ALT into a register too. 
*/ - alt = force_reg (mode, alt); - emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond, - cons, alt))); - return true; - } else if (!TARGET_ZICOND_LIKE) { if (invert) std::swap (cons, alt); - rtx reg1 = gen_reg_rtx (mode); - rtx reg2 = gen_reg_rtx (mode); - rtx reg3 = gen_reg_rtx (mode); - rtx reg4 = gen_reg_rtx (mode); + rtx reg1 = gen_reg_rtx (dst_mode); + rtx reg2 = gen_reg_rtx (dst_mode); + rtx reg3 = gen_reg_rtx (dst_mode); + rtx reg4 = gen_reg_rtx (dst_mode); riscv_emit_unary (NEG, reg1, tmp); riscv_emit_binary (AND, reg2, reg1, cons); @@ -5359,48 +5681,52 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt) return true; } /* 0, reg or 0, imm */ - else if (cons == CONST0_RTX (mode) - && (REG_P (alt) - || (CONST_INT_P (alt) && alt != CONST0_RTX (mode)))) + else if (cons == CONST0_RTX (dst_mode) + && ((REG_P (alt) || SUBREG_P (alt)) + || (CONST_INT_P (alt) && alt != CONST0_RTX (dst_mode)))) { riscv_emit_int_compare (&code, &op0, &op1, true); rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); - alt = force_reg (mode, alt); + alt = force_reg (dst_mode, alt); emit_insn (gen_rtx_SET (dest, - gen_rtx_IF_THEN_ELSE (mode, cond, + gen_rtx_IF_THEN_ELSE (dst_mode, cond, cons, alt))); return true; } /* imm, imm */ - else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) - && CONST_INT_P (alt) && alt != CONST0_RTX (mode)) + else if (CONST_INT_P (cons) && cons != CONST0_RTX (dst_mode) + && CONST_INT_P (alt) && alt != CONST0_RTX (dst_mode)) { riscv_emit_int_compare (&code, &op0, &op1, true); rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons); - alt = force_reg (mode, gen_int_mode (t, mode)); + alt = force_reg (dst_mode, gen_int_mode (t, dst_mode)); emit_insn (gen_rtx_SET (dest, - gen_rtx_IF_THEN_ELSE (mode, cond, - CONST0_RTX (mode), + gen_rtx_IF_THEN_ELSE (dst_mode, cond, + CONST0_RTX (dst_mode), alt))); /* CONS might not fit into a signed 12 bit immediate suitable for an addi instruction. If that's the case, force it into a register. */ if (!SMALL_OPERAND (INTVAL (cons))) - cons = force_reg (mode, cons); + cons = force_reg (dst_mode, cons); riscv_emit_binary (PLUS, dest, dest, cons); return true; } /* imm, reg */ - else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt)) + else if (CONST_INT_P (cons) + && cons != CONST0_RTX (dst_mode) + && (REG_P (alt) || SUBREG_P (alt))) { /* Optimize for register value of 0. */ - if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode)) + if (code == NE + && rtx_equal_p (op0, alt) + && op1 == CONST0_RTX (dst_mode)) { rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); - cons = force_reg (mode, cons); + cons = force_reg (dst_mode, cons); emit_insn (gen_rtx_SET (dest, - gen_rtx_IF_THEN_ELSE (mode, cond, + gen_rtx_IF_THEN_ELSE (dst_mode, cond, cons, alt))); return true; } @@ -5408,47 +5734,51 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt) riscv_emit_int_compare (&code, &op0, &op1, true); rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); - rtx temp1 = gen_reg_rtx (mode); - rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode); + rtx temp1 = gen_reg_rtx (dst_mode); + rtx temp2 = gen_int_mode (-1 * INTVAL (cons), dst_mode); /* TEMP2 and/or CONS might not fit into a signed 12 bit immediate suitable for an addi instruction. If that's the case, force it into a register. 
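   For illustration (constant made up): with CONS == 5 and ALT in a register,
   the code below computes temp1 = alt - 5, then dest = cond ? 0 : temp1, and
   finally dest = dest + 5, which yields 5 when the condition holds and the
   original alt otherwise.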
*/ if (!SMALL_OPERAND (INTVAL (temp2))) - temp2 = force_reg (mode, temp2); + temp2 = force_reg (dst_mode, temp2); if (!SMALL_OPERAND (INTVAL (cons))) - cons = force_reg (mode, cons); + cons = force_reg (dst_mode, cons); riscv_emit_binary (PLUS, temp1, alt, temp2); emit_insn (gen_rtx_SET (dest, - gen_rtx_IF_THEN_ELSE (mode, cond, - CONST0_RTX (mode), + gen_rtx_IF_THEN_ELSE (dst_mode, cond, + CONST0_RTX (dst_mode), temp1))); riscv_emit_binary (PLUS, dest, dest, cons); return true; } /* reg, 0 or imm, 0 */ - else if ((REG_P (cons) - || (CONST_INT_P (cons) && cons != CONST0_RTX (mode))) - && alt == CONST0_RTX (mode)) + else if (((REG_P (cons) || SUBREG_P (cons)) + || (CONST_INT_P (cons) && cons != CONST0_RTX (dst_mode))) + && alt == CONST0_RTX (dst_mode)) { riscv_emit_int_compare (&code, &op0, &op1, true); rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); - cons = force_reg (mode, cons); - emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond, + cons = force_reg (dst_mode, cons); + emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (dst_mode, cond, cons, alt))); return true; } /* reg, imm */ - else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode)) + else if ((REG_P (cons) || (SUBREG_P (cons))) + && CONST_INT_P (alt) + && alt != CONST0_RTX (dst_mode)) { /* Optimize for register value of 0. */ - if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode)) + if (code == EQ + && rtx_equal_p (op0, cons) + && op1 == CONST0_RTX (dst_mode)) { rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); - alt = force_reg (mode, alt); + alt = force_reg (dst_mode, alt); emit_insn (gen_rtx_SET (dest, - gen_rtx_IF_THEN_ELSE (mode, cond, + gen_rtx_IF_THEN_ELSE (dst_mode, cond, cons, alt))); return true; } @@ -5456,53 +5786,54 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt) riscv_emit_int_compare (&code, &op0, &op1, true); rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); - rtx temp1 = gen_reg_rtx (mode); - rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode); + rtx temp1 = gen_reg_rtx (dst_mode); + rtx temp2 = gen_int_mode (-1 * INTVAL (alt), dst_mode); /* TEMP2 and/or ALT might not fit into a signed 12 bit immediate suitable for an addi instruction. If that's the case, force it into a register. 
*/ if (!SMALL_OPERAND (INTVAL (temp2))) - temp2 = force_reg (mode, temp2); + temp2 = force_reg (dst_mode, temp2); if (!SMALL_OPERAND (INTVAL (alt))) - alt = force_reg (mode, alt); + alt = force_reg (dst_mode, alt); riscv_emit_binary (PLUS, temp1, cons, temp2); emit_insn (gen_rtx_SET (dest, - gen_rtx_IF_THEN_ELSE (mode, cond, + gen_rtx_IF_THEN_ELSE (dst_mode, cond, temp1, - CONST0_RTX (mode)))); + CONST0_RTX (dst_mode)))); riscv_emit_binary (PLUS, dest, dest, alt); return true; } /* reg, reg */ - else if (REG_P (cons) && REG_P (alt)) + else if ((REG_P (cons) || SUBREG_P (cons)) + && (REG_P (alt) || SUBREG_P (alt))) { if (((code == EQ && rtx_equal_p (cons, op0)) || (code == NE && rtx_equal_p (alt, op0))) - && op1 == CONST0_RTX (mode)) + && op1 == CONST0_RTX (dst_mode)) { rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); - alt = force_reg (mode, alt); + alt = force_reg (dst_mode, alt); emit_insn (gen_rtx_SET (dest, - gen_rtx_IF_THEN_ELSE (mode, cond, + gen_rtx_IF_THEN_ELSE (dst_mode, cond, cons, alt))); return true; } - rtx reg1 = gen_reg_rtx (mode); - rtx reg2 = gen_reg_rtx (mode); + rtx reg1 = gen_reg_rtx (dst_mode); + rtx reg2 = gen_reg_rtx (dst_mode); riscv_emit_int_compare (&code, &op0, &op1, true); rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE, GET_MODE (op0), op0, op1); emit_insn (gen_rtx_SET (reg2, - gen_rtx_IF_THEN_ELSE (mode, cond2, - CONST0_RTX (mode), + gen_rtx_IF_THEN_ELSE (dst_mode, cond2, + CONST0_RTX (dst_mode), cons))); emit_insn (gen_rtx_SET (reg1, - gen_rtx_IF_THEN_ELSE (mode, cond1, - CONST0_RTX (mode), + gen_rtx_IF_THEN_ELSE (dst_mode, cond1, + CONST0_RTX (dst_mode), alt))); riscv_emit_binary (PLUS, dest, reg1, reg2); return true; @@ -6879,6 +7210,7 @@ riscv_asm_output_opcode (FILE *asm_out_file, const char *p) 'T' Print shift-index of inverted single-bit mask OP. '~' Print w if TARGET_64BIT is true; otherwise not print anything. 'N' Print register encoding as integer (0-31). + 'H' Print the name of the next register for integer. Note please keep this list and the list in riscv.md in sync. */ @@ -7174,6 +7506,27 @@ riscv_print_operand (FILE *file, rtx op, int letter) asm_fprintf (file, "%u", (regno - offset)); break; } + case 'H': + { + if (!REG_P (op)) + { + output_operand_lossage ("modifier 'H' require register operand"); + break; + } + if (REGNO (op) > 31) + { + output_operand_lossage ("modifier 'H' is for integer registers only"); + break; + } + if (REGNO (op) == 31) + { + output_operand_lossage ("modifier 'H' cannot be applied to R31"); + break; + } + + fputs (reg_names[REGNO (op) + 1], file); + break; + } default: switch (code) { @@ -7863,11 +8216,9 @@ riscv_can_inline_p (tree caller, tree callee) struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); - int isa_flag_mask = riscv_x_target_flags_isa_mask (); - - /* Callee and caller should have the same target options except for ISA. */ - int callee_target_flags = callee_opts->x_target_flags & ~isa_flag_mask; - int caller_target_flags = caller_opts->x_target_flags & ~isa_flag_mask; + /* Callee and caller should have the same target options. 
*/ + int callee_target_flags = callee_opts->x_target_flags; + int caller_target_flags = caller_opts->x_target_flags; if (callee_target_flags != caller_target_flags) return false; @@ -8686,12 +9037,20 @@ riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size) temp2 = riscv_force_temporary (temp2, gen_int_mode (rounded_size, Pmode)); insn = emit_insn (gen_sub3_insn (temp2, stack_pointer_rtx, temp2)); + /* The size does not represent actual stack pointer address shift + from the top of the frame, as it might be lowered before. + To consider the correct SP addresses for the CFA notes, it is needed + to correct them with the initial offset value. */ + HOST_WIDE_INT initial_cfa_offset + = cfun->machine->frame.total_size.to_constant () - size; + if (!frame_pointer_needed) { /* We want the CFA independent of the stack pointer for the duration of the loop. */ add_reg_note (insn, REG_CFA_DEF_CFA, - plus_constant (Pmode, temp1, rounded_size)); + plus_constant (Pmode, temp1, + initial_cfa_offset + rounded_size)); RTX_FRAME_RELATED_P (insn) = 1; } @@ -8704,7 +9063,8 @@ riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size) { insn = get_last_insn (); add_reg_note (insn, REG_CFA_DEF_CFA, - plus_constant (Pmode, stack_pointer_rtx, rounded_size)); + plus_constant (Pmode, stack_pointer_rtx, + initial_cfa_offset + rounded_size)); RTX_FRAME_RELATED_P (insn) = 1; } @@ -9650,27 +10010,27 @@ int riscv_register_move_cost (machine_mode mode, reg_class_t from, reg_class_t to) { - bool from_is_fpr = from == FP_REGS || from == RVC_FP_REGS; - bool from_is_gpr = from == GR_REGS || from == RVC_GR_REGS; - bool to_is_fpr = to == FP_REGS || to == RVC_FP_REGS; - bool to_is_gpr = to == GR_REGS || to == RVC_GR_REGS; + bool from_is_fpr = reg_class_subset_p (from, FP_REGS); + bool from_is_gpr = reg_class_subset_p (from, GR_REGS); + bool to_is_fpr = reg_class_subset_p (to, FP_REGS); + bool to_is_gpr = reg_class_subset_p (to, GR_REGS); if ((from_is_fpr && to_is_gpr) || (from_is_gpr && to_is_fpr)) return tune_param->fmv_cost; if (from == V_REGS) { - if (to == GR_REGS) + if (to_is_gpr) return get_vector_costs ()->regmove->VR2GR; - else if (to == FP_REGS) + else if (to_is_fpr) return get_vector_costs ()->regmove->VR2FR; } if (to == V_REGS) { - if (from == GR_REGS) - return get_vector_costs ()->regmove->GR2VR; - else if (from == FP_REGS) - return get_vector_costs ()->regmove->FR2VR; + if (from_is_gpr) + return get_gr2vr_cost (); + else if (from_is_fpr) + return get_fr2vr_cost (); } return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2; @@ -9746,6 +10106,10 @@ riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode) if (riscv_v_ext_mode_p (mode)) return false; + /* Zilsd require load/store with even-odd reg pair. 
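   For example, on RV32 with Zilsd a DImode value can live in the a0/a1 pair
   (a0 is x10, an even register) but must not start at a1 (x11, odd), so odd
   starting hard registers are rejected for modes of twice the XLEN size.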
*/ + if (TARGET_ZILSD && riscv_2x_xlen_mode_p (mode) && ((regno % 2) != 0)) + return false; + if (!GP_REG_P (regno + nregs - 1)) return false; } @@ -9898,23 +10262,114 @@ riscv_fusion_enabled_p(enum riscv_fusion_pairs op) return tune_param->fusible_ops & op; } +/* Matches an add: + (set (reg:DI rd) (plus:SI (reg:SI rs1) (reg:SI rs2))) */ + +static bool +riscv_set_is_add (rtx set) +{ + return (GET_CODE (SET_SRC (set)) == PLUS + && REG_P (XEXP (SET_SRC (set), 0)) + && REG_P (XEXP (SET_SRC (set), 1)) + && REG_P (SET_DEST (set))); +} + +/* Matches an addi: + (set (reg:DI rd) (plus:SI (reg:SI rs1) (const_int imm))) */ + +static bool +riscv_set_is_addi (rtx set) +{ + return (GET_CODE (SET_SRC (set)) == PLUS + && REG_P (XEXP (SET_SRC (set), 0)) + && CONST_INT_P (XEXP (SET_SRC (set), 1)) + && REG_P (SET_DEST (set))); +} + +/* Matches an add.uw: + (set (reg:DI rd) + (plus:DI (zero_extend:DI (reg:SI rs1)) (reg:DI rs2))) */ + +static bool +riscv_set_is_adduw (rtx set) +{ + return (GET_CODE (SET_SRC (set)) == PLUS + && GET_CODE (XEXP (SET_SRC (set), 0)) == ZERO_EXTEND + && REG_P (XEXP (XEXP (SET_SRC (set), 0), 0)) + && REG_P (XEXP (SET_SRC (set), 1)) + && REG_P (SET_DEST (set))); +} + +/* Matches a shNadd: + (set (reg:DI rd) + (plus:DI (ashift:DI (reg:DI rs1) (const_int N)) (reg:DI rS2)) */ + +static bool +riscv_set_is_shNadd (rtx set) +{ + return (GET_CODE (SET_SRC (set)) == PLUS + && GET_CODE (XEXP (SET_SRC (set), 0)) == ASHIFT + && REG_P (XEXP (XEXP (SET_SRC (set), 0), 0)) + && CONST_INT_P (XEXP (XEXP (SET_SRC (set), 0), 1)) + && (INTVAL (XEXP (XEXP (SET_SRC (set), 0), 1)) == 1 + || INTVAL (XEXP (XEXP (SET_SRC (set), 0), 1)) == 2 + || INTVAL (XEXP (XEXP (SET_SRC (set), 0), 1)) == 3) + && REG_P (SET_DEST (set))); +} + +/* Matches a shNadd.uw: + (set (reg:DI rd) + (plus:DI (and:DI (ashift:DI (reg:DI rs1) (const_int N)) + (const_int N)) + (reg:DI rs2)) */ + +static bool +riscv_set_is_shNadduw (rtx set) +{ + return (GET_CODE (SET_SRC (set)) == PLUS + && GET_CODE (XEXP (SET_SRC (set), 0)) == AND + && GET_CODE (XEXP (XEXP (SET_SRC (set), 0), 0)) == ASHIFT + && REG_P (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 0)) + && CONST_INT_P (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 1)) + && (INTVAL (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 1)) == 1 + || INTVAL (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 1)) == 2 + || INTVAL (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 1)) == 3) + && REG_P (SET_DEST (set))); +} + /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR should be kept together during scheduling. */ static bool riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) { + /* If fusion is not enabled, then there's nothing to do. */ + if (!riscv_macro_fusion_p ()) + return false; + + /* If PREV is already marked as fused, then we can't fuse CURR with PREV + and if we were to fuse them we'd end up with a blob of insns that + essentially are an atomic unit which is bad for scheduling. */ + if (SCHED_GROUP_P (prev)) + return false; + rtx prev_set = single_set (prev); rtx curr_set = single_set (curr); /* prev and curr are simple SET insns i.e. no flag setting or branching. */ bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); + bool sched1 = can_create_pseudo_p (); - if (!riscv_macro_fusion_p ()) - return false; + unsigned int prev_dest_regno = (REG_P (SET_DEST (prev_set)) + ? REGNO (SET_DEST (prev_set)) + : FIRST_PSEUDO_REGISTER); + unsigned int curr_dest_regno = (REG_P (SET_DEST (curr_set)) + ? 
REGNO (SET_DEST (curr_set)) + : FIRST_PSEUDO_REGISTER); if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) - || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS))) + || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)) + && (sched1 || prev_dest_regno == curr_dest_regno)) { /* We are trying to match the following: prev (slli) == (set (reg:DI rD) @@ -9928,19 +10383,23 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)) - && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) - && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == curr_dest_regno && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)) && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32 - && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32 - && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) ) - || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32 - && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS)))) - return true; + && ((INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32 + && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ) + || (INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32 + && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_ZEXTWS\n"); + return true; + } } - if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)) + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH) + && (sched1 || prev_dest_regno == curr_dest_regno)) { /* We are trying to match the following: prev (slli) == (set (reg:DI rD) @@ -9952,16 +10411,20 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)) - && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) - && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == curr_dest_regno && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)) && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48) - return true; + { + if (dump_file) + fprintf (dump_file,"RISCV_FUSE_ZEXTH\n"); + return true; + } } - if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED)) + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED) + && (sched1 || prev_dest_regno == curr_dest_regno)) { /* We are trying to match the following: prev (add) == (set (reg:DI rD) @@ -9970,12 +10433,17 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) (mem:DI (reg:DI rD))) */ if (MEM_P (SET_SRC (curr_set)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) && REG_P (XEXP (SET_SRC (curr_set), 0)) - && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno && GET_CODE (SET_SRC (prev_set)) == PLUS && REG_P (XEXP (SET_SRC (prev_set), 0)) && REG_P (XEXP (SET_SRC (prev_set), 1))) - return true; + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_LDINDEXED\n"); + return true; + } /* We are trying to match the following: prev (add) == (set (reg:DI rD) @@ -9985,15 +10453,154 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)) && MEM_P (XEXP (SET_SRC (curr_set), 0)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) && REG_P (XEXP (XEXP 
(SET_SRC (curr_set), 0), 0)) - && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set)) + && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno && GET_CODE (SET_SRC (prev_set)) == PLUS && REG_P (XEXP (SET_SRC (prev_set), 0)) && REG_P (XEXP (SET_SRC (prev_set), 1))) - return true; + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_LDINDEXED\n"); + return true; + } + } + + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_EXPANDED_LD) + && (sched1 || prev_dest_regno == curr_dest_regno)) + { + /* For the "expanded add/load fusion" family we have 2 main + categories: memory loads with displacement (i.e. with imm offset) + and loads without displacement (i.e. with offset = x0). + + For loads without displacement we'll need: + - add + ld (done in RISCV_FUSE_LDINDEXED) + - addi + ld (done in RISCV_FUSE_LDPREINCREMENT) + - shNadd + ld + - add.uw + lw + - shNadd.uw + lw + + For loads with displacement/immediates: + with lw with immediate): + - add + ld with displacement + - addi + ld with displacement + - shNadd + ld with displacement + - add.uw + lw with displacement + - shNadd.uw + lw with displacement */ + + /* We're trying to match a curr_set ld with displacement: + prev (add|addi) = (set (reg:DI rd) (...)) + curr (ld) == (set (reg:DI rD) + (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */ + if (MEM_P (SET_SRC (curr_set)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) + && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS + && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno) + { + if (riscv_set_is_add (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + + if (riscv_set_is_addi (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + + if (riscv_set_is_shNadd (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + } + + /* We're trying to match a ld without displacement: + prev (addi|shNadd) = (reg:DI rD) (...)) + curr (ld) == (set (reg:DI rD) + (mem:DI (reg:DI rD))) */ + if (MEM_P (SET_SRC (curr_set)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) + && REG_P (XEXP (SET_SRC (curr_set), 0)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno) + { + if (riscv_set_is_addi (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + + if (riscv_set_is_shNadd (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + } + + /* We're trying to match a curr_set lw with displacement: + prev (add.uw|shNadd.uw) = (set (reg:DI rd) (...)) + curr (lw) == (set (reg:DI rd) + (any_extend:DI (mem:SUBX (plus:DI ((reg:DI rd) + (const_int IMM)))) */ + if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND + || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)) + && MEM_P (XEXP (SET_SRC (curr_set), 0)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) + && GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == PLUS + && REG_P (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0),0)) + && (REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0),0)) + == prev_dest_regno)) + { + if (riscv_set_is_adduw (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + + if (riscv_set_is_shNadduw (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + } + + /* We're trying to match a curr_set lw without displacement: + prev 
(add.uw|shNadd.uw) = (set (reg:DI rd) (...)) + curr (ld|lh|lw) == (set (reg:DI rd) + (any_extend:DI (mem:SUBX (reg:DI rsd)))) */ + if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND + || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)) + && MEM_P (XEXP (SET_SRC (curr_set), 0)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) + && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) + && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno) + { + if (riscv_set_is_adduw (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + + if (riscv_set_is_shNadduw (prev_set)) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n"); + return true; + } + } } - if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT)) + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT) + && (sched1 || prev_dest_regno == curr_dest_regno)) { /* We are trying to match the following: prev (add) == (set (reg:DI rS) @@ -10002,15 +10609,21 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) (mem:DI (reg:DI rS))) */ if (MEM_P (SET_SRC (curr_set)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) && REG_P (XEXP (SET_SRC (curr_set), 0)) - && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno && GET_CODE (SET_SRC (prev_set)) == PLUS && REG_P (XEXP (SET_SRC (prev_set), 0)) && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))) - return true; + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_LDPREINCREMENT\n"); + return true; + } } - if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI)) + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI) + && (sched1 || prev_dest_regno == curr_dest_regno)) { /* We are trying to match the following: prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20)) @@ -10024,10 +10637,15 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && (GET_CODE (SET_SRC (prev_set)) == HIGH || (CONST_INT_P (SET_SRC (prev_set)) && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))))) - return true; + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_LUI_ADDI\n"); + return true; + } } - if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI)) + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI) + && (sched1 || prev_dest_regno == curr_dest_regno)) { /* We are trying to match the following: prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC)) @@ -10045,38 +10663,64 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))) - return true; + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_AUIPC_ADDI\n"); + return true; + } } - if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD)) + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD) + && (sched1 || prev_dest_regno == curr_dest_regno)) { /* We are trying to match the following: prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20)) curr (ld) == (set (reg:DI rD) (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */ + /* A LUI_OPERAND accepts (const_int 0), but we won't emit that as LUI. So + reject that case explicitly. 
*/ if (CONST_INT_P (SET_SRC (prev_set)) + && SET_SRC (prev_set) != CONST0_RTX (GET_MODE (SET_DEST (prev_set))) && LUI_OPERAND (INTVAL (SET_SRC (prev_set))) && MEM_P (SET_SRC (curr_set)) - && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS) - return true; + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) + && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS + && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_LUI_LD\n"); + return true; + } if (GET_CODE (SET_SRC (prev_set)) == HIGH && MEM_P (SET_SRC (curr_set)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM - && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0))) - return true; + && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_LUI_LD\n"); + return true; + } if (GET_CODE (SET_SRC (prev_set)) == HIGH && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND) && MEM_P (XEXP (SET_SRC (curr_set), 0)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM - && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0)))) - return true; + && (REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0)) + == prev_dest_regno))) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_LUI_LD\n"); + return true; + } } - if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD)) + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD) + && (sched1 || prev_dest_regno == curr_dest_regno)) { /* We are trying to match the following: prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC)) @@ -10086,11 +10730,16 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) if (GET_CODE (SET_SRC (prev_set)) == UNSPEC && XINT (prev_set, 1) == UNSPEC_AUIPC && MEM_P (SET_SRC (curr_set)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS) - return true; + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_AUIPC_LD\n"); + return true; + } } - if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD)) + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_CACHE_ALIGNED_STD)) { /* We are trying to match the following: prev (sd) == (set (mem (plus (reg sp|fp) (const_int))) @@ -10100,6 +10749,7 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) if (MEM_P (SET_DEST (prev_set)) && MEM_P (SET_DEST (curr_set)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) /* We can probably relax this condition. The documentation is a bit unclear about sub-word cases. So we just model DImode for now. */ && GET_MODE (SET_DEST (curr_set)) == DImode @@ -10110,43 +10760,205 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev); extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr); - /* Fail if we did not find both bases. */ - if (base_prev == NULL_RTX || base_curr == NULL_RTX) - return false; + /* Proceed only if we find both bases, both bases are register and + bases are the same register. 
*/ + if (base_prev != NULL_RTX && base_curr != NULL_RTX + && REG_P (base_prev) && REG_P (base_curr) + && REGNO (base_prev) != REGNO (base_curr) + /* The alignment of hte base pointer is more useful than the + alignment of the memory reference for determining if we're + on opposite sides of a cache line. */ + && REGNO_POINTER_ALIGN (ORIGINAL_REGNO (base_prev)) >= 128) + { + /* The two stores must be contained within opposite halves of the + same 16 byte aligned block of memory. We know the pointer + has suitable alignment, so we just need to check the offsets + of the two stores for suitable alignment. */ + + /* Get the smaller offset into OFFSET_PREV. */ + if (INTVAL (offset_prev) > INTVAL (offset_curr)) + std::swap (offset_prev, offset_curr); + + /* We have a match if the smaller offset (OFFSET_PREV) is 16 + byte aligned and the higher offset is 8 bytes more than the + lower offset. */ + if ((INTVAL (offset_prev) % 16) == 0 + && (INTVAL (offset_prev) + 8 == INTVAL (offset_curr))) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_ALIGNED_STD\n"); + return true; + } + } + } + } - /* Fail if either base is not a register. */ - if (!REG_P (base_prev) || !REG_P (base_curr)) - return false; + /* More general form of the RISCV_FUSE_CACHE_ALIGNED_STD. The + major difference is the dependency on the stores being opposite + halves of a cache line is dropped. Instead the lowest address + needs 2X the alignment of the object and the higher address + immediately followed the first object. */ + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD)) + { + /* We are trying to match the following: + prev (sd) == (set (mem (plus (reg rS1) (const_int))) + (reg rS2)) + curr (sd) == (set (mem (plus (reg rS1) (const_int))) + (reg rS3)) */ - /* Fail if the bases are not the same register. */ - if (REGNO (base_prev) != REGNO (base_curr)) - return false; + if (MEM_P (SET_DEST (prev_set)) + && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set))) + && MEM_P (SET_DEST (curr_set)) + /* Stores must have the same width */ + && GET_MODE (SET_DEST (curr_set)) == GET_MODE (SET_DEST (prev_set))) + { + rtx base_prev, base_curr, offset_prev, offset_curr; + unsigned mode_size; + + extract_base_offset_in_addr (SET_DEST (prev_set), + &base_prev, &offset_prev); + extract_base_offset_in_addr (SET_DEST (curr_set), + &base_curr, &offset_curr); + + /* Proceed only if we find both bases, both bases + are registers and bases are the same register. */ + if (base_prev != NULL_RTX && base_curr != NULL_RTX + && REG_P (base_prev) && REG_P (base_curr) + && REGNO (base_prev) == REGNO (base_curr)) + { + machine_mode mode = GET_MODE (SET_DEST (curr_set)); + mode_size = estimated_poly_value (GET_MODE_SIZE (mode)); + + HOST_WIDE_INT offset_prev_int = INTVAL (offset_prev); + HOST_WIDE_INT offset_curr_int = INTVAL (offset_curr); + + /* Get the smaller offset into OFFSET_PREV_INT. */ + if (offset_prev_int > offset_curr_int) + std::swap (offset_prev_int, offset_curr_int); + + /* We've normalized, so we need to check that the lower + address is aligned to 2X the size of the object. The + higher address must be the lower address plus the + size of the object. */ + if (((offset_prev_int % (2 * mode_size)) == 0) + && offset_prev_int + mode_size == offset_curr_int) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_ALIGNED_STD\n"); + return true; + } + } + } + } - /* Originally the thought was to check MEM_ALIGN, but that was - reporting incorrect alignments, even for SP/FP accesses, so we - gave up on that approach. 
Instead just check for stack/hfp - which we know are aligned. */ - if (REGNO (base_prev) != STACK_POINTER_REGNUM - && REGNO (base_prev) != HARD_FRAME_POINTER_REGNUM) - return false; + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_BFEXT) + && (sched1 || prev_dest_regno == curr_dest_regno)) + { + /* We are trying to match the following: + prev (slli) == (set (reg:DI rD) + (ashift:DI (reg:DI rS) (const_int))) + curr (srli) == (set (reg:DI rD) + (lshiftrt:DI (reg:DI rD) (const_int))) */ - /* The two stores must be contained within opposite halves of the - same 16 byte aligned block of memory. We know that the stack - pointer and the frame pointer have suitable alignment. So we - just need to check the offsets of the two stores for suitable - alignment. */ - /* Get the smaller offset into OFFSET_PREV. */ - if (INTVAL (offset_prev) > INTVAL (offset_curr)) - std::swap (offset_prev, offset_curr); - - /* If the smaller offset (OFFSET_PREV) is not 16 byte aligned, - then fail. */ - if ((INTVAL (offset_prev) % 16) != 0) - return false; + if (GET_CODE (SET_SRC (prev_set)) == ASHIFT + && (GET_CODE (SET_SRC (curr_set)) == LSHIFTRT + || GET_CODE (SET_SRC (curr_set)) == ASHIFTRT) + && REG_P (SET_DEST (prev_set)) + && REG_P (SET_DEST (curr_set)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno + && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)) + && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_BFEXT\n"); + return true; + } + } + + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_B_ALUI) + && (sched1 || prev_dest_regno == curr_dest_regno)) + { + /* We are trying to match the following: + prev (orc.b) == (set (reg rD) + (unspec (reg rS1))) + curr (not) == (set (reg rD2) (not (reg rD))) */ - /* The higher offset must be 8 bytes more than the lower - offset. 
*/ - return (INTVAL (offset_prev) + 8 == INTVAL (offset_curr)); + if (GET_CODE (SET_SRC (prev_set)) == UNSPEC + && GET_CODE (SET_SRC (curr_set)) == NOT + && XINT (SET_SRC (prev_set), 1) == UNSPEC_ORC_B + && REG_P (SET_DEST (prev_set)) + && REG_P (SET_DEST (curr_set)) + && REG_P (XEXP (SET_SRC (curr_set), 0)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_B_ALUI\n"); + return true; + } + + /* We are trying to match the following: + prev (ctz) == (set (reg rD) (ctz (reg rS1))) + curr (andi) == (set (reg rD) + (and (reg rD) (const_int 63))) */ + + if (GET_CODE (SET_SRC (prev_set)) == CTZ + && GET_CODE (SET_SRC (curr_set)) == AND + && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) + && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 63 + && REG_P (SET_DEST (prev_set)) + && REG_P (SET_DEST (curr_set)) + && REG_P (XEXP (SET_SRC (curr_set), 0)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_B_ALUI\n"); + return true; + } + + /* We are trying to match the following: + prev (sub) == (set (reg rD) + (minus (const_int 0) (reg rS2)) + curr (max) == (set (reg rD) + (smax (reg rD) (reg rS2))) */ + + if (GET_CODE (SET_SRC (prev_set)) == MINUS + && (XEXP (SET_SRC (prev_set), 0) + == CONST0_RTX (GET_MODE (SET_SRC (prev_set)))) + && CONST_INT_P (XEXP (SET_SRC (prev_set), 0)) + && GET_CODE (SET_SRC (curr_set)) == SMAX + && REG_P (SET_DEST (prev_set)) + && REG_P (SET_DEST (curr_set)) + && REG_P (XEXP (SET_SRC (curr_set), 0)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno + && REG_P (XEXP (SET_SRC (prev_set), 1)) + && REG_P (XEXP (SET_SRC (curr_set), 1)) + && (REGNO (XEXP (SET_SRC (prev_set), 1)) + == REGNO (XEXP (SET_SRC (curr_set), 1)))) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_B_ALUI\n"); + return true; + } + + /* We are trying to match the following: + prev (neg) == (set (reg rD) (neg (reg rS1))) + curr (max) == (set (reg rD) + (smax (reg rD) (reg rS1))) */ + + if (GET_CODE (SET_SRC (prev_set)) == NEG + && GET_CODE (SET_SRC (curr_set)) == SMAX + && REG_P (SET_DEST (prev_set)) + && REG_P (SET_DEST (curr_set)) + && REG_P (XEXP (SET_SRC (curr_set), 0)) + && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno + && REG_P (XEXP (SET_SRC (prev_set), 0)) + && REG_P (XEXP (SET_SRC (curr_set), 1)) + && (REGNO (XEXP (SET_SRC (prev_set), 0)) + == REGNO (XEXP (SET_SRC (curr_set), 1)))) + { + if (dump_file) + fprintf (dump_file, "RISCV_FUSE_B_ALUI\n"); + return true; } } @@ -10222,6 +11034,27 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost, return new_cost; } +/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can + can be scheduled for speculative execution. Reject vsetvl instructions to + prevent the scheduler from hoisting them out of basic blocks without + checking for data dependencies PR117974. */ +static bool +riscv_sched_can_speculate_insn (rtx_insn *insn) +{ + /* Gate speculative scheduling of vsetvl instructions behind tune param. */ + if (tune_param->speculative_sched_vsetvl) + return true; + + switch (get_attr_type (insn)) + { + case TYPE_VSETVL: + case TYPE_VSETVL_PRE: + return false; + default: + return true; + } +} + /* Auxiliary function to emit RISC-V ELF attribute. */ static void riscv_emit_attribute () @@ -10382,7 +11215,7 @@ riscv_file_end () fprintf (asm_out_file, "1:\n"); /* pr_type. 
*/ - fprintf (asm_out_file, "\t.p2align\t3\n"); + fprintf (asm_out_file, "\t.p2align\t%u\n", p2align); fprintf (asm_out_file, "2:\n"); fprintf (asm_out_file, "\t.long\t0xc0000000\n"); /* pr_datasz. */ @@ -11528,11 +12361,10 @@ riscv_gpr_save_operation_p (rtx op) /* Two CLOBBER and USEs, must check the order. */ unsigned expect_code = i < 3 ? CLOBBER : USE; if (GET_CODE (elt) != expect_code - || !REG_P (XEXP (elt, 1)) - || (REGNO (XEXP (elt, 1)) != gpr_save_reg_order[i])) + || !REG_P (XEXP (elt, 0)) + || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i])) return false; } - break; } return true; } @@ -11986,6 +12818,14 @@ riscv_lshift_subword (machine_mode mode ATTRIBUTE_UNUSED, rtx value, rtx shift, gen_lowpart (QImode, shift))); } +/* Return TRUE if we should use the zero stride load, FALSE otherwise. */ + +bool +strided_load_broadcast_p () +{ + return tune_param->use_zero_stride_load; +} + /* Return TRUE if we should use the divmod expander, FALSE otherwise. This allows the behavior to be tuned for specific implementations as well as when optimizing for size. */ @@ -12047,27 +12887,30 @@ riscv_emit_frm_mode_set (int mode, int prev_mode) if (prev_mode == riscv_vector::FRM_DYN_CALL) emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */ - if (mode != prev_mode) - { - rtx frm = gen_int_mode (mode, SImode); + if (mode == prev_mode) + return; - if (mode == riscv_vector::FRM_DYN_CALL - && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun)) - /* No need to emit when prev mode is DYN already. */ - emit_insn (gen_fsrmsi_restore_volatile (backup_reg)); - else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun) - && prev_mode != riscv_vector::FRM_DYN - && prev_mode != riscv_vector::FRM_DYN_CALL) - /* No need to emit when prev mode is DYN or DYN_CALL already. */ - emit_insn (gen_fsrmsi_restore_volatile (backup_reg)); - else if (mode == riscv_vector::FRM_DYN - && prev_mode != riscv_vector::FRM_DYN_CALL) - /* Restore frm value from backup when switch to DYN mode. */ - emit_insn (gen_fsrmsi_restore (backup_reg)); - else if (riscv_static_frm_mode_p (mode)) - /* Set frm value when switch to static mode. */ - emit_insn (gen_fsrmsi_restore (frm)); + if (riscv_static_frm_mode_p (mode)) + { + /* Set frm value when switch to static mode. */ + emit_insn (gen_fsrmsi_restore (gen_int_mode (mode, SImode))); + return; } + + bool restore_p + = /* No need to emit when prev mode is DYN. */ + (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN_CALL + && prev_mode != riscv_vector::FRM_DYN) + /* No need to emit if prev mode is DYN or DYN_CALL. */ + || (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN_EXIT + && prev_mode != riscv_vector::FRM_DYN + && prev_mode != riscv_vector::FRM_DYN_CALL) + /* Restore frm value when switch to DYN mode. */ + || (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN + && prev_mode != riscv_vector::FRM_DYN_CALL); + + if (restore_p) + emit_insn (gen_fsrmsi_restore (backup_reg)); } /* Implement Mode switching. */ @@ -12090,59 +12933,6 @@ riscv_emit_mode_set (int entity, int mode, int prev_mode, } } -/* Adjust the FRM_NONE insn after a call to FRM_DYN for the - underlying emit. */ - -static int -riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode) -{ - rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn); - - if (insn && CALL_P (insn)) - return riscv_vector::FRM_DYN; - - return mode; -} - -/* Insert the backup frm insn to the end of the bb if and only if the call - is the last insn of this bb. 
*/ - -static void -riscv_frm_emit_after_bb_end (rtx_insn *cur_insn) -{ - edge eg; - bool abnormal_edge_p = false; - edge_iterator eg_iterator; - basic_block bb = BLOCK_FOR_INSN (cur_insn); - - FOR_EACH_EDGE (eg, eg_iterator, bb->succs) - { - if (eg->flags & EDGE_ABNORMAL) - abnormal_edge_p = true; - else - { - start_sequence (); - emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun))); - rtx_insn *backup_insn = get_insns (); - end_sequence (); - - insert_insn_on_edge (backup_insn, eg); - } - } - - if (abnormal_edge_p) - { - start_sequence (); - emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun))); - rtx_insn *backup_insn = get_insns (); - end_sequence (); - - insert_insn_end_basic_block (backup_insn, bb); - } - - commit_edge_insertions (); -} - /* Return mode that frm must be switched into prior to the execution of insn. */ @@ -12154,33 +12944,25 @@ riscv_frm_mode_needed (rtx_insn *cur_insn, int code) /* The dynamic frm will be initialized only onece during cfun. */ DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode); emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun))); + CFUN_IN_CALL (cfun) = false; } if (CALL_P (cur_insn)) { - rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn); - - if (!insn) - riscv_frm_emit_after_bb_end (cur_insn); - + CFUN_IN_CALL (cfun) = true; return riscv_vector::FRM_DYN_CALL; } int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE; if (mode == riscv_vector::FRM_NONE) - /* After meet a call, we need to backup the frm because it may be - updated during the call. Here, for each insn, we will check if - the previous insn is a call or not. When previous insn is call, - there will be 2 cases for the emit mode set. - - 1. Current insn is not MODE_NONE, then the mode switch framework - will do the mode switch from MODE_CALL to MODE_NONE natively. - 2. Current insn is MODE_NONE, we need to adjust the MODE_NONE to - the MODE_DYN, and leave the mode switch itself to perform - the emit mode set. - */ - mode = riscv_frm_adjust_mode_after_call (cur_insn, mode); + { + if (CFUN_IN_CALL (cfun)) + { + CFUN_IN_CALL (cfun) = false; + return riscv_vector::FRM_DYN; + } + } return mode; } @@ -12207,7 +12989,7 @@ singleton_vxrm_need (void) /* Walk the IL noting if VXRM is needed and if there's more than one mode needed. */ bool found = false; - int saved_vxrm_mode; + int saved_vxrm_mode = VXRM_MODE_NONE; for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) { if (!INSN_P (insn) || DEBUG_INSN_P (insn)) @@ -12480,6 +13262,36 @@ get_vector_costs () return costs; } +/* Return the cost of operation that move from gpr to vr. + It will take the value of --param=gpr2vr_cost if it is provided. + Or the default regmove->GR2VR will be returned. */ + +int +get_gr2vr_cost () +{ + int cost = get_vector_costs ()->regmove->GR2VR; + + if (gpr2vr_cost != GPR2VR_COST_UNPROVIDED) + cost = gpr2vr_cost; + + return cost; +} + +/* Return the cost of moving data from floating-point to vector register. + It will take the value of --param=fpr2vr-cost if it is provided. + Otherwise the default regmove->FR2VR will be returned. */ + +int +get_fr2vr_cost () +{ + int cost = get_vector_costs ()->regmove->FR2VR; + + if (fpr2vr_cost != FPR2VR_COST_UNPROVIDED) + cost = fpr2vr_cost; + + return cost; +} + /* Implement targetm.vectorize.builtin_vectorization_cost. */ static int @@ -12545,8 +13357,7 @@ riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, case vec_construct: { /* TODO: This is too pessimistic in case we can splat. */ - int regmove_cost = fp ? 
costs->regmove->FR2VR - : costs->regmove->GR2VR; + int regmove_cost = fp ? get_fr2vr_cost () : get_gr2vr_cost (); return (regmove_cost + common_costs->scalar_to_vec_cost) * estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); } @@ -12979,6 +13790,88 @@ riscv_expand_sssub (rtx dest, rtx x, rtx y) emit_move_insn (dest, gen_lowpart (mode, xmode_dest)); } +/* Implement the Xmode usmul. + + b = SAT_MUL (a, b); + => + _1 = a * b; + _2 = mulhu (a, b); + _overflow_p = _2 == 0; + _mask = - _overflow_p; + b = _1 | _mask; + */ + +static void +riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y) +{ + machine_mode mode = GET_MODE (dest); + + gcc_assert (mode == Xmode); + + rtx mul = gen_reg_rtx (Xmode); + rtx mulhu = gen_reg_rtx (Xmode); + rtx overflow_p = gen_reg_rtx (Xmode); + + riscv_emit_binary (MULT, mul, x, y); + + if (TARGET_64BIT) + emit_insn (gen_usmuldi3_highpart (mulhu, x, y)); + else + emit_insn (gen_usmulsi3_highpart (mulhu, x, y)); + + riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode)); + riscv_emit_unary (NEG, overflow_p, overflow_p); + riscv_emit_binary (IOR, dest, mul, overflow_p); +} + +/* Implement the non-Xmode usmul. + + b = SAT_MUL (a, b); + => + _1 = a * b; + _max = (T)-1 + _overflow_p = _1 > _max; + _mask = - _overflow_p; + b = _1 | _mask; + */ + +static void +riscv_expand_non_xmode_usmul (rtx dest, rtx x, rtx y) +{ + machine_mode mode = GET_MODE (dest); + unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant (); + + gcc_assert (mode != Xmode); + + rtx xmode_x = riscv_extend_to_xmode_reg (x, mode, ZERO_EXTEND); + rtx xmode_y = riscv_extend_to_xmode_reg (y, mode, ZERO_EXTEND); + rtx xmode_mul = gen_reg_rtx (Xmode); + rtx mul_max = gen_reg_rtx (Xmode); + rtx overflow_p = gen_reg_rtx (Xmode); + + uint64_t max = ((uint64_t)1 << bitsize) - 1; + + emit_move_insn (mul_max, GEN_INT (max)); + riscv_emit_binary (MULT, xmode_mul, xmode_x, xmode_y); + + riscv_emit_binary (LTU, overflow_p, mul_max, xmode_mul); + riscv_emit_unary (NEG, overflow_p, overflow_p); + riscv_emit_binary (IOR, xmode_mul, xmode_mul, overflow_p); + + emit_move_insn (dest, gen_lowpart (mode, xmode_mul)); +} + +/* Implements the unsigned saturation mult standard name usmul for int mode. */ + +void +riscv_expand_usmul (rtx dest, rtx x, rtx y) +{ + if (GET_MODE (dest) == Xmode) + return riscv_expand_xmode_usmul (dest, x, y) ; + else + return riscv_expand_non_xmode_usmul (dest, x, y); +} + /* Implement the unsigned saturation truncation for int mode. b = SAT_TRUNC (a); @@ -13136,9 +14029,6 @@ parse_features_for_version (tree decl, DECL_SOURCE_LOCATION (decl)); gcc_assert (parse_res); - if (arch_string != default_opts->x_riscv_arch_string) - free (CONST_CAST (void *, (const void *) arch_string)); - cl_target_option_restore (&global_options, &global_options_set, &cur_target); } @@ -13735,7 +14625,6 @@ riscv_get_function_versions_dispatcher (void *decl) struct cgraph_node *node = NULL; struct cgraph_node *default_node = NULL; struct cgraph_function_version_info *node_v = NULL; - struct cgraph_function_version_info *first_v = NULL; tree dispatch_decl = NULL; @@ -13752,41 +14641,16 @@ riscv_get_function_versions_dispatcher (void *decl) if (node_v->dispatcher_resolver != NULL) return node_v->dispatcher_resolver; - /* Find the default version and make it the first node. */ - first_v = node_v; - /* Go to the beginning of the chain. 
*/ - while (first_v->prev != NULL) - first_v = first_v->prev; - default_version_info = first_v; - - while (default_version_info != NULL) - { - struct riscv_feature_bits res; - int priority; /* Unused. */ - parse_features_for_version (default_version_info->this_node->decl, - res, priority); - if (res.length == 0) - break; - default_version_info = default_version_info->next; - } + /* The default node is always the beginning of the chain. */ + default_version_info = node_v; + while (default_version_info->prev) + default_version_info = default_version_info->prev; + default_node = default_version_info->this_node; /* If there is no default node, just return NULL. */ - if (default_version_info == NULL) + if (!is_function_default_version (default_node->decl)) return NULL; - /* Make default info the first node. */ - if (first_v != default_version_info) - { - default_version_info->prev->next = default_version_info->next; - if (default_version_info->next) - default_version_info->next->prev = default_version_info->prev; - first_v->prev = default_version_info; - default_version_info->next = first_v; - default_version_info->prev = NULL; - } - - default_node = default_version_info->this_node; - if (targetm.has_ifunc_p ()) { struct cgraph_function_version_info *it_v = NULL; @@ -13930,17 +14794,53 @@ expand_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode, rtx data = gen_rtx_ZERO_EXTEND (word_mode, operands[2]); riscv_expand_op (XOR, word_mode, a0, crc, data); - if (TARGET_64BIT) - emit_insn (gen_riscv_clmul_di (a0, a0, t0)); - else - emit_insn (gen_riscv_clmul_si (a0, a0, t0)); + if (TARGET_ZBKC || TARGET_ZBC) + { + if (TARGET_64BIT) + emit_insn (gen_riscv_clmul_di (a0, a0, t0)); + else + emit_insn (gen_riscv_clmul_si (a0, a0, t0)); - riscv_expand_op (LSHIFTRT, word_mode, a0, a0, - gen_int_mode (crc_size, word_mode)); - if (TARGET_64BIT) - emit_insn (gen_riscv_clmul_di (a0, a0, t1)); + riscv_expand_op (LSHIFTRT, word_mode, a0, a0, + gen_int_mode (crc_size, word_mode)); + if (TARGET_64BIT) + emit_insn (gen_riscv_clmul_di (a0, a0, t1)); + else + emit_insn (gen_riscv_clmul_si (a0, a0, t1)); + } else - emit_insn (gen_riscv_clmul_si (a0, a0, t1)); + { + machine_mode vmode; + if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode)) + gcc_unreachable (); + + rtx vec = gen_reg_rtx (vmode); + + insn_code icode1 = code_for_pred_broadcast (vmode); + rtx ops1[] = {vec, a0}; + emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode)); + + rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0); + insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL, + E_RVVM1DImode); + rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0}; + emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX + (Pmode)); + + rtx shift_amount = gen_int_mode (data_size, Pmode); + insn_code icode3 = code_for_pred_scalar (LSHIFTRT, vmode); + rtx ops3[] = {vec, vec, shift_amount}; + emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode)); + + insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH, + E_RVVM1DImode); + rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1}; + emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX + (Pmode)); + + rtx vec_low_lane = gen_lowpart (DImode, vec); + riscv_emit_move (a0, vec_low_lane); + } if (crc_size > data_size) { @@ -13989,19 +14889,53 @@ expand_reversed_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode, rtx a0 = gen_reg_rtx (word_mode); riscv_expand_op (XOR, word_mode, a0, crc, data); - if (TARGET_64BIT) - emit_insn (gen_riscv_clmul_di (a0, a0, 
t0)); - else - emit_insn (gen_riscv_clmul_si (a0, a0, t0)); + if (TARGET_ZBKC || TARGET_ZBC) + { + if (TARGET_64BIT) + emit_insn (gen_riscv_clmul_di (a0, a0, t0)); + else + emit_insn (gen_riscv_clmul_si (a0, a0, t0)); - rtx num_shift = gen_int_mode (GET_MODE_BITSIZE (word_mode) - data_size, - word_mode); - riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift); + rtx num_shift = gen_int_mode (BITS_PER_WORD - data_size, word_mode); + riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift); - if (TARGET_64BIT) - emit_insn (gen_riscv_clmulh_di (a0, a0, t1)); + if (TARGET_64BIT) + emit_insn (gen_riscv_clmulh_di (a0, a0, t1)); + else + emit_insn (gen_riscv_clmulh_si (a0, a0, t1)); + } else - emit_insn (gen_riscv_clmulh_si (a0, a0, t1)); + { + machine_mode vmode; + if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode)) + gcc_unreachable (); + + rtx vec = gen_reg_rtx (vmode); + insn_code icode1 = code_for_pred_broadcast (vmode); + rtx ops1[] = {vec, a0}; + emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode)); + + rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0); + insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL, + E_RVVM1DImode); + rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0}; + emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX + (Pmode)); + + rtx shift_amount = gen_int_mode (BITS_PER_WORD - data_size, Pmode); + insn_code icode3 = code_for_pred_scalar (ASHIFT, vmode); + rtx ops3[] = {vec, vec, shift_amount}; + emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode)); + + insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH, + E_RVVM1DImode); + rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1}; + emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX + (Pmode)); + + rtx vec_low_lane = gen_lowpart (DImode, vec); + riscv_emit_move (a0, vec_low_lane); + } if (crc_size > data_size) { @@ -14035,6 +14969,427 @@ bool need_shadow_stack_push_pop_p () return is_zicfiss_p () && riscv_save_return_addr_reg_p (); } +/* Synthesize OPERANDS[0] = OPERANDS[1] CODE OPERANDS[2]. + + OPERANDS[0] and OPERANDS[1] will be a REG and may be the same + REG. + + OPERANDS[2] is a CONST_INT. + + CODE is IOR or XOR. + + Return TRUE if the operation was fully synthesized and the caller + need not generate additional code. Return FALSE if the operation + was not synthesized and the caller is responsible for emitting the + proper sequence. */ + +bool +synthesize_ior_xor (rtx_code code, rtx operands[3]) +{ + /* Trivial cases that don't need synthesis. */ + if (SMALL_OPERAND (INTVAL (operands[2])) + || ((TARGET_ZBS || TARGET_ZBKB) + && single_bit_mask_operand (operands[2], word_mode))) + return false; + + /* The number of instructions to synthesize the constant is a good + estimate of the budget. That does not account for out of order + execution an fusion in the constant synthesis those would naturally + decrease the budget. It also does not account for the IOR/XOR at + the end of the sequence which would increase the budget. */ + int budget = (TARGET_ZBS ? riscv_const_insns (operands[2], true) : -1); + int original_budget = budget; + + /* Bits we need to set in operands[0]. As we synthesize the operation, + we clear bits in IVAL. Once IVAL is zero, then synthesis of the + operation is complete. */ + unsigned HOST_WIDE_INT ival = INTVAL (operands[2]); + + /* Check if we want to use [x]ori. Then get the remaining bits + and decrease the budget by one. 
*/ + if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0) + { + ival &= ~HOST_WIDE_INT_UC (0x7ff); + budget--; + } + + /* Check for bseti cases. For each remaining bit in ival, + decrease the budget by one. */ + while (ival) + { + HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + ival &= ~tmpval; + budget--; + } + + /* If we're flipping all but a small number of bits we can pre-flip + the outliers, then flip all the bits, which would restore those + bits that were pre-flipped. */ + if ((TARGET_ZBS || TARGET_ZBKB) + && budget < 0 + && code == XOR + && popcount_hwi (~INTVAL (operands[2])) < original_budget) + { + /* Pre-flipping bits we want to preserve. */ + rtx input = operands[1]; + rtx output = NULL_RTX; + ival = ~INTVAL (operands[2]); + while (ival) + { + HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + rtx x = GEN_INT (tmpval); + x = gen_rtx_XOR (word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + ival &= ~tmpval; + } + + gcc_assert (output); + + /* Now flip all the bits, which restores the bits we were + preserving. */ + rtx x = gen_rtx_NOT (word_mode, input); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* One more approach we can try. If our budget is 3+ instructions, + then we can try to rotate the source so that the bits we want to + set are in the low 11 bits. We then use [x]ori to set those low + bits, then rotate things back into their proper place. */ + if ((TARGET_ZBB || TARGET_XTHEADBB || TARGET_ZBKB) + && budget < 0 + && popcount_hwi (INTVAL (operands[2])) <= 11 + && riscv_const_insns (operands[2], true) >= 3) + { + ival = INTVAL (operands[2]); + /* First see if the constant trivially fits into 11 bits in the LSB. */ + int lsb = ctz_hwi (ival); + int msb = BITS_PER_WORD - 1 - clz_hwi (ival); + if (msb - lsb + 1 <= 11) + { + rtx output = gen_reg_rtx (word_mode); + rtx input = operands[1]; + + /* Rotate the source right by LSB bits. */ + rtx x = GEN_INT (lsb); + x = gen_rtx_ROTATERT (word_mode, input, x); + emit_insn (gen_rtx_SET (output, x)); + input = output; + + /* Shift the constant right by LSB bits. */ + x = GEN_INT (ival >> lsb); + + /* Perform the IOR/XOR operation. */ + x = gen_rtx_fmt_ee (code, word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + + /* And rotate left to put everything back in place, we don't + have rotate left by a constant, so use rotate right by + an adjusted constant. */ + x = GEN_INT (BITS_PER_WORD - lsb); + x = gen_rtx_ROTATERT (word_mode, input, x); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* Maybe the bits are split between the high and low parts + of the constant. A bit more complex, but still manageable. + + Conceptually we want to rotate left the constant by the number + of leading zeros after masking off all but the low 11 bits. */ + int rotcount = clz_hwi (ival & 0x7ff) - (BITS_PER_WORD - 11); + + /* Rotate the constant left by MSB bits. */ + ival = (ival << rotcount) | (ival >> (BITS_PER_WORD - rotcount)); + + /* Now we can do the same tests as before. */ + lsb = ctz_hwi (ival); + msb = BITS_PER_WORD - clz_hwi (ival); + if ((INTVAL (operands[2]) & HOST_WIDE_INT_UC (0x7ff)) != 0 + && msb - lsb + 1 <= 11) + { + rtx output = gen_reg_rtx (word_mode); + rtx input = operands[1]; + + /* Rotate the source left by ROTCOUNT bits, we don't have + rotate left by a constant, so use rotate right by an + adjusted constant. 
*/ + rtx x = GEN_INT (BITS_PER_WORD - rotcount); + x = gen_rtx_ROTATERT (word_mode, input, x); + emit_insn (gen_rtx_SET (output, x)); + input = output; + + /* We've already rotated the constant. So perform the IOR/XOR + operation. */ + x = GEN_INT (ival); + x = gen_rtx_fmt_ee (code, word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + + /* And rotate right to put everything into its proper place. */ + x = GEN_INT (rotcount); + x = gen_rtx_ROTATERT (word_mode, input, x); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + } + + /* If after accounting for bseti the remaining budget has + gone to less than zero, it forces the value into a + register and performs the IOR operation. It returns + TRUE to the caller so the caller knows code generation + is complete. */ + if (budget < 0) + { + rtx x = force_reg (word_mode, operands[2]); + x = gen_rtx_fmt_ee (code, word_mode, operands[1], x); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* Synthesis is better than loading the constant. */ + ival = INTVAL (operands[2]); + rtx input = operands[1]; + rtx output = NULL_RTX; + + /* Emit the [x]ori insn that sets the low 11 bits into + the proper state. */ + if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0) + { + rtx x = GEN_INT (ival & HOST_WIDE_INT_UC (0x7ff)); + x = gen_rtx_fmt_ee (code, word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + ival &= ~HOST_WIDE_INT_UC (0x7ff); + } + + /* We figure out a single bit as a constant and + generate a CONST_INT node for that. Then we + construct the IOR node, then the SET node and + emit it. An IOR with a suitable constant that is + a single bit will be implemented with a bseti. */ + while (ival) + { + HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + rtx x = GEN_INT (tmpval); + x = gen_rtx_fmt_ee (code, word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + ival &= ~tmpval; + } + + gcc_assert (output); + emit_move_insn (operands[0], output); + return true; +} + +/* Synthesize OPERANDS[0] = OPERANDS[1] & OPERANDS[2]. + + OPERANDS[0] and OPERANDS[1] will be a REG and may be the same + REG. + + OPERANDS[2] is a CONST_INT. + + Return TRUE if the operation was fully synthesized and the caller + need not generate additional code. Return FALSE if the operation + was not synthesized and the caller is responsible for emitting the + proper sequence. */ + +bool +synthesize_and (rtx operands[3]) +{ + /* Trivial cases that don't need synthesis. */ + if (SMALL_OPERAND (INTVAL (operands[2])) + || (TARGET_ZBS && not_single_bit_mask_operand (operands[2], word_mode))) + return false; + + /* If the second operand is a mode mask, emit an extension + insn instead. */ + if (CONST_INT_P (operands[2])) + { + enum machine_mode tmode = VOIDmode; + if (UINTVAL (operands[2]) == GET_MODE_MASK (HImode)) + tmode = HImode; + else if (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)) + tmode = SImode; + + if (tmode != VOIDmode) + { + rtx tmp = gen_lowpart (tmode, operands[1]); + emit_insn (gen_extend_insn (operands[0], tmp, word_mode, tmode, 1)); + return true; + } + } + + /* The number of instructions to synthesize the constant is a good + estimate of the budget. That does not account for out of order + execution an fusion in the constant synthesis those would naturally + decrease the budget. 
It also does not account for the AND at + the end of the sequence which would increase the budget. */ + int budget = riscv_const_insns (operands[2], true); + rtx input = NULL_RTX; + rtx output = NULL_RTX; + + /* Left shift + right shift to clear high bits. */ + if (budget >= 2 && p2m1_shift_operand (operands[2], word_mode)) + { + int count = (GET_MODE_BITSIZE (GET_MODE (operands[1])).to_constant () + - exact_log2 (INTVAL (operands[2]) + 1)); + rtx x = gen_rtx_ASHIFT (word_mode, operands[1], GEN_INT (count)); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + x = gen_rtx_LSHIFTRT (word_mode, input, GEN_INT (count)); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* Clears a bunch of low bits with only high bits set. */ + unsigned HOST_WIDE_INT t = ~INTVAL (operands[2]); + if (budget >= 2 && exact_log2 (t + 1) >= 0) + { + int count = ctz_hwi (INTVAL (operands[2])); + rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count)); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count)); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* If we shift right to eliminate the trailing zeros and + the result is a SMALL_OPERAND, then it's a shift right, + andi and shift left. */ + t = INTVAL (operands[2]); + t >>= ctz_hwi (t); + if (budget >= 3 && SMALL_OPERAND (t) && popcount_hwi (t) > 2) + { + /* Shift right to clear the low order bits. */ + unsigned HOST_WIDE_INT count = ctz_hwi (INTVAL (operands[2])); + rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count)); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + + /* Now emit the ANDI. */ + unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); + mask >>= ctz_hwi (mask); + x = gen_rtx_AND (word_mode, input, GEN_INT (mask)); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + + /* Shift left to move bits into position. */ + count = INTVAL (operands[2]); + count = ctz_hwi (count); + x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count)); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* If there are all zeros, except for a run of 1s somewhere in the middle + of the constant, then this is at worst 3 shifts. */ + t = INTVAL (operands[2]); + if (budget >= 3 + && consecutive_bits_operand (GEN_INT (t), word_mode) + && popcount_hwi (t) > 3) + { + /* Shift right to clear the low order bits. */ + int count = ctz_hwi (INTVAL (operands[2])); + rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count)); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + + /* Shift left to clear the high order bits. */ + count += clz_hwi (INTVAL (operands[2])) % BITS_PER_WORD; + x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count)); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + + /* And shift back right to put the bits into position. */ + count = clz_hwi (INTVAL (operands[2])) % BITS_PER_WORD; + x = gen_rtx_LSHIFTRT (word_mode, input, GEN_INT (count)); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* The special cases didn't apply. It's entirely possible we may + want to combine some of the ideas above with bclr, but for now + those are deferred until we see them popping up in practice. 
*/ + + unsigned HOST_WIDE_INT ival = ~INTVAL (operands[2]); + + /* Clear as many bits using andi as we can. */ + if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0x0) + { + ival &= ~HOST_WIDE_INT_UC (0x7ff); + budget--; + } + + /* And handle remaining bits via bclr. */ + while (TARGET_ZBS && ival) + { + unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + ival &= ~tmpval; + budget--; + } + + /* If the remaining budget has gone to less than zero, it + forces the value into a register and performs the AND + operation. It returns TRUE to the caller so the caller + knows code generation is complete. + FIXME: This is hacked to always be enabled until the last + patch in the series is enabled. */ + if (ival || budget < 0) + { + rtx x = force_reg (word_mode, operands[2]); + x = gen_rtx_AND (word_mode, operands[1], x); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* Synthesis is better than loading the constant. */ + ival = ~INTVAL (operands[2]); + input = operands[1]; + + /* Clear any of the lower 11 bits we need. */ + if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0) + { + rtx x = GEN_INT (~(ival & HOST_WIDE_INT_UC (0x7ff))); + x = gen_rtx_AND (word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + ival &= ~HOST_WIDE_INT_UC (0x7ff); + } + + /* Clear the rest with bclr. */ + while (ival) + { + unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + rtx x = GEN_INT (~tmpval); + x = gen_rtx_AND (word_mode, input, x); + output = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (output, x)); + input = output; + ival &= ~tmpval; + } + + emit_move_insn (operands[0], input); + return true; +} + + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -14068,6 +15423,9 @@ bool need_shadow_stack_push_pop_p () #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost +#undef TARGET_SCHED_CAN_SPECULATE_INSN +#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn + #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 2bcabd0..45fa521 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -43,7 +43,7 @@ along with GCC; see the file COPYING3. 
If not see #endif #ifndef RISCV_TUNE_STRING_DEFAULT -#define RISCV_TUNE_STRING_DEFAULT "rocket" +#define RISCV_TUNE_STRING_DEFAULT "generic" #endif extern const char *riscv_expand_arch (int argc, const char **argv); @@ -888,7 +888,7 @@ extern enum riscv_cc get_riscv_cc (const rtx use); #define ASM_OUTPUT_OPCODE(STREAM, PTR) \ (PTR) = riscv_asm_output_opcode(STREAM, PTR) -#define JUMP_TABLES_IN_TEXT_SECTION 0 +#define JUMP_TABLES_IN_TEXT_SECTION (riscv_cmodel == CM_LARGE) #define CASE_VECTOR_MODE SImode #define CASE_VECTOR_PC_RELATIVE (riscv_cmodel != CM_MEDLOW) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 26a247c..c3b504d 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -495,6 +495,8 @@ ;; SiFive custom extension instrctions ;; sf_vqmacc vector matrix integer multiply-add instructions ;; sf_vfnrclip vector fp32 to int8 ranged clip instructions +;; sf_vc vector coprocessor interface without side effect +;; sf_vc_se vector coprocessor interface with side effect (define_attr "type" "unknown,branch,jump,jalr,ret,call,load,fpload,store,fpstore, mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul, @@ -516,7 +518,8 @@ vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down, vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll, vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz, - vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16" + vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16, + sf_vc,sf_vc_se" (cond [(eq_attr "got" "load") (const_string "load") ;; If a doubleword move uses these expensive instructions, @@ -669,7 +672,7 @@ ;; Microarchitectures we know how to tune for. ;; Keep this in sync with enum riscv_microarchitecture. (define_attr "tune" - "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo" + "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700" (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) ;; Describe a user's asm statement. 
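
A host-side sketch (added here as an illustration, not part of the patch) of the andi + bclr sequence described at the end of synthesize_and above: the bits that must be cleared are the set bits of ~C; any of them in the low 11 bits fold into a single andi, and each remaining bit costs one bclr-style AND. The mask 0xffffffffffff70ff, the source value, and rv64 (XLEN == 64) are assumed for the example.

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t c = 0xffffffffffff70ffULL;   /* assumed AND mask, not a SMALL_OPERAND */
  uint64_t x = 0x1234567890abcdefULL;   /* assumed source register value */
  uint64_t ival = ~c;                   /* bits that must be cleared */
  int insns = 0;

  /* One andi clears whatever falls in the low 11 bits.  */
  if (ival & 0x7ff)
    {
      x &= ~(ival & 0x7ff);
      ival &= ~UINT64_C (0x7ff);
      insns++;
    }

  /* Each remaining bit is cleared by one bclr-style AND.  */
  while (ival)
    {
      uint64_t bit = ival & -ival;
      x &= ~bit;
      ival &= ~bit;
      insns++;
    }

  /* For this mask: one andi plus two bclr, i.e. 3 instructions, and the
     result matches a plain AND of the source with c.  */
  printf ("%#llx in %d insns\n", (unsigned long long) x, insns);
  return 0;
}
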
@@ -789,7 +792,7 @@ rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); - riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]); + emit_insn (gen_addsi3_extended (t6, operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else @@ -799,7 +802,10 @@ else t5 = operands[2]; emit_insn (gen_adddi3 (t3, t4, t5)); - emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + rtx t7 = gen_lowpart (SImode, t6); + SUBREG_PROMOTED_VAR_P (t7) = 1; + SUBREG_PROMOTED_SET (t7, SRP_SIGNED); + emit_move_insn (operands[0], t7); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } @@ -835,8 +841,11 @@ emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; - riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]); - emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); + emit_insn (gen_addsi3_extended (t4, operands[1], operands[2])); + rtx t5 = gen_lowpart (SImode, t4); + SUBREG_PROMOTED_VAR_P (t5) = 1; + SUBREG_PROMOTED_SET (t5, SRP_SIGNED); + emit_move_insn (operands[0], t5); riscv_expand_conditional_branch (operands[3], LTU, t4, t3); } @@ -875,7 +884,7 @@ ;; Where C1 is not a LUI operand, but ~C1 is a LUI operand (define_insn_and_split "*lui_constraint<X:mode>_and_to_or" - [(set (match_operand:X 0 "register_operand" "=r") + [(set (match_operand:X 0 "register_operand" "=r") (plus:X (and:X (match_operand:X 1 "register_operand" "r") (match_operand 2 "const_int_operand")) (match_operand 3 "const_int_operand"))) @@ -889,13 +898,21 @@ <= riscv_const_insns (operands[3], false)))" "#" "&& reload_completed" - [(set (match_dup 4) (match_dup 5)) - (set (match_dup 0) (ior:X (match_dup 1) (match_dup 4))) - (set (match_dup 4) (match_dup 6)) - (set (match_dup 0) (minus:X (match_dup 0) (match_dup 4)))] + [(const_int 0)] { operands[5] = GEN_INT (~INTVAL (operands[2])); operands[6] = GEN_INT ((~INTVAL (operands[2])) | (-INTVAL (operands[3]))); + + /* This is always a LUI operand, so it's safe to just emit. */ + emit_move_insn (operands[4], operands[5]); + + rtx x = gen_rtx_IOR (word_mode, operands[1], operands[4]); + emit_move_insn (operands[0], x); + + /* This may require multiple steps to synthesize. 
*/ + riscv_emit_move (operands[4], operands[6]); + x = gen_rtx_MINUS (word_mode, operands[0], operands[4]); + emit_move_insn (operands[0], x); } [(set_attr "type" "arith")]) @@ -966,7 +983,7 @@ rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); - riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]); + emit_insn (gen_subsi3_extended (t6, operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else @@ -976,7 +993,10 @@ else t5 = operands[2]; emit_insn (gen_subdi3 (t3, t4, t5)); - emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + rtx t7 = gen_lowpart (SImode, t6); + SUBREG_PROMOTED_VAR_P (t7) = 1; + SUBREG_PROMOTED_SET (t7, SRP_SIGNED); + emit_move_insn (operands[0], t7); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } @@ -1015,8 +1035,11 @@ emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; - riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]); - emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); + emit_insn (gen_subsi3_extended (t4, operands[1], operands[2])); + rtx t5 = gen_lowpart (SImode, t4); + SUBREG_PROMOTED_VAR_P (t5) = 1; + SUBREG_PROMOTED_SET (t5, SRP_SIGNED); + emit_move_insn (operands[0], t5); riscv_expand_conditional_branch (operands[3], LTU, t3, t4); } @@ -1709,26 +1732,11 @@ (define_expand "and<mode>3" [(set (match_operand:X 0 "register_operand") (and:X (match_operand:X 1 "register_operand") - (match_operand:X 2 "arith_or_mode_mask_or_zbs_operand")))] + (match_operand:X 2 "reg_or_const_int_operand")))] "" { - /* If the second operand is a mode mask, emit an extension - insn instead. */ - if (CONST_INT_P (operands[2])) - { - enum machine_mode tmode = VOIDmode; - if (UINTVAL (operands[2]) == GET_MODE_MASK (HImode)) - tmode = HImode; - else if (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)) - tmode = SImode; - - if (tmode != VOIDmode) - { - rtx tmp = gen_lowpart (tmode, operands[1]); - emit_insn (gen_extend_insn (operands[0], tmp, <MODE>mode, tmode, 1)); - DONE; - } - } + if (CONST_INT_P (operands[2]) && synthesize_and (operands)) + DONE; }) (define_insn "*and<mode>3" @@ -1752,8 +1760,15 @@ (define_expand "<optab><mode>3" [(set (match_operand:X 0 "register_operand") (any_or:X (match_operand:X 1 "register_operand" "") - (match_operand:X 2 "arith_or_zbs_operand" "")))] - "") + (match_operand:X 2 "reg_or_const_int_operand" "")))] + "" + +{ + /* If synthesis of the logical op is successful, then no further code + generation is necessary. Else just generate code normally. 
*/ + if (CONST_INT_P (operands[2]) && synthesize_ior_xor (<OPTAB>, operands)) + DONE; +}) (define_insn "*<optab><mode>3" [(set (match_operand:X 0 "register_operand" "=r,r") @@ -2494,8 +2509,8 @@ }) (define_insn "*movdi_32bit" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,m, *f,*f,*r,*f,*m,r") - (match_operand:DI 1 "move_operand" " r,i,m,r,*J*r,*m,*f,*f,*f,vp"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, *f,*f,*r,*f,*m,r") + (match_operand:DI 1 "move_operand" " r,i,m,rJ,*J*r,*m,*f,*f,*f,vp"))] "!TARGET_64BIT && (register_operand (operands[0], DImode) || reg_or_0_operand (operands[1], DImode))" @@ -2922,7 +2937,7 @@ [(set_attr "type" "shift") (set_attr "mode" "DI")]) -(define_insn_and_split "*<optab><GPR:mode>3_mask_1" +(define_insn "*<optab><GPR:mode>3_mask_1" [(set (match_operand:GPR 0 "register_operand" "= r") (any_shift:GPR (match_operand:GPR 1 "register_operand" " r") @@ -2931,12 +2946,14 @@ (match_operand:GPR2 2 "register_operand" "r") (match_operand 3 "<GPR:shiftm1>"))])))] "" - "#" - "&& 1" - [(set (match_dup 0) - (any_shift:GPR (match_dup 1) - (match_dup 2)))] - "operands[2] = gen_lowpart (QImode, operands[2]);" +{ + /* If the shift mode is not word mode, then it must be the + case that we're generating rv64 code, but this is a 32-bit + operation. Thus we need to use the "w" variant. */ + if (E_<GPR:MODE>mode != word_mode) + return "<insn>w\t%0,%1,%2"; + return "<insn>\t%0,%1,%2"; +} [(set_attr "type" "shift") (set_attr "mode" "<GPR:MODE>")]) @@ -2955,7 +2972,7 @@ [(set_attr "type" "shift") (set_attr "mode" "SI")]) -(define_insn_and_split "*<optab>si3_extend_mask" +(define_insn "*<optab>si3_extend_mask" [(set (match_operand:DI 0 "register_operand" "= r") (sign_extend:DI (any_shift:SI @@ -2965,13 +2982,7 @@ (match_operand:GPR 2 "register_operand" " r") (match_operand 3 "const_si_mask_operand"))]))))] "TARGET_64BIT" - "#" - "&& 1" - [(set (match_dup 0) - (sign_extend:DI - (any_shift:SI (match_dup 1) - (match_dup 2))))] - "operands[2] = gen_lowpart (QImode, operands[2]);" + "<insn>w\t%0,%1,%2" [(set_attr "type" "shift") (set_attr "mode" "SI")]) @@ -3169,15 +3180,25 @@ "#" "&& reload_completed" [(set (match_dup 4) (lshiftrt:X (subreg:X (match_dup 2) 0) (match_dup 6))) - (set (match_dup 4) (and:X (match_dup 4) (match_dup 7))) + (set (match_dup 4) (match_dup 8)) (set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)]) (label_ref (match_dup 0)) (pc)))] { - HOST_WIDE_INT mask = INTVAL (operands[3]); - int trailing = ctz_hwi (mask); + HOST_WIDE_INT mask = INTVAL (operands[3]); + int trailing = ctz_hwi (mask); - operands[6] = GEN_INT (trailing); - operands[7] = GEN_INT (mask >> trailing); + operands[6] = GEN_INT (trailing); + operands[7] = GEN_INT (mask >> trailing); + + /* This splits after reload, so there's little chance to clean things + up. Rather than emit a ton of RTL here, we can just make a new + operand for that RHS and use it. For the case where the AND would + have been redundant, we can make it a NOP move, which does get + cleaned up. 
*/ + if (operands[7] == CONSTM1_RTX (word_mode)) + operands[8] = operands[4]; + else + operands[8] = gen_rtx_AND (word_mode, operands[4], operands[7]); } [(set_attr "type" "branch")]) @@ -3277,7 +3298,7 @@ (match_operand:GPR 2 "movcc_operand") (match_operand:GPR 3 "movcc_operand")))] "TARGET_SFB_ALU || TARGET_XTHEADCONDMOV || TARGET_ZICOND_LIKE - || TARGET_MOVCC" + || TARGET_MOVCC || TARGET_XMIPSCMOV" { if (riscv_expand_conditional_move (operands[0], operands[1], operands[2], operands[3])) @@ -4381,7 +4402,7 @@ ) (define_insn "prefetch" - [(prefetch (match_operand 0 "address_operand" "r") + [(prefetch (match_operand 0 "prefetch_operand" "Qr") (match_operand 1 "imm5_operand" "i") (match_operand 2 "const_int_operand" "n"))] "TARGET_ZICBOP" @@ -4401,7 +4422,7 @@ (const_string "4")))]) (define_insn "riscv_prefetchi_<mode>" - [(unspec_volatile:X [(match_operand:X 0 "address_operand" "r") + [(unspec_volatile:X [(match_operand:X 0 "prefetch_operand" "Q") (match_operand:X 1 "imm5_operand" "i")] UNSPECV_PREI)] "TARGET_ZICBOP" @@ -4613,6 +4634,17 @@ } ) +(define_expand "usmul<mode>3" + [(match_operand:ANYI 0 "register_operand") + (match_operand:ANYI 1 "register_operand") + (match_operand:ANYI 2 "register_operand")] + "" + { + riscv_expand_usmul (operands[0], operands[1], operands[2]); + DONE; + } +) + (define_expand "ustrunc<mode><anyi_double_truncated>2" [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand") (match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")] @@ -4691,23 +4723,38 @@ (match_operand 2 "const_int_operand" "n")) (match_operand 3 "const_int_operand" "n"))) (clobber (match_scratch:DI 4 "=&r"))] - "(TARGET_64BIT && riscv_const_insns (operands[3], false) == 1)" + "(TARGET_64BIT + && riscv_const_insns (operands[3], false) == 1 + && riscv_const_insns (GEN_INT (INTVAL (operands[3]) + << INTVAL (operands[2])), false) != 1)" "#" "&& reload_completed" [(const_int 0)] "{ - rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]); - emit_insn (gen_rtx_SET (operands[0], x)); - - /* If the constant fits in a simm12, use it directly as we do not - get another good chance to optimize things again. */ - if (!SMALL_OPERAND (INTVAL (operands[3]))) + /* Prefer to generate shNadd when we can, even over using an + immediate form. If we're not going to be able to generate + a shNadd, then use the constant directly if it fits in a + simm12 field since we won't get another chance to optimize this. */ + if ((TARGET_ZBA && imm123_operand (operands[2], word_mode)) + || !SMALL_OPERAND (INTVAL (operands[3]))) emit_move_insn (operands[4], operands[3]); else operands[4] = operands[3]; - x = gen_rtx_PLUS (DImode, operands[0], operands[4]); - emit_insn (gen_rtx_SET (operands[0], x)); + if (TARGET_ZBA && imm123_operand (operands[2], word_mode)) + { + rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]); + x = gen_rtx_PLUS (DImode, x, operands[4]); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { + rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]); + emit_insn (gen_rtx_SET (operands[0], x)); + x = gen_rtx_PLUS (DImode, operands[0], operands[4]); + emit_insn (gen_rtx_SET (operands[0], x)); + } + DONE; }" [(set_attr "type" "arith")]) @@ -4806,6 +4853,25 @@ [(set_attr "type" "move") (set_attr "mode" "<MODE>")]) +;; If we're trying to create 0 or 2^n-1 based on the result of +;; a test such as (lt (reg) (const_int 0)), we'll see a splat of +;; the sign bit across a GPR using srai, then a logical and to +;; mask off high bits. 
We can replace the logical and with +;; a logical right shift which works without constant synthesis +;; for larger constants. +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (ashiftrt:X (match_operand:X 1 "register_operand") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand")))] + "(INTVAL (operands[2]) == BITS_PER_WORD - 1 + && exact_log2 (INTVAL (operands[3]) + 1) >= 0)" + [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2))) + (set (match_dup 0) (lshiftrt:X (match_dup 0) (match_dup 3)))] + { operands[3] = GEN_INT (BITS_PER_WORD + - exact_log2 (INTVAL (operands[3]) + 1)); }) + +;; Standard extensions and pattern for optimization (include "bitmanip.md") (include "crypto.md") (include "sync.md") @@ -4813,18 +4879,22 @@ (include "sync-ztso.md") (include "peephole.md") (include "pic.md") -(include "generic.md") -(include "sifive-7.md") -(include "sifive-p400.md") -(include "sifive-p600.md") -(include "thead.md") -(include "generic-vector-ooo.md") -(include "generic-ooo.md") (include "vector.md") (include "vector-crypto.md") (include "vector-bfloat16.md") (include "zicond.md") +(include "mips-insn.md") (include "sfb.md") (include "zc.md") +;; Vendor extensions +(include "thead.md") (include "corev.md") +;; Pipeline models +(include "generic.md") (include "xiangshan.md") +(include "mips-p8700.md") +(include "sifive-7.md") +(include "sifive-p400.md") +(include "sifive-p600.md") +(include "generic-vector-ooo.md") +(include "generic-ooo.md") diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 7515c8e..6543fd1 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -168,23 +168,14 @@ momit-leaf-frame-pointer Target Mask(OMIT_LEAF_FRAME_POINTER) Save Omit the frame pointer in leaf functions. 
-Mask(64BIT) - -Mask(MUL) - -Mask(ATOMIC) - -Mask(HARD_FLOAT) - -Mask(DOUBLE_FLOAT) - -Mask(RVC) +TargetVariable +int riscv_isa_flags -Mask(RVE) +Mask(64BIT) Var(riscv_isa_flags) -Mask(VECTOR) +Mask(VECTOR) Var(riscv_isa_flags) -Mask(FULL_V) +Mask(FULL_V) Var(riscv_isa_flags) mriscv-attribute Target Var(riscv_emit_attribute_p) Init(-1) @@ -233,93 +224,6 @@ TargetVariable long riscv_stack_protector_guard_offset = 0 TargetVariable -int riscv_zi_subext - -Mask(ZICSR) Var(riscv_zi_subext) - -Mask(ZIFENCEI) Var(riscv_zi_subext) - -Mask(ZIHINTNTL) Var(riscv_zi_subext) - -Mask(ZIHINTPAUSE) Var(riscv_zi_subext) - -Mask(ZICOND) Var(riscv_zi_subext) - -Mask(ZICCAMOA) Var(riscv_zi_subext) - -Mask(ZICCIF) Var(riscv_zi_subext) - -Mask(ZICCLSM) Var(riscv_zi_subext) - -Mask(ZICCRSE) Var(riscv_zi_subext) - -Mask(ZICFISS) Var(riscv_zi_subext) - -Mask(ZICFILP) Var(riscv_zi_subext) - -TargetVariable -int riscv_za_subext - -Mask(ZAWRS) Var(riscv_za_subext) - -Mask(ZAAMO) Var(riscv_za_subext) - -Mask(ZALRSC) Var(riscv_za_subext) - -Mask(ZABHA) Var(riscv_za_subext) - -Mask(ZACAS) Var(riscv_za_subext) - -Mask(ZA64RS) Var(riscv_za_subext) - -Mask(ZA128RS) Var(riscv_za_subext) - -TargetVariable -int riscv_zb_subext - -Mask(ZBA) Var(riscv_zb_subext) - -Mask(ZBB) Var(riscv_zb_subext) - -Mask(ZBC) Var(riscv_zb_subext) - -Mask(ZBS) Var(riscv_zb_subext) - -TargetVariable -int riscv_zinx_subext - -Mask(ZFINX) Var(riscv_zinx_subext) - -Mask(ZDINX) Var(riscv_zinx_subext) - -Mask(ZHINX) Var(riscv_zinx_subext) - -Mask(ZHINXMIN) Var(riscv_zinx_subext) - -TargetVariable -int riscv_zk_subext - -Mask(ZBKB) Var(riscv_zk_subext) - -Mask(ZBKC) Var(riscv_zk_subext) - -Mask(ZBKX) Var(riscv_zk_subext) - -Mask(ZKNE) Var(riscv_zk_subext) - -Mask(ZKND) Var(riscv_zk_subext) - -Mask(ZKNH) Var(riscv_zk_subext) - -Mask(ZKR) Var(riscv_zk_subext) - -Mask(ZKSED) Var(riscv_zk_subext) - -Mask(ZKSH) Var(riscv_zk_subext) - -Mask(ZKT) Var(riscv_zk_subext) - -TargetVariable int riscv_vector_elen_flags Mask(VECTOR_ELEN_32) Var(riscv_vector_elen_flags) @@ -335,207 +239,6 @@ Mask(VECTOR_ELEN_FP_16) Var(riscv_vector_elen_flags) Mask(VECTOR_ELEN_BF_16) Var(riscv_vector_elen_flags) TargetVariable -int riscv_zvl_flags - -Mask(ZVL32B) Var(riscv_zvl_flags) - -Mask(ZVL64B) Var(riscv_zvl_flags) - -Mask(ZVL128B) Var(riscv_zvl_flags) - -Mask(ZVL256B) Var(riscv_zvl_flags) - -Mask(ZVL512B) Var(riscv_zvl_flags) - -Mask(ZVL1024B) Var(riscv_zvl_flags) - -Mask(ZVL2048B) Var(riscv_zvl_flags) - -Mask(ZVL4096B) Var(riscv_zvl_flags) - -Mask(ZVL8192B) Var(riscv_zvl_flags) - -Mask(ZVL16384B) Var(riscv_zvl_flags) - -Mask(ZVL32768B) Var(riscv_zvl_flags) - -Mask(ZVL65536B) Var(riscv_zvl_flags) - -TargetVariable -int riscv_zvb_subext - -Mask(ZVBB) Var(riscv_zvb_subext) - -Mask(ZVBC) Var(riscv_zvb_subext) - -Mask(ZVKB) Var(riscv_zvb_subext) - -TargetVariable -int riscv_zvk_subext - -Mask(ZVKG) Var(riscv_zvk_subext) - -Mask(ZVKNED) Var(riscv_zvk_subext) - -Mask(ZVKNHA) Var(riscv_zvk_subext) - -Mask(ZVKNHB) Var(riscv_zvk_subext) - -Mask(ZVKSED) Var(riscv_zvk_subext) - -Mask(ZVKSH) Var(riscv_zvk_subext) - -Mask(ZVKN) Var(riscv_zvk_subext) - -Mask(ZVKNC) Var(riscv_zvk_subext) - -Mask(ZVKNG) Var(riscv_zvk_subext) - -Mask(ZVKS) Var(riscv_zvk_subext) - -Mask(ZVKSC) Var(riscv_zvk_subext) - -Mask(ZVKSG) Var(riscv_zvk_subext) - -Mask(ZVKT) Var(riscv_zvk_subext) - -TargetVariable -int riscv_zicmo_subext - -Mask(ZICBOZ) Var(riscv_zicmo_subext) - -Mask(ZICBOM) Var(riscv_zicmo_subext) - -Mask(ZICBOP) Var(riscv_zicmo_subext) - -Mask(ZIC64B) Var(riscv_zicmo_subext) - -TargetVariable -int 
riscv_mop_subext - -Mask(ZIMOP) Var(riscv_mop_subext) - -Mask(ZCMOP) Var(riscv_mop_subext) - -TargetVariable -int riscv_zf_subext - -Mask(ZFBFMIN) Var(riscv_zf_subext) - -Mask(ZFHMIN) Var(riscv_zf_subext) - -Mask(ZFH) Var(riscv_zf_subext) - -Mask(ZVFBFMIN) Var(riscv_zf_subext) - -Mask(ZVFBFWMA) Var(riscv_zf_subext) - -Mask(ZVFHMIN) Var(riscv_zf_subext) - -Mask(ZVFH) Var(riscv_zf_subext) - -TargetVariable -int riscv_zfa_subext - -Mask(ZFA) Var(riscv_zfa_subext) - -TargetVariable -int riscv_zm_subext - -Mask(ZMMUL) Var(riscv_zm_subext) - -TargetVariable -int riscv_zc_subext - -Mask(ZCA) Var(riscv_zc_subext) - -Mask(ZCB) Var(riscv_zc_subext) - -Mask(ZCE) Var(riscv_zc_subext) - -Mask(ZCF) Var(riscv_zc_subext) - -Mask(ZCD) Var(riscv_zc_subext) - -Mask(ZCMP) Var(riscv_zc_subext) - -Mask(ZCMT) Var(riscv_zc_subext) - -Mask(XCVBI) Var(riscv_xcv_subext) - -TargetVariable -int riscv_sv_subext - -Mask(SVINVAL) Var(riscv_sv_subext) - -Mask(SVNAPOT) Var(riscv_sv_subext) - -Mask(SVVPTC) Var(riscv_sv_subext) - -TargetVariable -int riscv_ztso_subext - -Mask(ZTSO) Var(riscv_ztso_subext) - -TargetVariable -int riscv_xcv_subext - -Mask(XCVMAC) Var(riscv_xcv_subext) - -Mask(XCVALU) Var(riscv_xcv_subext) - -Mask(XCVELW) Var(riscv_xcv_subext) - -Mask(XCVSIMD) Var(riscv_xcv_subext) - -TargetVariable -int riscv_xthead_subext - -Mask(XTHEADBA) Var(riscv_xthead_subext) - -Mask(XTHEADBB) Var(riscv_xthead_subext) - -Mask(XTHEADBS) Var(riscv_xthead_subext) - -Mask(XTHEADCMO) Var(riscv_xthead_subext) - -Mask(XTHEADCONDMOV) Var(riscv_xthead_subext) - -Mask(XTHEADFMEMIDX) Var(riscv_xthead_subext) - -Mask(XTHEADFMV) Var(riscv_xthead_subext) - -Mask(XTHEADINT) Var(riscv_xthead_subext) - -Mask(XTHEADMAC) Var(riscv_xthead_subext) - -Mask(XTHEADMEMIDX) Var(riscv_xthead_subext) - -Mask(XTHEADMEMPAIR) Var(riscv_xthead_subext) - -Mask(XTHEADSYNC) Var(riscv_xthead_subext) - -Mask(XTHEADVECTOR) Var(riscv_xthead_subext) - -TargetVariable -int riscv_xventana_subext - -Mask(XVENTANACONDOPS) Var(riscv_xventana_subext) - -TargetVariable -int riscv_sifive_subext - -Mask(XSFVCP) Var(riscv_sifive_subext) - -Mask(XSFCEASE) Var(riscv_sifive_subext) - -Mask(XSFVQMACCQOQ) Var(riscv_sifive_subext) - -Mask(XSFVQMACCDOD) Var(riscv_sifive_subext) - -Mask(XSFVFNRCLIPXFQF) Var(riscv_sifive_subext) - -TargetVariable int riscv_fmv_priority = 0 Enum @@ -579,6 +282,18 @@ Inline strlen calls if possible. Target RejectNegative Joined UInteger Var(riscv_strcmp_inline_limit) Init(64) Max number of bytes to compare as part of inlined strcmp/strncmp routines (default: 64). +-param=gpr2vr-cost= +Target RejectNegative Joined UInteger Var(gpr2vr_cost) Init(GPR2VR_COST_UNPROVIDED) +Set the cost value of the rvv instruction when operate from GPR to VR. + +-param=fpr2vr-cost= +Target RejectNegative Joined UInteger Var(fpr2vr_cost) Init(FPR2VR_COST_UNPROVIDED) +Set the cost value of the rvv instruction when operate from FPR to VR. + +-param=riscv-autovec-mode= +Target Undocumented RejectNegative Joined Var(riscv_autovec_mode) Save +Set the only autovec mode to try. + Enum Name(rvv_max_lmul) Type(enum rvv_max_lmul_enum) The RVV possible LMUL (-mrvv-max-lmul=): diff --git a/gcc/config/riscv/sifive-7.md b/gcc/config/riscv/sifive-7.md index c208541..a04b80b 100644 --- a/gcc/config/riscv/sifive-7.md +++ b/gcc/config/riscv/sifive-7.md @@ -1,4 +1,4 @@ -(define_automaton "sifive_7") +(define_automaton "sifive_7,sifive_7_vec,sifive_7_vec_mem") ;; Sifive 7 Series Base Core ;; This has two pipelines, A (Address) and B (Branch). 
@@ -11,6 +11,14 @@ (define_cpu_unit "sifive_7_idiv" "sifive_7") (define_cpu_unit "sifive_7_fpu" "sifive_7") +;; Vector command queue +(define_cpu_unit "sifive_7_vcq" "sifive_7") +;; Vector arithmetic sequencer +(define_cpu_unit "sifive_7_va" "sifive_7_vec") +;; Vector store sequencer +(define_cpu_unit "sifive_7_vs" "sifive_7_vec_mem") +;; Vector load sequencer +(define_cpu_unit "sifive_7_vl" "sifive_7_vec_mem") (define_insn_reservation "sifive_7_load" 3 (and (eq_attr "tune" "sifive_7") @@ -60,9 +68,14 @@ (define_insn_reservation "sifive_7_alu" 2 (and (eq_attr "tune" "sifive_7") (eq_attr "type" "unknown,arith,shift,slt,multi,logical,move,bitmanip,\ - rotate,min,max,minu,maxu,clz,ctz,atomic,condmove,mvpair,zicond")) + min,max,minu,maxu,atomic,condmove,mvpair,zicond")) "sifive_7_A|sifive_7_B") +(define_insn_reservation "sifive_7_alu_b" 2 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "clz,ctz,rotate")) + "sifive_7_B") + (define_insn_reservation "sifive_7_load_immediate" 1 (and (eq_attr "tune" "sifive_7") (eq_attr "type" "nop,const,auipc")) @@ -91,6 +104,12 @@ (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i,fcmp,fmove")) "sifive_7_B") +(define_insn_reservation "sifive_7_fdiv_h" 14 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "fdiv,fsqrt") + (eq_attr "mode" "HF")) + "sifive_7_B,sifive_7_fpu*13") + (define_insn_reservation "sifive_7_fdiv_s" 27 (and (eq_attr "tune" "sifive_7") (eq_attr "type" "fdiv,fsqrt") @@ -119,6 +138,21 @@ (eq_attr "type" "cpop,clmul")) "sifive_7_A") +(define_insn_reservation "sifive_7_csr" 5 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "rdfrm,wrfrm,wrvxrm")) + "sifive_7_A") + +(define_insn_reservation "sifive_7_crypto" 10 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "crypto")) + "sifive_7_A") + +(define_insn_reservation "sifive_7_unknown" 10 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "ghost")) + "sifive_7_A") + (define_bypass 1 "sifive_7_load,sifive_7_alu,sifive_7_mul,sifive_7_f2i,sifive_7_sfb_alu" "sifive_7_alu,sifive_7_branch") @@ -129,13 +163,140 @@ "sifive_7_store" "riscv_store_data_bypass_p") (define_bypass 2 "sifive_7_i2f" - "sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_s,sifive_7_fdiv_d") + "sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_h,sifive_7_fdiv_s,sifive_7_fdiv_d,sifive_7_hfma") (define_bypass 2 "sifive_7_fp_other" - "sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_s,sifive_7_fdiv_d") + "sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_h,sifive_7_fdiv_s,sifive_7_fdiv_d,sifive_7_hfma") (define_bypass 2 "sifive_7_fp_other" "sifive_7_alu,sifive_7_branch") (define_bypass 2 "sifive_7_fp_other" "sifive_7_store" "riscv_store_data_bypass_p") + +;; Vector pipeline +;; The latency depends on LMUL, but we don't model that yet since we don't +;; want to expand the rules too much unless we can show that modeling it +;; gives a meaningful performance difference.
+ +(define_insn_reservation "sifive_7_vsetvl" 2 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vsetvl_pre,vsetvl,rdvlenb,rdvl")) + "sifive_7_A") + +(define_insn_reservation "sifive_7_vec_load" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr, + vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff")) + "sifive_7_vcq,sifive_7_vl*3") + +(define_insn_reservation "sifive_7_vec_store" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr, + vssegte,vssegts,vssegtux,vssegtox")) + "sifive_7_vcq,sifive_7_vs*3") + +(define_insn_reservation "sifive_7_vec_ialu" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vimovxv,vmov,vimovvx,vialu,vicalu,vext, + vshift,viminmax,vimerge,vbrev,vrev8, + vimov,vext,vbrev8,vclz,vctz,vcpop,vrol,vror,vandn")) + "sifive_7_vcq,sifive_7_va*3") + +(define_insn_reservation "sifive_7_vec_slow_ialu" 8 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vshift,vimul,vimuladd")) + "sifive_7_vcq,sifive_7_va*7") + +(define_insn_reservation "sifive_7_vec_cmp" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vicmp")) + "sifive_7_vcq,sifive_7_va*3") + +(define_insn_reservation "sifive_7_vec_iwalu" 8 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "viwalu,viwmul,viwmuladd,vnshift,vwsll")) + "sifive_7_vcq,sifive_7_va*7") + +(define_insn_reservation "sifive_7_vec_div" 16 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vidiv,vfdiv")) + "sifive_7_vcq,sifive_7_va*15") + +(define_insn_reservation "sifive_7_vec_fixed_point" 8 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vsalu,vaalu,vsmul,vsshift")) + "sifive_7_vcq,sifive_7_va*7") + +(define_insn_reservation "sifive_7_vec_narrow_fixed_point" 8 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vnclip")) + "sifive_7_vcq,sifive_7_va*7") + +(define_insn_reservation "sifive_7_vec_fsimple" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vfmovvf,vfmovfv,vfclass")) + "sifive_7_vcq,sifive_7_va*3") + +(define_insn_reservation "sifive_7_vec_falu" 8 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vfalu,vfmul,vfmuladd,vfrecp, + vfcvtitof,vfcvtftoi,vfmerge,vfmov,vfsgnj")) + "sifive_7_vcq,sifive_7_va*7") + +(define_insn_reservation "sifive_7_vec_fcmp" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vfcmp,vfminmax")) + "sifive_7_vcq,sifive_7_va*3") + +(define_insn_reservation "sifive_7_vec_fsqrt_fdiv" 16 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vfsqrt,vfdiv")) + "sifive_7_vcq,sifive_7_va*15") + +(define_insn_reservation "sifive_7_vec_fwalu" 8 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vfwalu,vfwmul,vfwmuladd,vfwmaccbf16,vfwcvtitof, + vfwcvtftoi,vfwcvtftof,vfwcvtbf16, + vfncvtitof,vfncvtftoi,vfncvtftof,vfncvtbf16, + sf_vfnrclip,sf_vqmacc")) + "sifive_7_vcq,sifive_7_va*7") + +(define_insn_reservation "sifive_7_vec_red" 12 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vired,vfredu,vfredo,viwred,vfwredu,vfwredo")) + "sifive_7_vcq,sifive_7_va*11") + +(define_insn_reservation "sifive_7_vec_mask" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs")) + "sifive_7_vcq,sifive_7_va*3") + +(define_insn_reservation "sifive_7_vec_mask_special" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vmiota,vmidx")) + "sifive_7_vcq,sifive_7_va*3") + +(define_insn_reservation "sifive_7_vec_gather" 8 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vgather")) + "sifive_7_vcq,sifive_7_va*7") + +(define_insn_reservation "sifive_7_vec_compress" 16 + (and (eq_attr "tune" 
"sifive_7") + (eq_attr "type" "vcompress")) + "sifive_7_vcq,sifive_7_va*15") + +(define_insn_reservation "sifive_7_vec_slide" 4 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down")) + "sifive_7_vcq,sifive_7_va*3") + +;; Assume that's slow if it's unknown instruction vector type. +(define_insn_reservation "sifive_7_vec_unknown" 16 + (and (eq_attr "tune" "sifive_7") + (eq_attr "type" "vector,vclmul,vclmulh,vghsh,vgmul, + vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2, + vaesz,vsha2ms,vsha2ch,vsha2cl, + vsm4k,vsm4r,vsm3me,vsm3c,sf_vc,sf_vc_se")) + "sifive_7_vcq,sifive_7_va*15") diff --git a/gcc/config/riscv/sifive-vector-builtins-bases.cc b/gcc/config/riscv/sifive-vector-builtins-bases.cc index 85e1b6f..be530ca 100644 --- a/gcc/config/riscv/sifive-vector-builtins-bases.cc +++ b/gcc/config/riscv/sifive-vector-builtins-bases.cc @@ -195,12 +195,89 @@ public: } }; +/* Implements SiFive sf.vc. */ +class sf_vc : public function_base +{ +public: + + unsigned int call_properties (const function_instance &) const override + { + return CP_USE_COPROCESSORS; + } + + rtx expand (function_expander &e) const override + { + switch (e.op_info->op) + { + case OP_TYPE_x: + return e.use_exact_insn (code_for_sf_vc_x_se (e.vector_mode ())); + case OP_TYPE_i: + return e.use_exact_insn (code_for_sf_vc_i_se (e.vector_mode ())); + case OP_TYPE_vv: + return e.use_exact_insn (code_for_sf_vc_vv_se (e.vector_mode ())); + case OP_TYPE_xv: + return e.use_exact_insn (code_for_sf_vc_xv_se (e.vector_mode ())); + case OP_TYPE_iv: + return e.use_exact_insn (code_for_sf_vc_iv_se (e.vector_mode ())); + case OP_TYPE_fv: + return e.use_exact_insn (code_for_sf_vc_fv_se (e.vector_mode ())); + case OP_TYPE_v_x: + return e.use_exact_insn (code_for_sf_vc_v_x_se (e.vector_mode ())); + case OP_TYPE_v_i: + return e.use_exact_insn (code_for_sf_vc_v_i_se (e.vector_mode ())); + case OP_TYPE_v_vv: + return e.use_exact_insn (code_for_sf_vc_v_vv_se (e.vector_mode ())); + case OP_TYPE_v_xv: + return e.use_exact_insn (code_for_sf_vc_v_xv_se (e.vector_mode ())); + case OP_TYPE_v_iv: + return e.use_exact_insn (code_for_sf_vc_v_iv_se (e.vector_mode ())); + case OP_TYPE_v_fv: + return e.use_exact_insn (code_for_sf_vc_v_fv_se (e.vector_mode ())); + case OP_TYPE_vvv: + return e.use_exact_insn (code_for_sf_vc_vvv_se (e.vector_mode ())); + case OP_TYPE_xvv: + return e.use_exact_insn (code_for_sf_vc_xvv_se (e.vector_mode ())); + case OP_TYPE_ivv: + return e.use_exact_insn (code_for_sf_vc_ivv_se (e.vector_mode ())); + case OP_TYPE_fvv: + return e.use_exact_insn (code_for_sf_vc_fvv_se (e.vector_mode ())); + case OP_TYPE_vvw: + return e.use_exact_insn (code_for_sf_vc_vvw_se (e.vector_mode ())); + case OP_TYPE_xvw: + return e.use_exact_insn (code_for_sf_vc_xvw_se (e.vector_mode ())); + case OP_TYPE_ivw: + return e.use_exact_insn (code_for_sf_vc_ivw_se (e.vector_mode ())); + case OP_TYPE_fvw: + return e.use_exact_insn (code_for_sf_vc_fvw_se (e.vector_mode ())); + case OP_TYPE_v_vvv: + return e.use_exact_insn (code_for_sf_vc_v_vvv_se (e.vector_mode ())); + case OP_TYPE_v_xvv: + return e.use_exact_insn (code_for_sf_vc_v_xvv_se (e.vector_mode ())); + case OP_TYPE_v_ivv: + return e.use_exact_insn (code_for_sf_vc_v_ivv_se (e.vector_mode ())); + case OP_TYPE_v_fvv: + return e.use_exact_insn (code_for_sf_vc_v_fvv_se (e.vector_mode ())); + case OP_TYPE_v_vvw: + return e.use_exact_insn (code_for_sf_vc_v_vvw_se (e.vector_mode ())); + case OP_TYPE_v_xvw: + return e.use_exact_insn (code_for_sf_vc_v_xvw_se 
(e.vector_mode ())); + case OP_TYPE_v_ivw: + return e.use_exact_insn (code_for_sf_vc_v_ivw_se (e.vector_mode ())); + case OP_TYPE_v_fvw: + return e.use_exact_insn (code_for_sf_vc_v_fvw_se (e.vector_mode ())); + default: + gcc_unreachable (); + } + } +}; + static CONSTEXPR const sf_vqmacc sf_vqmacc_obj; static CONSTEXPR const sf_vqmaccu sf_vqmaccu_obj; static CONSTEXPR const sf_vqmaccsu sf_vqmaccsu_obj; static CONSTEXPR const sf_vqmaccus sf_vqmaccus_obj; static CONSTEXPR const sf_vfnrclip_x_f_qf<UNSPEC_SF_VFNRCLIP> sf_vfnrclip_x_f_qf_obj; static CONSTEXPR const sf_vfnrclip_xu_f_qf<UNSPEC_SF_VFNRCLIPU> sf_vfnrclip_xu_f_qf_obj; +static CONSTEXPR const sf_vc sf_vc_obj; /* Declare the function base NAME, pointing it to an instance of class <NAME>_obj. */ @@ -213,4 +290,5 @@ BASE (sf_vqmaccsu) BASE (sf_vqmaccus) BASE (sf_vfnrclip_x_f_qf) BASE (sf_vfnrclip_xu_f_qf) +BASE (sf_vc) } // end namespace riscv_vector diff --git a/gcc/config/riscv/sifive-vector-builtins-bases.h b/gcc/config/riscv/sifive-vector-builtins-bases.h index 69e5540..4ec1e30 100644 --- a/gcc/config/riscv/sifive-vector-builtins-bases.h +++ b/gcc/config/riscv/sifive-vector-builtins-bases.h @@ -23,6 +23,8 @@ namespace riscv_vector { +static const unsigned int CP_USE_COPROCESSORS = 1U << 6; + namespace bases { extern const function_base *const sf_vqmacc; extern const function_base *const sf_vqmaccu; @@ -30,6 +32,7 @@ extern const function_base *const sf_vqmaccsu; extern const function_base *const sf_vqmaccus; extern const function_base *const sf_vfnrclip_x_f_qf; extern const function_base *const sf_vfnrclip_xu_f_qf; +extern const function_base *const sf_vc; } } // end namespace riscv_vector diff --git a/gcc/config/riscv/sifive-vector-builtins-functions.def b/gcc/config/riscv/sifive-vector-builtins-functions.def index e6621c7..f6703ae 100644 --- a/gcc/config/riscv/sifive-vector-builtins-functions.def +++ b/gcc/config/riscv/sifive-vector-builtins-functions.def @@ -55,4 +55,49 @@ DEF_RVV_FUNCTION (sf_vfnrclip_x_f_qf, sf_vfnrclip, full_preds, i_clip_qf_ops) DEF_RVV_FUNCTION (sf_vfnrclip_xu_f_qf, sf_vfnrclip, full_preds, u_clip_qf_ops) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS XSFVCP_EXT +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_x_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_i_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_iv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vvv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xvv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_ivv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fvv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vvw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xvw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_ivw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fvw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_x_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_i_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_iv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vvv_ops) 
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xvv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_ivv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fvv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vvw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xvw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_ivw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fvw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_x_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_i_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_iv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vvv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xvv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_ivv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fvv_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vvw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xvw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_ivw_ops) +DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fvw_ops) +#undef REQUIRED_EXTENSIONS + #undef DEF_RVV_FUNCTION diff --git a/gcc/config/riscv/sifive-vector.md b/gcc/config/riscv/sifive-vector.md index 2975b1e..a416634 100644 --- a/gcc/config/riscv/sifive-vector.md +++ b/gcc/config/riscv/sifive-vector.md @@ -182,3 +182,874 @@ "sf.vfnrclip.x<v_su>.f.qf\t%0,%3,%4%p1" [(set_attr "type" "sf_vfnrclip") (set_attr "mode" "<MODE>")]) + +;; SF_VCP +(define_insn "@sf_vc_x_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:SI 2 "const_int_operand" "K") + (match_operand:SI 3 "const_int_operand" "K") + (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.x\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_x_se<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:SI 4 "const_int_operand" "K,K") + (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPECV_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.x\t%3,%4,%0,%5" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_x<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + 
(match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:SI 4 "const_int_operand" "K,K") + (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPEC_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.x\t%3,%4,%0,%5" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_i_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:SI 2 "const_int_operand" "K") + (match_operand:SI 3 "const_int_operand" "K") + (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.i\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_i_se<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:SI 4 "const_int_operand" "K,K") + (match_operand:SI 5 "const_int_operand" "P,P")] UNSPECV_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.i\t%3,%4,%0,%5" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_i<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:SI 4 "const_int_operand" "K,K") + (match_operand:SI 5 "const_int_operand" "P,P")] UNSPEC_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.i\t%3,%4,%0,%5" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_vv_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:SI 2 "const_int_operand" "K") + (match_operand:VFULLI 3 "register_operand" "vr") + (match_operand:VFULLI 4 "register_operand" "vr")] 
UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.vv\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_vv_se<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vr,vr") + (match_operand:VFULLI 5 "register_operand" "vr,vr")] UNSPECV_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.vv\t%3,%0,%4,%5" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_vv<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vr,vr") + (match_operand:VFULLI 5 "register_operand" "vr,vr")] UNSPEC_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.vv\t%3,%0,%4,%5" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_xv_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:SI 2 "const_int_operand" "K") + (match_operand:VFULLI 3 "register_operand" "vr") + (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.xv\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_xv_se<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vr,vr") + (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPECV_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.xv\t%3,%0,%4,%5" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_xv<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd") + (if_then_else:VFULLI + (unspec:<VM> + 
[(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vr,vr") + (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPEC_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.xv\t%3,%0,%4,%5" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_iv_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:SI 2 "const_int_operand" "K") + (match_operand:VFULLI 3 "register_operand" "vr") + (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.iv\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_iv_se<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vr,vr") + (match_operand:SI 5 "const_int_operand" "P,P")] UNSPECV_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.iv\t%3,%0,%4,%5" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_iv<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vr,vr") + (match_operand:SI 5 "const_int_operand" "P,P")] UNSPEC_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.iv\t%3,%0,%4,%5" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_fv_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:SF_FV + [(match_operand:SI 1 "const_int_operand" "Ou01") + (match_operand:SI 
2 "const_int_operand" "K") + (match_operand:SF_FV 3 "register_operand" "vr") + (match_operand:<SF_XF> 4 "register_operand" "f")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.fv\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_fv_se<mode>" + [(set (match_operand:SF_FV 0 "register_operand" "=&vd,vd") + (if_then_else:SF_FV + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:SF_FV + [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01") + (match_operand:SF_FV 4 "register_operand" "vr,vr") + (match_operand:<SF_XF> 5 "register_operand" "f,f")] UNSPECV_SF_CV) + (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.fv\t%3,%0,%4,%5" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_fv<mode>" + [(set (match_operand:SF_FV 0 "register_operand" "=&vd,vd") + (if_then_else:SF_FV + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 6 "vector_length_operand" " rK, rK") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:SF_FV + [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01") + (match_operand:SF_FV 4 "register_operand" "vr,vr") + (match_operand:<SF_XF> 5 "register_operand" "f,f")] UNSPEC_SF_CV) + (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.fv\t%3,%0,%4,%5" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_vvv_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:VFULLI 2 "register_operand" "vd") + (match_operand:VFULLI 3 "register_operand" "vr") + (match_operand:VFULLI 4 "register_operand" "vr")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.vvv\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_vvv_se<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vd,vd") + (match_operand:VFULLI 5 "register_operand" "vr,vr") + (match_operand:VFULLI 6 "register_operand" "vr,vr")] UNSPECV_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.vvv\t%3,%4,%6,%5" + [(set_attr "type" 
"sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_vvv<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vd,vd") + (match_operand:VFULLI 5 "register_operand" "vr,vr") + (match_operand:VFULLI 6 "register_operand" "vr,vr")] UNSPEC_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.vvv\t%3,%4,%6,%5" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_xvv_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:VFULLI 2 "register_operand" "vd") + (match_operand:VFULLI 3 "register_operand" "vr") + (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.xvv\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_xvv_se<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vd,vd") + (match_operand:VFULLI 5 "register_operand" "vr,vr") + (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPECV_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.xvv\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_xvv<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vd,vd") + (match_operand:VFULLI 5 "register_operand" "vr,vr") + (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPEC_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.xvv\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_ivv_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 
"vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:VFULLI 2 "register_operand" "vd") + (match_operand:VFULLI 3 "register_operand" "vr") + (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.ivv\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_ivv_se<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vd,vd") + (match_operand:VFULLI 5 "register_operand" "vr,vr") + (match_operand:SI 6 "const_int_operand" "P,P")] UNSPECV_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.ivv\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_ivv<mode>" + [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr") + (if_then_else:VFULLI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VFULLI + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:VFULLI 4 "register_operand" "vd,vd") + (match_operand:VFULLI 5 "register_operand" "vr,vr") + (match_operand:SI 6 "const_int_operand" "P,P")] UNSPEC_SF_CV) + (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.ivv\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_fvv_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:SF_FV + [(match_operand:SI 1 "const_int_operand" "Ou01") + (match_operand:SF_FV 2 "register_operand" "vd") + (match_operand:SF_FV 3 "register_operand" "vr") + (match_operand:<SF_XF> 4 "register_operand" "f")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.fvv\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_fvv_se<mode>" + [(set (match_operand:SF_FV 0 "register_operand" "=&vr,vr") + (if_then_else:SF_FV + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 
"const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:SF_FV + [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01") + (match_operand:SF_FV 4 "register_operand" "vd,vd") + (match_operand:SF_FV 5 "register_operand" "vr,vr") + (match_operand:<SF_XF> 6 "register_operand" "f,f")] UNSPECV_SF_CV) + (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.fvv\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_fvv<mode>" + [(set (match_operand:SF_FV 0 "register_operand" "=&vr,vr") + (if_then_else:SF_FV + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:SF_FV + [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01") + (match_operand:SF_FV 4 "register_operand" "vd,vd") + (match_operand:SF_FV 5 "register_operand" "vr,vr") + (match_operand:<SF_XF> 6 "register_operand" "f,f")] UNSPEC_SF_CV) + (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.fvv\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_vvw_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:<SF_VW> + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:<SF_VW> 2 "register_operand" "vd") + (match_operand:SF_VC_W 3 "register_operand" "vr") + (match_operand:SF_VC_W 4 "register_operand" "vr")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.vvw\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_vvw_se<mode>" + [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr") + (if_then_else:<SF_VW> + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:<SF_VW> + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:<SF_VW> 4 "register_operand" "vd,vd") + (match_operand:SF_VC_W 5 "register_operand" "vr,vr") + (match_operand:SF_VC_W 6 "register_operand" "vr,vr")] UNSPECV_SF_CV) + (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.vvw\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_vvw<mode>" + [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr") + (if_then_else:<SF_VW> + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + 
(unspec:<SF_VW> + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:<SF_VW> 4 "register_operand" "vd,vd") + (match_operand:SF_VC_W 5 "register_operand" "vr,vr") + (match_operand:SF_VC_W 6 "register_operand" "vr,vr")] UNSPEC_SF_CV) + (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.vvw\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_xvw_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:<SF_VW> + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:<SF_VW> 2 "register_operand" "vd") + (match_operand:SF_VC_W 3 "register_operand" "vr") + (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.xvw\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_xvw_se<mode>" + [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr") + (if_then_else:<SF_VW> + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:<SF_VW> + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:<SF_VW> 4 "register_operand" "vd,vd") + (match_operand:SF_VC_W 5 "register_operand" "vr,vr") + (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPECV_SF_CV) + (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.xvw\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_xvw<mode>" + [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr") + (if_then_else:<SF_VW> + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<SF_VW> + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:<SF_VW> 4 "register_operand" "vd,vd") + (match_operand:SF_VC_W 5 "register_operand" "vr,vr") + (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPEC_SF_CV) + (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.xvw\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_ivw_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:<SF_VW> + [(match_operand:SI 1 "const_int_operand" "Ou02") + (match_operand:<SF_VW> 2 "register_operand" "vd") + (match_operand:SF_VC_W 3 "register_operand" "vr") + (match_operand:SI 4 
"immediate_operand" "P")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.ivw\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_ivw_se<mode>" + [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr") + (if_then_else:<SF_VW> + (unspec_volatile:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:<SF_VW> + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:<SF_VW> 4 "register_operand" "vd,vd") + (match_operand:SF_VC_W 5 "register_operand" "vr,vr") + (match_operand:SI 6 "immediate_operand" "P,P")] UNSPEC_SF_CV) + (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.ivw\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_ivw<mode>" + [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr") + (if_then_else:<SF_VW> + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<SF_VW> + [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02") + (match_operand:<SF_VW> 4 "register_operand" "vd,vd") + (match_operand:SF_VC_W 5 "register_operand" "vr,vr") + (match_operand:SI 6 "immediate_operand" "P,P")] UNSPEC_SF_CV) + (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.ivw\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_fvw_se<mode>" + [(unspec:<VM> + [(match_operand:<VM> 0 "vector_mask_operand" " Wc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:<SF_VW> + [(match_operand:SI 1 "const_int_operand" "Ou01") + (match_operand:<SF_VW> 2 "register_operand" "vd") + (match_operand:SF_VC_FW 3 "register_operand" "vr") + (match_operand:<SF_XFW> 4 "register_operand" "f")] UNSPECV_SF_CV)] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.fvw\t%1,%2,%3,%4" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_fvw_se<mode>" + [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr") + (if_then_else:<SF_VW> + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec_volatile:<SF_VW> + [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01") + (match_operand:<SF_VW> 4 "register_operand" "vd,vd") + (match_operand:SF_VC_FW 5 "register_operand" "vr,vr") + (match_operand:<SF_XFW> 6 "register_operand" "f,f")] UNSPECV_SF_CV) + (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + 
"sf.vc.v.fvw\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc_se") + (set_attr "mode" "<MODE>")]) + +(define_insn "@sf_vc_v_fvw<mode>" + [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr") + (if_then_else:<SF_VW> + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1") + (match_operand 7 "vector_length_operand" " rK, rK") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (match_operand 10 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<SF_VW> + [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01") + (match_operand:<SF_VW> 4 "register_operand" "vd,vd") + (match_operand:SF_VC_FW 5 "register_operand" "vr,vr") + (match_operand:<SF_XFW> 6 "register_operand" "f,f")] UNSPEC_SF_CV) + (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))] + "TARGET_VECTOR && TARGET_XSFVCP" + "sf.vc.v.fvw\t%3,%4,%5,%6" + [(set_attr "type" "sf_vc") + (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md index 726800a..50ec8b3 100644 --- a/gcc/config/riscv/sync.md +++ b/gcc/config/riscv/sync.md @@ -405,18 +405,17 @@ (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_EXCHANGE)) (set (match_dup 1) - (match_operand:GPR 2 "register_operand" "0")) + (match_operand:GPR 2 "reg_or_0_operand" "rJ")) (clobber (match_scratch:GPR 4 "=&r"))] ;; tmp_1 "!TARGET_ZAAMO && TARGET_ZALRSC" { return "1:\;" - "lr.<amo>%I3\t%4, %1\;" - "sc.<amo>%J3\t%0, %0, %1\;" - "bnez\t%0, 1b\;" - "mv\t%0, %4"; + "lr.<amo>%I3\t%0, %1\;" + "sc.<amo>%J3\t%4, %z2, %1\;" + "bnez\t%4, 1b\"; } [(set_attr "type" "atomic") - (set (attr "length") (const_int 16))]) + (set (attr "length") (const_int 12))]) (define_expand "atomic_exchange<mode>" [(match_operand:SHORT 0 "register_operand") ;; old value at mem @@ -628,7 +627,7 @@ (match_operand:SHORT 1 "memory_operand" "+A")) ;; memory (set (match_dup 1) (unspec_volatile:SHORT [(match_operand:SHORT 2 "register_operand" "0") ;; expected_val - (match_operand:SHORT 3 "register_operand" "rJ") ;; desired_val + (match_operand:SHORT 3 "reg_or_0_operand" "rJ") ;; desired_val (match_operand:SI 4 "const_int_operand") ;; mod_s (match_operand:SI 5 "const_int_operand")] ;; mod_f UNSPEC_COMPARE_AND_SWAP))] diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 6493087..7aac56a 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -104,6 +104,13 @@ riscv-avlprop.o: $(srcdir)/config/riscv/riscv-avlprop.cc \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-avlprop.cc +riscv-vect-permconst.o: $(srcdir)/config/riscv/riscv-vect-permconst.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \ + $(TARGET_H) tree-pass.h df.h rtl-ssa.h cfgcleanup.h insn-attr.h \ + tm-constrs.h insn-opinit.h cfgrtl.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/riscv/riscv-vect-permconst.cc + riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc \ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(COMPILE) $< @@ -180,3 +187,50 @@ s-riscv-vector-type-indexer.gen.defs: build/genrvv-type-indexer$(build_exeext) $(STAMP) s-riscv-vector-type-indexer.gen.defs genprog+=rvv-type-indexer + +RISCV_EXT_DEFS = \ + $(srcdir)/config/riscv/riscv-ext.def \ + $(srcdir)/config/riscv/riscv-ext-corev.def \ + $(srcdir)/config/riscv/riscv-ext.def \ + $(srcdir)/config/riscv/riscv-ext-sifive.def \ + $(srcdir)/config/riscv/riscv-ext-thead.def \ + 
$(srcdir)/config/riscv/riscv-ext-ventana.def \ + $(srcdir)/config/riscv/riscv-ext-mips.def + +$(srcdir)/config/riscv/riscv-ext.opt: $(RISCV_EXT_DEFS) + +build/gen-riscv-ext-opt.o: $(srcdir)/config/riscv/gen-riscv-ext-opt.cc \ + $(RISCV_EXT_DEFS) + $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@ + +build/gen-riscv-ext-opt$(build_exeext): build/gen-riscv-ext-opt.o + $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $< + +s-riscv-ext.opt: build/gen-riscv-ext-opt$(build_exeext) + $(RUN_GEN) build/gen-riscv-ext-opt$(build_exeext) > tmp-riscv-ext.opt + $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.opt $(srcdir)/config/riscv/riscv-ext.opt + $(STAMP) s-riscv-ext.opt + +build/gen-riscv-ext-texi.o: $(srcdir)/config/riscv/gen-riscv-ext-texi.cc \ + $(RISCV_EXT_DEFS) + $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@ + +build/gen-riscv-ext-texi$(build_exeext): build/gen-riscv-ext-texi.o + $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $< + +$(srcdir)/doc/riscv-ext.texi: $(RISCV_EXT_DEFS) +$(srcdir)/doc/riscv-ext.texi: s-riscv-ext.texi ; @true + +# Generate the doc when generating option file. +$(srcdir)/config/riscv/riscv-ext.opt: s-riscv-ext.texi ; @true + +s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext) + $(RUN_GEN) build/gen-riscv-ext-texi$(build_exeext) > tmp-riscv-ext.texi + $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi + $(STAMP) s-riscv-ext.texi + +# Run `riscv-regen' after you changed or added anything from riscv-ext*.def + +.PHONY: riscv-regen + +riscv-regen: s-riscv-ext.texi s-riscv-ext.opt diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index f8da71b..5f6cc42 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -120,10 +120,11 @@ UNSPEC_SF_VFNRCLIP UNSPEC_SF_VFNRCLIPU + UNSPEC_SF_CV ]) (define_c_enum "unspecv" [ - UNSPECV_FRM_RESTORE_EXIT + UNSPECV_SF_CV ]) ;; Subset of VI with fractional LMUL types @@ -2417,6 +2418,47 @@ (RVVM1x2DF "rvvm1df") ]) +(define_mode_attr vsubel [ + (RVVM8HI "qi") (RVVM4HI "qi") (RVVM2HI "qi") (RVVM1HI "qi") (RVVMF2HI "qi") (RVVMF4HI "qi") + + (RVVM8SI "hi") (RVVM4SI "hi") (RVVM2SI "hi") (RVVM1SI "hi") (RVVMF2SI "hi") + + (RVVM8SF "hf") (RVVM4SF "hf") (RVVM2SF "hf") (RVVM1SF "hf") (RVVMF2SF "hf") + + (RVVM8DI "si") (RVVM4DI "si") (RVVM2DI "si") (RVVM1DI "si") + + (RVVM8DF "sf") (RVVM4DF "sf") (RVVM2DF "sf") (RVVM1DF "sf") + + ;; VLS modes. 
+ (V1HI "qi") (V2HI "qi") (V4HI "qi") (V8HI "qi") (V16HI "qi") (V32HI "qi") (V64HI "qi") (V128HI "qi") (V256HI "qi") + (V512HI "qi") (V1024HI "qi") (V2048HI "qi") + (V1SI "hi") (V2SI "hi") (V4SI "hi") (V8SI "hi") (V16SI "hi") (V32SI "hi") (V64SI "hi") (V128SI "hi") (V256SI "hi") + (V512SI "hi") (V1024SI "hi") + (V1DI "si") (V2DI "si") (V4DI "si") (V8DI "si") (V16DI "si") (V32DI "si") (V64DI "si") (V128DI "si") (V256DI "si") (V512DI "si") + + (V1SF "hf") + (V2SF "hf") + (V4SF "hf") + (V8SF "hf") + (V16SF "hf") + (V32SF "hf") + (V64SF "hf") + (V128SF "hf") + (V256SF "hf") + (V512SF "hf") + (V1024SF "hf") + (V1DF "sf") + (V2DF "sf") + (V4DF "sf") + (V8DF "sf") + (V16DF "sf") + (V32DF "sf") + (V64DF "sf") + (V128DF "sf") + (V256DF "sf") + (V512DF "sf") +]) + (define_mode_attr VSUBEL [ (RVVM8HI "QI") (RVVM4HI "QI") (RVVM2HI "QI") (RVVM1HI "QI") (RVVMF2HI "QI") (RVVMF4HI "QI") @@ -4040,6 +4082,15 @@ smax umax smin umin mult div udiv mod umod ]) +(define_code_iterator any_int_binop_no_shift_v_vdup [ + plus minus and ior xor mult div udiv mod umod smax umax smin umin us_plus + us_minus ss_plus ss_minus +]) + +(define_code_iterator any_int_binop_no_shift_vdup_v [ + plus minus and ior xor mult smax umax smin umin us_plus ss_plus +]) + (define_code_iterator any_int_unop [neg not]) (define_code_iterator any_commutative_binop [plus and ior xor @@ -4873,3 +4924,50 @@ (RVVM1QI "rvvm4sf") (RVVM2QI "rvvm8sf") ]) + + +(define_mode_iterator SF_VC_W [ + RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") +]) + +(define_mode_attr SF_VW [ + (RVVM4QI "RVVM8HI") (RVVM2QI "RVVM4HI") (RVVM1QI "RVVM2HI") (RVVMF2QI "RVVM1HI") + (RVVMF4QI "RVVMF2HI") (RVVMF8QI "RVVMF4HI") + (RVVM4HI "RVVM8SI") (RVVM2HI "RVVM4SI") (RVVM1HI "RVVM2SI") (RVVMF2HI "RVVM1SI") + (RVVMF4HI "RVVMF2SI") + (RVVM4SI "RVVM8DI") (RVVM2SI "RVVM4DI") (RVVM1SI "RVVM2DI") (RVVMF2SI "RVVM1DI") +]) + +(define_mode_attr sf_vw [ + (RVVM4QI "rvvm8hi") (RVVM2QI "rvvm4hi") (RVVM1QI "rvvm2hi") (RVVMF2QI "rvvm1hi") + (RVVMF4QI "rvvmf2hi") (RVVMF8QI "rvvmf4hi") + (RVVM4HI "rvvm8si") (RVVM2HI "rvvm4si") (RVVM1HI "rvvm2si") (RVVMF2HI "rvvm1si") + (RVVMF4HI "rvvmf2si") + (RVVM4SI "rvvm8di") (RVVM2SI "rvvm4di") (RVVM1SI "rvvm2di") (RVVMF2SI "rvvm1di") +]) + +(define_mode_iterator SF_FV [ + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64") + (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") +]) + + +(define_mode_iterator SF_VC_FW [ + RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") +]) + +(define_mode_attr SF_XF [ + (RVVM8HI "HF") (RVVM4HI "HF") (RVVM2HI "HF") (RVVM1HI "HF") (RVVMF2HI "HF") (RVVMF4HI "HF") + (RVVM8SI "SF") (RVVM4SI "SF") (RVVM2SI "SF") (RVVM1SI "SF") (RVVMF2SI "SF") + (RVVM8DI "DF") (RVVM4DI "DF") (RVVM2DI "DF") (RVVM1DI "DF") +]) + +(define_mode_attr SF_XFW [ + (RVVM4HI "HF") (RVVM2HI "HF") (RVVM1HI "HF") (RVVMF2HI "HF") (RVVMF4HI "HF") + (RVVM4SI "SF") (RVVM2SI "SF") (RVVM1SI "SF") (RVVMF2SI "SF") +]) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 8ee43cf..baf215b 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -56,8 +56,7 @@ 
vssegtux,vssegtox,vlsegdff,vandn,vbrev,vbrev8,vrev8,vcpop,vclz,vctz,vrol,\ vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,\ vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,\ - vfncvtbf16,vfwcvtbf16,vfwmaccbf16,\ - sf_vqmacc,sf_vfnrclip") + vfncvtbf16,vfwcvtbf16,vfwmaccbf16,sf_vqmacc,sf_vfnrclip,sf_vc,sf_vc_se") (const_string "true")] (const_string "false"))) @@ -1116,19 +1115,6 @@ (set_attr "mode" "SI")] ) -;; The volatile fsrmsi restore is used for the exit point for the -;; dynamic mode switching. It will generate one volatile fsrm a5 -;; which won't be eliminated. -(define_insn "fsrmsi_restore_volatile" - [(set (reg:SI FRM_REGNUM) - (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] - UNSPECV_FRM_RESTORE_EXIT))] - "TARGET_VECTOR" - "fsrm\t%0" - [(set_attr "type" "wrfrm") - (set_attr "mode" "SI")] -) - ;; Read FRM (define_insn "frrmsi" [(set (match_operand:SI 0 "register_operand" "=r") @@ -1594,8 +1580,22 @@ "&& 1" [(const_int 0)] { - riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), - riscv_vector::UNARY_OP, operands); + if (!strided_load_broadcast_p () + && TARGET_ZVFHMIN && !TARGET_ZVFH && <VEL>mode == HFmode) + { + /* For Float16, reinterpret as HImode, broadcast and reinterpret + back. */ + poly_uint64 nunits = GET_MODE_NUNITS (<MODE>mode); + machine_mode vmodehi + = riscv_vector::get_vector_mode (HImode, nunits).require (); + rtx ops[] = {lowpart_subreg (vmodehi, operands[0], <MODE>mode), + lowpart_subreg (HImode, operands[1], HFmode)}; + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (vmodehi), + riscv_vector::UNARY_OP, ops); + } + else + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), + riscv_vector::UNARY_OP, operands); DONE; } [(set_attr "type" "vector")] @@ -1797,7 +1797,7 @@ [(set_attr "type" "vsetvl") (set_attr "mode" "SI")]) -;; This pattern use to combine bellow two insns and then further remove +;; This pattern use to combine below two insns and then further remove ;; unnecessary sign_extend operations: ;; (set (reg:DI 134 [ _1 ]) ;; (unspec:DI [ @@ -2136,18 +2136,34 @@ (match_operand 7 "const_int_operand") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLS - (match_operand:<VEL> 3 "direct_broadcast_operand")) + ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below. + (match_operand:<VEL> 3 "direct_broadcast_operand") ;; ) (match_operand:V_VLS 2 "vector_merge_operand")))] "TARGET_VECTOR" { /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f has better chances to do vsetvl fusion in vsetvl pass. */ + bool wrap_vec_dup = true; + rtx vec_cst = NULL_RTX; if (riscv_vector::splat_to_scalar_move_p (operands)) { operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode); operands[3] = force_reg (<VEL>mode, operands[3]); } + else if (immediate_operand (operands[3], <VEL>mode) + && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3])) + && (/* -> pred_broadcast<mode>_zero */ + (vector_least_significant_set_mask_operand (operands[1], + <VM>mode) + && vector_const_0_operand (vec_cst, <MODE>mode)) + || (/* pred_broadcast<mode>_imm */ + vector_all_trues_mask_operand (operands[1], <VM>mode) + && vector_const_int_or_double_0_operand (vec_cst, + <MODE>mode)))) + { + operands[3] = vec_cst; + wrap_vec_dup = false; + } /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. 
*/ else if (satisfies_constraint_Wdm (operands[3])) { @@ -2169,7 +2185,7 @@ } } else if (GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode) - && (immediate_operand (operands[3], Pmode) + && (immediate_operand (operands[3], Pmode) || (CONST_POLY_INT_P (operands[3]) && known_ge (rtx_to_poly_int64 (operands[3]), 0U) && known_le (rtx_to_poly_int64 (operands[3]), GET_MODE_SIZE (<MODE>mode))))) @@ -2191,6 +2207,8 @@ ; else operands[3] = force_reg (<VEL>mode, operands[3]); + if (wrap_vec_dup) + operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]); }) (define_insn_and_split "*pred_broadcast<mode>" @@ -2220,12 +2238,7 @@ "(register_operand (operands[3], <VEL>mode) || CONST_POLY_INT_P (operands[3])) && GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)" - [(set (match_dup 0) - (if_then_else:V_VLSI (unspec:<VM> [(match_dup 1) (match_dup 4) - (match_dup 5) (match_dup 6) (match_dup 7) - (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLSI (match_dup 3)) - (match_dup 2)))] + [(const_int 0)] { gcc_assert (can_create_pseudo_p ()); if (CONST_POLY_INT_P (operands[3])) @@ -2234,12 +2247,6 @@ emit_move_insn (tmp, operands[3]); operands[3] = tmp; } - rtx m = assign_stack_local (<VEL>mode, GET_MODE_SIZE (<VEL>mode), - GET_MODE_ALIGNMENT (<VEL>mode)); - m = validize_mem (m); - emit_move_insn (m, operands[3]); - m = gen_rtx_MEM (<VEL>mode, force_reg (Pmode, XEXP (m, 0))); - operands[3] = m; /* For SEW = 64 in RV32 system, we expand vmv.s.x: andi a2,a2,1 @@ -2250,6 +2257,35 @@ operands[4] = riscv_vector::gen_avl_for_scalar_move (operands[4]); operands[1] = CONSTM1_RTX (<VM>mode); } + + /* If the target doesn't want a strided-load broadcast we go with a regular + V1DImode load and a broadcast gather. */ + if (strided_load_broadcast_p ()) + { + rtx mem = assign_stack_local (<VEL>mode, GET_MODE_SIZE (<VEL>mode), + GET_MODE_ALIGNMENT (<VEL>mode)); + mem = validize_mem (mem); + emit_move_insn (mem, operands[3]); + mem = gen_rtx_MEM (<VEL>mode, force_reg (Pmode, XEXP (mem, 0))); + + emit_insn + (gen_pred_broadcast<mode> + (operands[0], operands[1], operands[2], mem, + operands[4], operands[5], operands[6], operands[7])); + } + else + { + rtx tmp = gen_reg_rtx (V1DImode); + emit_move_insn (tmp, lowpart_subreg (V1DImode, operands[3], + <VEL>mode)); + tmp = lowpart_subreg (<MODE>mode, tmp, V1DImode); + + emit_insn + (gen_pred_gather<mode>_scalar + (operands[0], operands[1], operands[2], tmp, CONST0_RTX (Pmode), + operands[4], operands[5], operands[6], operands[7])); + } + DONE; } [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv") (set_attr "mode" "<MODE>")]) @@ -2289,9 +2325,9 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (vec_duplicate:V_VLSF_ZVFHMIN - (match_operand:<VEL> 3 "direct_broadcast_operand" "Wdm, Wdm, Wdm, Wdm")) + (match_operand:<VEL> 3 "direct_broadcast_operand" " A, A, A, A")) (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " vu, 0, vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && strided_load_broadcast_p ()" "@ vlse<sew>.v\t%0,%3,zero,%1.t vlse<sew>.v\t%0,%3,zero,%1.t @@ -3939,7 +3975,7 @@ (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf2\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) @@ -3959,7 +3995,7 @@ (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VQEXTI 2 
"vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf4\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) @@ -3979,7 +4015,7 @@ (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VOEXTI 2 "vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf8\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) @@ -6595,9 +6631,42 @@ (match_operand:<VEL> 2 "register_operand")) (match_operand:V_VLSF 3 "register_operand")) (match_operand:V_VLSF 4 "register_operand")) - (match_operand:V_VLSF 5 "register_operand")))] + (match_operand:V_VLSF 5 "vector_merge_operand")))] "TARGET_VECTOR" -{}) +{ + riscv_vector::prepare_ternary_operands (operands); +}) + +(define_insn "*pred_mul_<optab><mode>_scalar_undef" + [(set (match_operand:V_VLSF 0 "register_operand" "=vd,vd, vr, vr") + (if_then_else:V_VLSF + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") + (match_operand 6 "vector_length_operand" "rvl,rvl,rvl,rvl") + (match_operand 7 "const_int_operand" " i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i") + (match_operand 9 "const_int_operand" " i, i, i, i") + (match_operand 10 "const_int_operand" " i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) + (plus_minus:V_VLSF + (mult:V_VLSF + (vec_duplicate:V_VLSF + (match_operand:<VEL> 3 "register_operand" " f, f, f, f")) + (match_operand:V_VLSF 4 "register_operand" " 0, vr, 0, vr")) + (match_operand:V_VLSF 5 "register_operand" " vr, 0, vr, 0")) + (match_operand:V_VLSF 2 "vector_undef_operand")))] + "TARGET_VECTOR" + "@ + vf<madd_msub>.vf\t%0,%3,%5%p1 + vf<macc_msac>.vf\t%0,%3,%4%p1 + vf<madd_msub>.vf\t%0,%3,%5%p1 + vf<macc_msac>.vf\t%0,%3,%4%p1" + [(set_attr "type" "vfmuladd") + (set_attr "mode" "<MODE>") + (set (attr "frm_mode") + (symbol_ref "riscv_vector::get_frm_mode (operands[10])"))]) (define_insn "*pred_<madd_msub><mode>_scalar" [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vr") @@ -6811,9 +6880,43 @@ (match_operand:<VEL> 2 "register_operand")) (match_operand:V_VLSF 3 "register_operand"))) (match_operand:V_VLSF 4 "register_operand")) - (match_operand:V_VLSF 5 "register_operand")))] + (match_operand:V_VLSF 5 "vector_merge_operand")))] "TARGET_VECTOR" -{}) +{ + riscv_vector::prepare_ternary_operands (operands); +}) + +(define_insn "*pred_mul_neg_<optab><mode>_scalar_undef" + [(set (match_operand:V_VLSF 0 "register_operand" "=vd,vd, vr, vr") + (if_then_else:V_VLSF + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") + (match_operand 6 "vector_length_operand" "rvl,rvl,rvl,rvl") + (match_operand 7 "const_int_operand" " i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i") + (match_operand 9 "const_int_operand" " i, i, i, i") + (match_operand 10 "const_int_operand" " i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) + (plus_minus:V_VLSF + (neg:V_VLSF + (mult:V_VLSF + (vec_duplicate:V_VLSF + (match_operand:<VEL> 3 "register_operand" " f, f, f, f")) + (match_operand:V_VLSF 4 "register_operand" " 0, vr, 0, vr"))) + (match_operand:V_VLSF 5 "register_operand" " vr, 0, vr, 0")) + (match_operand:V_VLSF 2 "vector_undef_operand")))] + "TARGET_VECTOR" + "@ + vf<nmsub_nmadd>.vf\t%0,%3,%5%p1 + vf<nmsac_nmacc>.vf\t%0,%3,%4%p1 + vf<nmsub_nmadd>.vf\t%0,%3,%5%p1 + vf<nmsac_nmacc>.vf\t%0,%3,%4%p1" + [(set_attr "type" 
"vfmuladd") + (set_attr "mode" "<MODE>") + (set (attr "frm_mode") + (symbol_ref "riscv_vector::get_frm_mode (operands[10])"))]) (define_insn "*pred_<nmsub_nmadd><mode>_scalar" [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vr") @@ -7196,10 +7299,10 @@ (plus_minus:VWEXTF (mult:VWEXTF (float_extend:VWEXTF - (vec_duplicate:<V_DOUBLE_TRUNC> - (match_operand:<VSUBEL> 3 "register_operand" " f"))) - (float_extend:VWEXTF - (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" " vr"))) + (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" " vr")) + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 3 "register_operand" " f")))) (match_operand:VWEXTF 2 "register_operand" " 0")) (match_dup 2)))] "TARGET_VECTOR" diff --git a/gcc/config/riscv/zicond.md b/gcc/config/riscv/zicond.md index f87b4f2..d170f6a 100644 --- a/gcc/config/riscv/zicond.md +++ b/gcc/config/riscv/zicond.md @@ -234,5 +234,39 @@ (const_int 0) (match_dup 4)))]) +;; We can splat the sign bit across a GPR with a arithmetic right shift +;; which gives us a 0, -1 result. We then turn on bit #0 unconditionally +;; which results in 1, -1. There's probably other cases that could be +;; handled, this seems particularly important though. +(define_split + [(set (match_operand:X 0 "register_operand") + (plus:X (if_then_else:X (ge:X (match_operand:X 1 "register_operand") + (const_int 0)) + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand")) + (match_operand 4 "const_int_operand")))] + "((TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV) + && INTVAL (operands[2]) + INTVAL (operands[4]) == 1 + && INTVAL (operands[3]) + INTVAL (operands[4]) == -1)" + [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2))) + (set (match_dup 0) (ior:X (match_dup 0) (const_int 1)))] + { operands[2] = GEN_INT (GET_MODE_BITSIZE (word_mode) - 1); }) - +;; Similarly, but the condition and true/false values are reversed +;; +;; Note the case where the condition is reversed, but not the true/false +;; values. Or vice-versa is not handled because we don't support 4->3 +;; splits. +(define_split + [(set (match_operand:X 0 "register_operand") + (plus:X (if_then_else:X (lt:X (match_operand:X 1 "register_operand") + (const_int 0)) + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand")) + (match_operand 4 "const_int_operand")))] + "((TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV) + && INTVAL (operands[2]) + INTVAL (operands[4]) == -1 + && INTVAL (operands[3]) + INTVAL (operands[4]) == 1)" + [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2))) + (set (match_dup 0) (ior:X (match_dup 0) (const_int 1)))] + { operands[2] = GEN_INT (GET_MODE_BITSIZE (word_mode) - 1); }) |