diff options
Diffstat (limited to 'gcc/config/riscv')
-rw-r--r-- | gcc/config/riscv/autovec-opt.md | 23 | ||||
-rw-r--r-- | gcc/config/riscv/bitmanip.md | 74 | ||||
-rw-r--r-- | gcc/config/riscv/predicates.md | 4 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-opts.h | 2 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-vect-permconst.cc | 20 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-vector-costs.cc | 2 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.cc | 54 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.md | 20 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.opt | 10 | ||||
-rw-r--r-- | gcc/config/riscv/vector-iterators.md | 4 |
11 files changed, 204 insertions, 10 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 0c3b0cc..7cf7e8a 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1673,3 +1673,26 @@ DONE; } [(set_attr "type" "vandn")]) + + +;; ============================================================================= +;; Combine vec_duplicate + op.vv to op.vx +;; Include +;; - vadd.vx +;; ============================================================================= +(define_insn_and_split "*<optab>_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (any_int_binop_no_shift_vx:V_VLSI + (vec_duplicate:V_VLSI + (match_operand:<VEL> 1 "register_operand")) + (match_operand:V_VLSI 2 "<binop_rhs2_predicate>")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[2], operands[1]}; + riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode), + riscv_vector::BINARY_OP, ops); + } + [(set_attr "type" "vialu")]) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 20d03dc..95df533 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -1302,3 +1302,77 @@ } DONE; }) + +;; More forms of single bit extraction. The RISC-V port does not +;; define SHIFT_COUNT_TRUNCATED so we need forms where the bit position +;; is masked. +;; +;; We could in theory use this for rv32 as well, but it probably does +;; not occur in practice. The bit position would need to be QI/HI mode, +;; otherwise we would not need the zero extension. +;; +;; One could also argue that the zero extension is redundant and should +;; have been optimized away during RTL simplification. +(define_insn "*bextdi_position_ze_masked" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (zero_extend:DI + (and:SI (match_operand:SI 2 "register_operand" "r") + (const_int 63)))))] + "TARGET_64BIT && TARGET_ZBS" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +;; Same as above, but without the extraneous zero_extend. +(define_insn "*bextdi_position_ze_masked" + [(set (match_operand:X 0 "register_operand" "=r") + (zero_extract:X + (match_operand:X 1 "register_operand" "r") + (const_int 1) + (and:X (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "bitpos_mask_operand" "n"))))] + "TARGET_64BIT && TARGET_ZBS" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + + +;; Single bit extraction by first shifting it into the sign bit, then +;; shifting it down to the low bit. +(define_insn "*bext<mode>_position_masked" + [(set (match_operand:X 0 "register_operand" "=r") + (lshiftrt:X (ashift:X (match_operand:X 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")) + (match_operand:X 3 "bitpos_mask_operand" "n")))] + "TARGET_ZBS" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +;; Single bit extraction by shifting into the low bit, but with the +;; position formed with a subreg of a mask. +(define_insn "*bext<mode>_position_masked_subreg" + [(set (match_operand:X 0 "register_operand" "=r") + (lshiftrt:X + (ashift:X (match_operand:X 1 "register_operand" "r") + (subreg:QI + (and:X (match_operand:X 2 "register_operand" "r") + (match_operand:X 3 "bitpos_mask_operand" "n")) 0)) + (match_operand:X 4 "bitpos_mask_operand" "n")))] + "TARGET_ZBS" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +;; This has shown up in testing. In particular we end up with an +;; immediate input. We can load that into a register and target +;; one of the above bext patterns. +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (lshiftrt:X (match_operand 1 "immediate_operand") + (match_operand:QI 2 "register_operand")) + (const_int 1))) + (clobber (match_operand:X 3 "register_operand"))] + "" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 0) (zero_extract:X (match_dup 3) + (const_int 1) + (zero_extend:X (match_dup 2))))]) diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index f26bafc..c9a638c 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -685,3 +685,7 @@ (and (match_operand 0 "register_operand") (match_test "REGNO (op) == RETURN_ADDR_REGNUM || REGNO (op) == T0_REGNUM"))) + +(define_predicate "bitpos_mask_operand" + (and (match_code "const_int") + (match_test "TARGET_64BIT ? INTVAL (op) == 63 : INTVAL (op) == 31"))) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 26fe228..9766b89 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -162,4 +162,6 @@ enum riscv_tls_type { #define TARGET_VECTOR_AUTOVEC_SEGMENT \ (TARGET_VECTOR && riscv_mautovec_segment) +#define GPR2VR_COST_UNPROVIDED -1 + #endif /* ! GCC_RISCV_OPTS_H */ diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 2e88990..b0d5bbb 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -836,6 +836,7 @@ struct riscv_tune_info { const struct riscv_tune_info * riscv_parse_tune (const char *, bool); const cpu_vector_cost *get_vector_costs (); +int get_gr2vr_cost (); enum { diff --git a/gcc/config/riscv/riscv-vect-permconst.cc b/gcc/config/riscv/riscv-vect-permconst.cc index feecc7e..8e13cf8 100644 --- a/gcc/config/riscv/riscv-vect-permconst.cc +++ b/gcc/config/riscv/riscv-vect-permconst.cc @@ -203,6 +203,24 @@ vector_permconst::process_bb (basic_block bb) if (bias < 0 || bias > 16384 / 8) continue; + /* We need to verify that each element would be a valid value + in the inner mode after applying the bias. */ + machine_mode inner = GET_MODE_INNER (GET_MODE (cvec)); + HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant (); + int i; + for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) + { + HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias; + if (val != sext_hwi (val, precision)) + break; + } + + /* If the loop terminated early, then we found a case where the + adjusted constant would not fit, so we can't record the constant + for this case (it's unlikely to be useful anyway. */ + if (i != CONST_VECTOR_NUNITS (cvec).to_constant ()) + continue; + /* At this point we have a load of a constant integer vector from the constant pool. That constant integer vector is hopefully a permutation constant. We need to make a copy of the vector and @@ -211,7 +229,7 @@ vector_permconst::process_bb (basic_block bb) XXX This violates structure sharing conventions. */ rtvec_def *nvec = gen_rtvec (CONST_VECTOR_NUNITS (cvec).to_constant ()); - for (int i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) + for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias); rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec); diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 167375c..c28eecd 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -1121,7 +1121,7 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, { case scalar_to_vec: stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR - : costs->regmove->GR2VR); + : get_gr2vr_cost ()); break; case vec_to_scalar: stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index a065732..3ee88db 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3863,7 +3863,40 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN Cost Model need to be well analyzed and supported in the future. */ if (riscv_v_ext_mode_p (mode)) { - *total = COSTS_N_INSNS (1); + int gr2vr_cost = get_gr2vr_cost (); + + switch (outer_code) + { + case SET: + { + switch (GET_CODE (x)) + { + case VEC_DUPLICATE: + *total = gr2vr_cost * COSTS_N_INSNS (1); + break; + case PLUS: + { + rtx op_0 = XEXP (x, 0); + rtx op_1 = XEXP (x, 1); + + if (GET_CODE (op_0) == VEC_DUPLICATE + || GET_CODE (op_1) == VEC_DUPLICATE) + *total = (gr2vr_cost + 1) * COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (1); + } + break; + default: + *total = COSTS_N_INSNS (1); + break; + } + } + break; + default: + *total = COSTS_N_INSNS (1); + break; + } + return true; } @@ -9690,7 +9723,7 @@ riscv_register_move_cost (machine_mode mode, if (to == V_REGS) { if (from_is_gpr) - return get_vector_costs ()->regmove->GR2VR; + return get_gr2vr_cost (); else if (from_is_fpr) return get_vector_costs ()->regmove->FR2VR; } @@ -12540,6 +12573,21 @@ get_vector_costs () return costs; } +/* Return the cost of operation that move from gpr to vr. + It will take the value of --param=gpr2vr_cost if it is provided. + Or the default regmove->GR2VR will be returned. */ + +int +get_gr2vr_cost () +{ + int cost = get_vector_costs ()->regmove->GR2VR; + + if (gpr2vr_cost != GPR2VR_COST_UNPROVIDED) + cost = gpr2vr_cost; + + return cost; +} + /* Implement targetm.vectorize.builtin_vectorization_cost. */ static int @@ -12606,7 +12654,7 @@ riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, { /* TODO: This is too pessimistic in case we can splat. */ int regmove_cost = fp ? costs->regmove->FR2VR - : costs->regmove->GR2VR; + : get_gr2vr_cost (); return (regmove_cost + common_costs->scalar_to_vec_cost) * estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 15c89ff..259997f 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3173,15 +3173,25 @@ "#" "&& reload_completed" [(set (match_dup 4) (lshiftrt:X (subreg:X (match_dup 2) 0) (match_dup 6))) - (set (match_dup 4) (and:X (match_dup 4) (match_dup 7))) + (set (match_dup 4) (match_dup 8)) (set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)]) (label_ref (match_dup 0)) (pc)))] { - HOST_WIDE_INT mask = INTVAL (operands[3]); - int trailing = ctz_hwi (mask); + HOST_WIDE_INT mask = INTVAL (operands[3]); + int trailing = ctz_hwi (mask); + + operands[6] = GEN_INT (trailing); + operands[7] = GEN_INT (mask >> trailing); - operands[6] = GEN_INT (trailing); - operands[7] = GEN_INT (mask >> trailing); + /* This splits after reload, so there's little chance to clean things + up. Rather than emit a ton of RTL here, we can just make a new + operand for that RHS and use it. For the case where the AND would + have been redundant, we can make it a NOP move, which does get + cleaned up. */ + if (operands[7] == CONSTM1_RTX (word_mode)) + operands[8] = operands[4]; + else + operands[8] = gen_rtx_AND (word_mode, operands[4], operands[7]); } [(set_attr "type" "branch")]) diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 7515c8e..80593ee 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -274,6 +274,8 @@ Mask(ZA64RS) Var(riscv_za_subext) Mask(ZA128RS) Var(riscv_za_subext) +Mask(ZAMA16B) Var(riscv_za_subext) + TargetVariable int riscv_zb_subext @@ -466,6 +468,10 @@ Mask(XCVBI) Var(riscv_xcv_subext) TargetVariable int riscv_sv_subext +Mask(SVADE) Var(riscv_sv_subext) + +Mask(SVADU) Var(riscv_sv_subext) + Mask(SVINVAL) Var(riscv_sv_subext) Mask(SVNAPOT) Var(riscv_sv_subext) @@ -579,6 +585,10 @@ Inline strlen calls if possible. Target RejectNegative Joined UInteger Var(riscv_strcmp_inline_limit) Init(64) Max number of bytes to compare as part of inlined strcmp/strncmp routines (default: 64). +-param=gpr2vr-cost= +Target RejectNegative Joined UInteger Var(gpr2vr_cost) Init(GPR2VR_COST_UNPROVIDED) +Set the cost value of the rvv instruction when operate from GPR to VR. + Enum Name(rvv_max_lmul) Type(enum rvv_max_lmul_enum) The RVV possible LMUL (-mrvv-max-lmul=): diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index b4c86909..eae3340 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -4041,6 +4041,10 @@ smax umax smin umin mult div udiv mod umod ]) +(define_code_iterator any_int_binop_no_shift_vx [ + plus +]) + (define_code_iterator any_int_unop [neg not]) (define_code_iterator any_commutative_binop [plus and ior xor |