aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/riscv')
-rw-r--r--gcc/config/riscv/autovec-opt.md23
-rw-r--r--gcc/config/riscv/bitmanip.md74
-rw-r--r--gcc/config/riscv/predicates.md4
-rw-r--r--gcc/config/riscv/riscv-opts.h2
-rw-r--r--gcc/config/riscv/riscv-protos.h1
-rw-r--r--gcc/config/riscv/riscv-vect-permconst.cc20
-rw-r--r--gcc/config/riscv/riscv-vector-costs.cc2
-rw-r--r--gcc/config/riscv/riscv.cc54
-rw-r--r--gcc/config/riscv/riscv.md20
-rw-r--r--gcc/config/riscv/riscv.opt10
-rw-r--r--gcc/config/riscv/vector-iterators.md4
11 files changed, 204 insertions, 10 deletions
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 0c3b0cc..7cf7e8a 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1673,3 +1673,26 @@
DONE;
}
[(set_attr "type" "vandn")])
+
+
+;; =============================================================================
+;; Combine vec_duplicate + op.vv to op.vx
+;; Include
+;; - vadd.vx
+;; =============================================================================
+;; Matches an any_int_binop_no_shift_vx operation whose first input is a
+;; vec_duplicate of the scalar register in operand 1 and whose second input
+;; is the vector in operand 2.  The output template is "#", so no assembly is
+;; printed for this pattern directly; it is split (split condition "&& 1")
+;; into the insn selected by code_for_pred_scalar.
+(define_insn_and_split "*<optab>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (any_int_binop_no_shift_vx:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "<binop_rhs2_predicate>")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ /* Reorder the operands to (destination, vector input, scalar input)
+ before handing them to emit_vlmax_insn. */
+ rtx ops[] = {operands[0], operands[2], operands[1]};
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
+ riscv_vector::BINARY_OP, ops);
+ }
+ [(set_attr "type" "vialu")])
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 20d03dc..95df533 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1302,3 +1302,77 @@
}
DONE;
})
+
+;; More forms of single bit extraction. The RISC-V port does not
+;; define SHIFT_COUNT_TRUNCATED so we need forms where the bit position
+;; is masked.
+;;
+;; We could in theory use this for rv32 as well, but it probably does
+;; not occur in practice. The bit position would need to be QI/HI mode,
+;; otherwise we would not need the zero extension.
+;;
+;; One could also argue that the zero extension is redundant and should
+;; have been optimized away during RTL simplification.
+;; DImode single bit extract where the bit position is an SImode register
+;; ANDed with 63 and then zero-extended to DImode.  Emitted as one bext
+;; using the unmasked register (operand 2) as the position.
+(define_insn "*bextdi_position_ze_masked"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 1)
+ (zero_extend:DI
+ (and:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 63)))))]
+ "TARGET_64BIT && TARGET_ZBS"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
+;; Same as above, but without the extraneous zero_extend: the masked bit
+;; position is used by the zero_extract directly.  The pattern name is kept
+;; distinct from the zero-extended variant so the two can be told apart in
+;; RTL dumps.
+;; NOTE(review): the AND is written in mode X while operand 2 is SImode;
+;; confirm that this mode combination is intended.
+(define_insn "*bextdi_position_noze_masked"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (zero_extract:X
+ (match_operand:X 1 "register_operand" "r")
+ (const_int 1)
+ (and:X (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "bitpos_mask_operand" "n"))))]
+ "TARGET_64BIT && TARGET_ZBS"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
+
+;; Single bit extraction by first shifting it into the sign bit, then
+;; shifting it down to the low bit.
+;; Matches (operand 1 << operand 2) >> operand 3, where operand 2 is a
+;; QImode register and operand 3 is the constant accepted by
+;; bitpos_mask_operand, and emits a single bext with operand 2 as the bit
+;; position.
+;; NOTE(review): a left shift by N followed by a logical right shift by
+;; XLEN-1 appears to select bit (XLEN-1-N) of operand 1, whereas bext is
+;; given N itself as the position -- confirm the two agree.
+(define_insn "*bext<mode>_position_masked"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (lshiftrt:X (ashift:X (match_operand:X 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "r"))
+ (match_operand:X 3 "bitpos_mask_operand" "n")))]
+ "TARGET_ZBS"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
+;; Single bit extraction by shifting into the low bit, but with the
+;; position formed with a subreg of a mask.
+;; Like the pattern above, except that the left shift count is the low QImode
+;; subreg of (operand 2 AND operand 3).  Operands 3 and 4 must both be the
+;; constant accepted by bitpos_mask_operand; only operands 0-2 appear in
+;; the emitted bext.
+;; NOTE(review): as with the non-subreg form, (x << N) >> (XLEN-1) appears
+;; to select bit (XLEN-1-N) while bext is given N -- confirm the two agree.
+(define_insn "*bext<mode>_position_masked_subreg"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (lshiftrt:X
+ (ashift:X (match_operand:X 1 "register_operand" "r")
+ (subreg:QI
+ (and:X (match_operand:X 2 "register_operand" "r")
+ (match_operand:X 3 "bitpos_mask_operand" "n")) 0))
+ (match_operand:X 4 "bitpos_mask_operand" "n")))]
+ "TARGET_ZBS"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
+;; This has shown up in testing. In particular we end up with an
+;; immediate input. We can load that into a register and target
+;; one of the above bext patterns.
+;; Input: (operand 1 >> operand 2) & 1 with operand 1 an immediate, plus a
+;; clobbered scratch register (operand 3).
+;; Output: the immediate is copied into the scratch register, then the
+;; result is a one-bit zero_extract of that register at the zero-extended
+;; position held in operand 2.
+;; NOTE(review): the split condition is empty, so this split is not gated
+;; on TARGET_ZBS -- confirm that is intended.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (lshiftrt:X (match_operand 1 "immediate_operand")
+ (match_operand:QI 2 "register_operand"))
+ (const_int 1)))
+ (clobber (match_operand:X 3 "register_operand"))]
+ ""
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 0) (zero_extract:X (match_dup 3)
+ (const_int 1)
+ (zero_extend:X (match_dup 2))))])
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index f26bafc..c9a638c 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -685,3 +685,7 @@
(and (match_operand 0 "register_operand")
(match_test "REGNO (op) == RETURN_ADDR_REGNUM
|| REGNO (op) == T0_REGNUM")))
+
+;; Accept only the CONST_INT 63 when TARGET_64BIT, and only the CONST_INT 31
+;; otherwise.
+(define_predicate "bitpos_mask_operand"
+ (and (match_code "const_int")
+ (match_test "TARGET_64BIT ? INTVAL (op) == 63 : INTVAL (op) == 31")))
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 26fe228..9766b89 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -162,4 +162,6 @@ enum riscv_tls_type {
#define TARGET_VECTOR_AUTOVEC_SEGMENT \
(TARGET_VECTOR && riscv_mautovec_segment)
+/* Initial value of gpr2vr_cost (--param=gpr2vr-cost=); get_gr2vr_cost ()
+ treats this value as "not provided" and uses the tuning default. */
+#define GPR2VR_COST_UNPROVIDED -1
+
#endif /* ! GCC_RISCV_OPTS_H */
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2e88990..b0d5bbb 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -836,6 +836,7 @@ struct riscv_tune_info {
const struct riscv_tune_info *
riscv_parse_tune (const char *, bool);
const cpu_vector_cost *get_vector_costs ();
+int get_gr2vr_cost ();
enum
{
diff --git a/gcc/config/riscv/riscv-vect-permconst.cc b/gcc/config/riscv/riscv-vect-permconst.cc
index feecc7e..8e13cf8 100644
--- a/gcc/config/riscv/riscv-vect-permconst.cc
+++ b/gcc/config/riscv/riscv-vect-permconst.cc
@@ -203,6 +203,24 @@ vector_permconst::process_bb (basic_block bb)
if (bias < 0 || bias > 16384 / 8)
continue;
+ /* We need to verify that each element would be a valid value
+ in the inner mode after applying the bias. */
+ machine_mode inner = GET_MODE_INNER (GET_MODE (cvec));
+ HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant ();
+ int i;
+ for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+ {
+ HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias;
+ if (val != sext_hwi (val, precision))
+ break;
+ }
+
+ /* If the loop terminated early, then we found a case where the
+ adjusted constant would not fit, so we can't record the constant
+ for this case (it's unlikely to be useful anyway). */
+ if (i != CONST_VECTOR_NUNITS (cvec).to_constant ())
+ continue;
+
/* At this point we have a load of a constant integer vector from the
constant pool. That constant integer vector is hopefully a
permutation constant. We need to make a copy of the vector and
@@ -211,7 +229,7 @@ vector_permconst::process_bb (basic_block bb)
XXX This violates structure sharing conventions. */
rtvec_def *nvec = gen_rtvec (CONST_VECTOR_NUNITS (cvec).to_constant ());
- for (int i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+ for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias);
rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec);
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 167375c..c28eecd 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -1121,7 +1121,7 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
{
case scalar_to_vec:
stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
- : costs->regmove->GR2VR);
+ : get_gr2vr_cost ());
break;
case vec_to_scalar:
stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index a065732..3ee88db 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3863,7 +3863,40 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
Cost Model need to be well analyzed and supported in the future. */
if (riscv_v_ext_mode_p (mode))
{
- *total = COSTS_N_INSNS (1);
+ int gr2vr_cost = get_gr2vr_cost ();
+
+ switch (outer_code)
+ {
+ case SET:
+ {
+ switch (GET_CODE (x))
+ {
+ case VEC_DUPLICATE:
+ *total = gr2vr_cost * COSTS_N_INSNS (1);
+ break;
+ case PLUS:
+ {
+ rtx op_0 = XEXP (x, 0);
+ rtx op_1 = XEXP (x, 1);
+
+ if (GET_CODE (op_0) == VEC_DUPLICATE
+ || GET_CODE (op_1) == VEC_DUPLICATE)
+ *total = (gr2vr_cost + 1) * COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (1);
+ }
+ break;
+ default:
+ *total = COSTS_N_INSNS (1);
+ break;
+ }
+ }
+ break;
+ default:
+ *total = COSTS_N_INSNS (1);
+ break;
+ }
+
return true;
}
@@ -9690,7 +9723,7 @@ riscv_register_move_cost (machine_mode mode,
if (to == V_REGS)
{
if (from_is_gpr)
- return get_vector_costs ()->regmove->GR2VR;
+ return get_gr2vr_cost ();
else if (from_is_fpr)
return get_vector_costs ()->regmove->FR2VR;
}
@@ -12540,6 +12573,21 @@ get_vector_costs ()
return costs;
}
+/* Return the cost of an operation that moves a value from a GPR to a VR.
+ This is the value of --param=gpr2vr-cost= when it is provided;
+ otherwise the default regmove->GR2VR is returned. */
+
+int
+get_gr2vr_cost ()
+{
+  /* Default taken from the active vector cost table; it is used unless
+     gpr2vr_cost holds something other than GPR2VR_COST_UNPROVIDED.  */
+  const int tuned_cost = get_vector_costs ()->regmove->GR2VR;
+
+  return (gpr2vr_cost == GPR2VR_COST_UNPROVIDED) ? tuned_cost : gpr2vr_cost;
+}
+
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
@@ -12606,7 +12654,7 @@ riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
{
/* TODO: This is too pessimistic in case we can splat. */
int regmove_cost = fp ? costs->regmove->FR2VR
- : costs->regmove->GR2VR;
+ : get_gr2vr_cost ();
return (regmove_cost + common_costs->scalar_to_vec_cost)
* estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 15c89ff..259997f 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3173,15 +3173,25 @@
"#"
"&& reload_completed"
[(set (match_dup 4) (lshiftrt:X (subreg:X (match_dup 2) 0) (match_dup 6)))
- (set (match_dup 4) (and:X (match_dup 4) (match_dup 7)))
+ (set (match_dup 4) (match_dup 8))
(set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)])
(label_ref (match_dup 0)) (pc)))]
{
- HOST_WIDE_INT mask = INTVAL (operands[3]);
- int trailing = ctz_hwi (mask);
+ HOST_WIDE_INT mask = INTVAL (operands[3]);
+ int trailing = ctz_hwi (mask);
+
+ operands[6] = GEN_INT (trailing);
+ operands[7] = GEN_INT (mask >> trailing);
- operands[6] = GEN_INT (trailing);
- operands[7] = GEN_INT (mask >> trailing);
+ /* This splits after reload, so there's little chance to clean things
+ up. Rather than emit a ton of RTL here, we can just make a new
+ operand for that RHS and use it. For the case where the AND would
+ have been redundant, we can make it a NOP move, which does get
+ cleaned up. */
+ if (operands[7] == CONSTM1_RTX (word_mode))
+ operands[8] = operands[4];
+ else
+ operands[8] = gen_rtx_AND (word_mode, operands[4], operands[7]);
}
[(set_attr "type" "branch")])
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7515c8e..80593ee 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -274,6 +274,8 @@ Mask(ZA64RS) Var(riscv_za_subext)
Mask(ZA128RS) Var(riscv_za_subext)
+Mask(ZAMA16B) Var(riscv_za_subext)
+
TargetVariable
int riscv_zb_subext
@@ -466,6 +468,10 @@ Mask(XCVBI) Var(riscv_xcv_subext)
TargetVariable
int riscv_sv_subext
+Mask(SVADE) Var(riscv_sv_subext)
+
+Mask(SVADU) Var(riscv_sv_subext)
+
Mask(SVINVAL) Var(riscv_sv_subext)
Mask(SVNAPOT) Var(riscv_sv_subext)
@@ -579,6 +585,10 @@ Inline strlen calls if possible.
Target RejectNegative Joined UInteger Var(riscv_strcmp_inline_limit) Init(64)
Max number of bytes to compare as part of inlined strcmp/strncmp routines (default: 64).
+-param=gpr2vr-cost=
+Target RejectNegative Joined UInteger Var(gpr2vr_cost) Init(GPR2VR_COST_UNPROVIDED)
+Set the cost value of the rvv instruction when operating from GPR to VR.
+
Enum
Name(rvv_max_lmul) Type(enum rvv_max_lmul_enum)
The RVV possible LMUL (-mrvv-max-lmul=):
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index b4c86909..eae3340 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4041,6 +4041,10 @@
smax umax smin umin mult div udiv mod umod
])
+;; Codes iterated over by the "*<optab>_vx_<mode>" vec_duplicate + op.vv to
+;; op.vx combine pattern.  Only plus is listed at present.
+(define_code_iterator any_int_binop_no_shift_vx [
+ plus
+])
+
(define_code_iterator any_int_unop [neg not])
(define_code_iterator any_commutative_binop [plus and ior xor