aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/riscv/riscv.cc
diff options
context:
space:
mode:
authorJerry DeLisle <jvdelisle@gcc.gnu.org>2025-09-02 15:58:26 -0700
committerJerry DeLisle <jvdelisle@gcc.gnu.org>2025-09-02 15:58:26 -0700
commit071b4126c613881f4cb25b4e5c39032964827f88 (patch)
tree7ed805786566918630d1d617b1ed8f7310f5fd8e /gcc/config/riscv/riscv.cc
parent845d23f3ea08ba873197c275a8857eee7edad996 (diff)
parentcaa1c2f42691d68af4d894a5c3e700ecd2dba080 (diff)
downloadgcc-devel/gfortran-test.zip
gcc-devel/gfortran-test.tar.gz
gcc-devel/gfortran-test.tar.bz2
Merge branch 'master' into gfortran-testdevel/gfortran-test
Diffstat (limited to 'gcc/config/riscv/riscv.cc')
-rw-r--r--gcc/config/riscv/riscv.cc399
1 files changed, 273 insertions, 126 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0a9fcef..591122f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3685,7 +3685,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
/* This test can fail if (for example) we want a HF and Z[v]fh is
not enabled. In that case we just want to let the standard
expansion path run. */
- if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
+ if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode)
+ && gen_lowpart_common (vmode, SUBREG_REG (src)))
{
rtx v = gen_lowpart (vmode, SUBREG_REG (src));
rtx int_reg = dest;
@@ -3958,41 +3959,6 @@ riscv_extend_cost (rtx op, bool unsigned_p)
return COSTS_N_INSNS (2);
}
-/* Return the cost of the vector binary rtx like add, minus, mult.
- The cost of scalar2vr_cost will be appended if there one of the
- op comes from the VEC_DUPLICATE. */
-
-static int
-get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost)
-{
- gcc_assert (riscv_v_ext_mode_p (GET_MODE (x)));
-
- rtx neg;
- rtx op_0;
- rtx op_1;
-
- if (GET_CODE (x) == UNSPEC)
- {
- op_0 = XVECEXP (x, 0, 0);
- op_1 = XVECEXP (x, 0, 1);
- }
- else
- {
- op_0 = XEXP (x, 0);
- op_1 = XEXP (x, 1);
- }
-
- if (GET_CODE (op_0) == VEC_DUPLICATE
- || GET_CODE (op_1) == VEC_DUPLICATE)
- return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else if (GET_CODE (neg = op_0) == NEG
- && (GET_CODE (op_1) == VEC_DUPLICATE
- || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE))
- return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else
- return COSTS_N_INSNS (1);
-}
-
/* Implement TARGET_RTX_COSTS. */
#define SINGLE_SHIFT_COST 1
@@ -4014,73 +3980,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
{
case SET:
{
- switch (GET_CODE (x))
+ if (GET_CODE (x) == VEC_DUPLICATE)
+ *total = (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
+ else
{
- case VEC_DUPLICATE:
- *total = gr2vr_cost * COSTS_N_INSNS (1);
- break;
- case IF_THEN_ELSE:
- {
- rtx op = XEXP (x, 1);
+ int vec_dup_count = 0;
+ subrtx_var_iterator::array_type array;
- switch (GET_CODE (op))
- {
- case DIV:
- case UDIV:
- case MOD:
- case UMOD:
- case US_PLUS:
- case US_MINUS:
- case SS_PLUS:
- case SS_MINUS:
- *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- break;
- case UNSPEC:
- {
- switch (XINT (op, 1))
- {
- case UNSPEC_VAADDU:
- case UNSPEC_VAADD:
- *total
- = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
- }
- }
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
- }
- }
- break;
- case PLUS:
- case MINUS:
- case AND:
- case IOR:
- case XOR:
- case MULT:
- case SMAX:
- case UMAX:
- case SMIN:
- case UMIN:
- {
- rtx op;
- rtx op_0 = XEXP (x, 0);
- rtx op_1 = XEXP (x, 1);
+ FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
+ if (GET_CODE (*iter) == VEC_DUPLICATE)
+ vec_dup_count++;
- if (GET_CODE (op = op_0) == MULT
- || GET_CODE (op = op_1) == MULT)
- *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
- else
- *total = get_vector_binary_rtx_cost (x, scalar2vr_cost);
- }
- break;
- default:
- *total = COSTS_N_INSNS (1);
- break;
+ int total_vec_dup_cost = vec_dup_count * scalar2vr_cost;
+
+ *total = COSTS_N_INSNS (1) * (total_vec_dup_cost + 1);
}
}
break;
@@ -5532,9 +5445,9 @@ canonicalize_comparands (rtx_code code, rtx *op0, rtx *op1)
/* We might have been handed back a SUBREG. Just to make things
easy, force it into a REG. */
- if (!REG_P (*op0) && !CONST_INT_P (*op0))
+ if (!REG_P (*op0) && !CONST_INT_P (*op0) && INTEGRAL_MODE_P (GET_MODE (*op0)))
*op0 = force_reg (word_mode, *op0);
- if (!REG_P (*op1) && !CONST_INT_P (*op1))
+ if (!REG_P (*op1) && !CONST_INT_P (*op1) && INTEGRAL_MODE_P (GET_MODE (*op1)))
*op1 = force_reg (word_mode, *op1);
}
@@ -6213,7 +6126,8 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
For a library call, FNTYPE is 0. */
void
-riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
+riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype,
+ rtx, tree, int)
{
memset (cum, 0, sizeof (*cum));
@@ -6494,30 +6408,44 @@ riscv_arg_partial_bytes (cumulative_args_t cum,
return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
}
-/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
- VALTYPE is the return type and MODE is VOIDmode. For libcalls,
- VALTYPE is null and MODE is the mode of the return value. */
+/* Implements hook TARGET_FUNCTION_VALUE. */
rtx
-riscv_function_value (const_tree type, const_tree func, machine_mode mode)
+riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type,
+ bool)
{
struct riscv_arg_info info;
CUMULATIVE_ARGS args;
- if (type)
+ if (fn_decl_or_type)
{
- int unsigned_p = TYPE_UNSIGNED (type);
+ const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ?
+ TREE_TYPE (fn_decl_or_type) : fn_decl_or_type;
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0);
+ }
+ else
+ memset (&args, 0, sizeof args);
- mode = TYPE_MODE (type);
+ int unsigned_p = TYPE_UNSIGNED (ret_type);
- /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes,
- return values, promote the mode here too. */
- mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
- }
+ machine_mode mode = TYPE_MODE (ret_type);
- memset (&args, 0, sizeof args);
+  /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
+     return values, promote the mode here too.  */
+ mode = promote_function_mode (ret_type, mode, &unsigned_p, fn_decl_or_type, 1);
- return riscv_get_arg_info (&info, &args, mode, type, true, true);
+ return riscv_get_arg_info (&info, &args, mode, ret_type, true, true);
+}
+
+/* Implements hook TARGET_LIBCALL_VALUE. */
+
+rtx
+riscv_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ struct riscv_arg_info info;
+ CUMULATIVE_ARGS args;
+ memset (&args, 0, sizeof args);
+ return riscv_get_arg_info (&info, &args, mode, NULL_TREE, true, true);
}
/* Implement TARGET_PASS_BY_REFERENCE. */
@@ -13867,9 +13795,9 @@ riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y)
riscv_emit_binary (MULT, mul, x, y);
if (TARGET_64BIT)
- emit_insn (gen_usmuldi3_highpart (mulhu, x, y));
+ emit_insn (gen_umuldi3_highpart (mulhu, x, y));
else
- emit_insn (gen_usmulsi3_highpart (mulhu, x, y));
+ emit_insn (gen_umulsi3_highpart (mulhu, x, y));
riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode));
riscv_emit_unary (NEG, overflow_p, overflow_p);
@@ -14037,10 +13965,13 @@ riscv_c_mode_for_floating_type (enum tree_index ti)
return default_mode_for_floating_type (ti);
}
-/* This parses the attribute arguments to target_version in DECL and modifies
- the feature mask and priority required to select those targets. */
+/* Parse the attribute arguments to target_version in DECL and modify
+ the feature mask and priority required to select those targets.
+ If LOC is nonnull, report diagnostics against *LOC, otherwise
+ remain silent. */
static void
parse_features_for_version (tree decl,
+ location_t *loc,
struct riscv_feature_bits &res,
int &priority)
{
@@ -14071,14 +14002,12 @@ parse_features_for_version (tree decl,
cl_target_option_restore (&global_options, &global_options_set,
default_opts);
- riscv_process_target_version_attr (TREE_VALUE (version_attr),
- DECL_SOURCE_LOCATION (decl));
+ riscv_process_target_version_attr (TREE_VALUE (version_attr), loc);
priority = global_options.x_riscv_fmv_priority;
const char *arch_string = global_options.x_riscv_arch_string;
bool parse_res
- = riscv_minimal_hwprobe_feature_bits (arch_string, &res,
- DECL_SOURCE_LOCATION (decl));
+ = riscv_minimal_hwprobe_feature_bits (arch_string, &res, loc);
gcc_assert (parse_res);
cl_target_option_restore (&global_options, &global_options_set,
@@ -14135,8 +14064,8 @@ riscv_compare_version_priority (tree decl1, tree decl2)
struct riscv_feature_bits mask1, mask2;
int prio1, prio2;
- parse_features_for_version (decl1, mask1, prio1);
- parse_features_for_version (decl2, mask2, prio2);
+ parse_features_for_version (decl1, nullptr, mask1, prio1);
+ parse_features_for_version (decl2, nullptr, mask2, prio2);
return compare_fmv_features (mask1, mask2, prio1, prio2);
}
@@ -14439,6 +14368,7 @@ dispatch_function_versions (tree dispatch_decl,
version_info.version_decl = version_decl;
// Get attribute string, parse it and find the right features.
parse_features_for_version (version_decl,
+ &DECL_SOURCE_LOCATION (version_decl),
version_info.features,
version_info.prio);
function_versions.push_back (version_info);
@@ -15441,6 +15371,217 @@ synthesize_and (rtx operands[3])
return true;
}
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_add (rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+ /* If we can emit two addi insns then that's better than synthesizing
+ the constant into a temporary, then adding the temporary to the
+ other input. The exception is when the constant can be loaded
+     in a single instruction which can issue whenever it's convenient.  */
+ if (SUM_OF_TWO_S12 (ival) && budget1 >= 2)
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival));
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If we can shift the constant by 1, 2, or 3 bit positions
+ and the result is a cheaper constant, then do so. */
+ ival = INTVAL (operands[2]);
+ if (TARGET_ZBA
+ && (((ival % 2) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 1), true))
+ || ((ival % 4) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 2), true))
+ || ((ival % 8) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 3), true))))
+ {
+ // Load the shifted constant into a temporary
+ int shct = ctz_hwi (ival);
+
+ /* We can handle shifting up to 3 bit positions via shNadd. */
+ if (shct > 3)
+ shct = 3;
+
+      /* The adjusted constant may still need synthesis, so do not copy
+	 it directly into a register.  Let the expander handle it.  */
+ rtx tmp = force_reg (word_mode, GEN_INT (ival >> shct));
+
+ /* Generate shift-add of temporary and operands[1]
+ into the final destination. */
+ rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (shct));
+ rtx output = gen_rtx_PLUS (word_mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If the negated constant is cheaper than the original, then negate
+ the constant and use sub. */
+ if (budget2 < budget1)
+ {
+ // load -INTVAL (operands[2]) into a temporary
+ rtx tmp = force_reg (word_mode, GEN_INT (-INTVAL (operands[2])));
+
+      // subtract operands[2] from operands[1]
+ rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* No add synthesis was found. Synthesize the constant into
+ a temporary and use that. */
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_PLUS (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+}
+
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+ For 32-bit object cases with a 64-bit target.
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+
+bool
+synthesize_add_extended (rtx operands[3])
+{
+
+/* If operands[2] is a 12-bit signed immediate,
+ no synthesis needs to be done. */
+
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+/* If operands[2] can be split into two 12-bit signed immediates,
+ split add into two adds. */
+
+ if (SUM_OF_TWO_S12 (ival))
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx temp = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (saturated)));
+ temp = gen_lowpart (SImode, temp);
+ SUBREG_PROMOTED_VAR_P (temp) = 1;
+ SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
+ emit_insn (gen_rtx_SET (operands[0], temp));
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival)));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ return true;
+ }
+
+
+/* If the negated value is cheaper to synthesize, subtract that from
+ operands[1]. */
+
+ if (budget2 < budget1)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (tmp, GEN_INT (-INTVAL (operands[2]))));
+
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_subsi3_extended (t, operands[1], tmp));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ return true;
+ }
+
+ rtx tsrc = force_reg (SImode, operands[2]);
+ rtx tdest = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (tdest, operands[1], tsrc));
+ tdest = gen_lowpart (SImode, tdest);
+ SUBREG_PROMOTED_VAR_P (tdest) = 1;
+ SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+ emit_move_insn (operands[0], tdest);
+ return true;
+
+}
+
+
+/*
+   HINT : argument specifies the target cache
+
+ TODO : LOCALITY is unused.
+
+ Return the first operand of the associated PREF or PREFX insn. */
+rtx
+riscv_prefetch_cookie (rtx hint, rtx locality)
+{
+ return (GEN_INT (INTVAL (hint)
+ + CacheHint::DCACHE_HINT + INTVAL (locality) * 0));
+}
+
+/* Return true if X is a legitimate address with offset for prefetch.
+ MODE is the mode of the value being accessed. */
+bool
+riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
+{
+ struct riscv_address_info addr;
+
+ if (riscv_classify_address (&addr, x, mode, false)
+ && addr.type == ADDRESS_REG)
+ {
+ if (TARGET_XMIPSCBOP)
+ return (CONST_INT_P (addr.offset)
+ && MIPS_RISCV_9BIT_OFFSET_P (INTVAL (addr.offset)));
+ }
+
+ return true;
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -15804,6 +15945,12 @@ synthesize_and (rtx operands[3])
#undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
#define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE riscv_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE riscv_libcall_value
+
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p