Diffstat (limited to 'gcc/config/riscv/riscv.cc')
-rw-r--r-- gcc/config/riscv/riscv.cc | 623
1 files changed, 494 insertions, 129 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 63404d3..96519c9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -735,6 +735,105 @@ static const struct riscv_tune_param mips_p8700_tune_info = {
true, /* prefer-agnostic. */
};
+/* Costs to use when optimizing for Andes 25 series. */
+static const struct riscv_tune_param andes_25_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
+ {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
+ {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */
+ 1, /* issue_rate */
+ 3, /* branch_cost */
+ 3, /* memory_cost */
+ 8, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ true, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align. */
+ true, /* prefer-agnostic. */
+};
+
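+/* Costs to use when optimizing for the SpacemiT X60. */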
+static const struct riscv_tune_param spacemit_x60_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (15), COSTS_N_INSNS (22)}, /* fp_div */
+ {COSTS_N_INSNS (3), COSTS_N_INSNS (6)}, /* int_mul */
+ {COSTS_N_INSNS (12), COSTS_N_INSNS (20)}, /* int_div */
+ 2, /* issue_rate */
+ 3, /* branch_cost */
+ 5, /* memory_cost */
+ 6, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* use_zero_stride_load */
+ true, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+ true, /* prefer-agnostic. */
+};
+
+/* Costs to use when optimizing for Andes 23 series. */
+static const struct riscv_tune_param andes_23_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
+ {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
+ {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */
+ 2, /* issue_rate */
+ 3, /* branch_cost */
+ 3, /* memory_cost */
+ 8, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ true, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+ true, /* prefer-agnostic. */
+};
+
+/* Costs to use when optimizing for Andes 45 series. */
+static const struct riscv_tune_param andes_45_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
+ {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
+ {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */
+ 2, /* issue_rate */
+ 3, /* branch_cost */
+ 3, /* memory_cost */
+ 8, /* fmv_cost */
+ false, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ true, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ false, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+ true, /* prefer-agnostic. */
+};
+
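The cost pairs above are expressed with COSTS_N_INSNS, which scales an instruction count into RTL cost units (rtl.h defines it as (N) * 4); the two entries in each pair are the single- and double-precision (or 32- and 64-bit) variants. A minimal standalone sketch of that arithmetic, with the macro redefined locally so it compiles outside of GCC:

#include <cstdio>

/* Local stand-in for GCC's macro (rtl.h defines it as (N) * 4).  */
#define COSTS_N_INSNS(N) ((N) * 4)

int main ()
{
  /* E.g. the spacemit_x60 table: fp_div costs 15 (SF) / 22 (DF) insns.  */
  printf ("fp_div SF cost units: %d\n", COSTS_N_INSNS (15));   /* 60 */
  printf ("fp_div DF cost units: %d\n", COSTS_N_INSNS (22));   /* 88 */
  printf ("int_mul SI cost units: %d\n", COSTS_N_INSNS (3));   /* 12 */
  return 0;
}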
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
@@ -1741,8 +1840,19 @@ riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
/* Nonzero offsets are only valid for references that don't use the GOT. */
switch (*symbol_type)
{
- case SYMBOL_ABSOLUTE:
case SYMBOL_PCREL:
+ /* In 64-bit mode, where offsets don't wrap around, PC-relative offsets
+ beyond +/-1GiB are more likely than not to end up out of range for an
+ auipc instruction placed somewhere within the 2GiB span usable by
+ medany, and such offsets are quite unlikely to come up by chance, so
+ be conservative and split the offset out for them. */
+ if (TARGET_64BIT)
+ return sext_hwi (INTVAL (offset), 30) == INTVAL (offset);
+
+ /* Fall through. */
+
+ case SYMBOL_ABSOLUTE:
case SYMBOL_TLS_LE:
/* GAS rejects offsets outside the range [-2^31, 2^31-1]. */
return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
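A worked illustration of the new SYMBOL_PCREL check may help: assuming sext_hwi (x, prec) sign-extends the low PREC bits of x (as GCC's hwint.h version does), sext_hwi (offset, 30) == offset accepts exactly the offsets representable in 30 signed bits, i.e. [-2^29, 2^29 - 1] or roughly +/-512MiB, a deliberately tighter bound than the +/-1GiB figure discussed in the comment. A standalone sketch:

#include <cstdint>
#include <cstdio>

/* Stand-in for GCC's sext_hwi: sign-extend the low PREC bits of X.  */
static int64_t sext_hwi (int64_t x, int prec)
{
  uint64_t ux = (uint64_t) x & ((UINT64_C (1) << prec) - 1);
  uint64_t sign = UINT64_C (1) << (prec - 1);
  return (int64_t) ((ux ^ sign) - sign);
}

int main ()
{
  int64_t offsets[] = { 0x1fffffff, 0x20000000, -0x20000000, -0x20000001 };
  for (int64_t off : offsets)
    printf ("%lld %s\n", (long long) off,
            sext_hwi (off, 30) == off ? "accepted" : "rejected");
  return 0;
}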
@@ -2765,7 +2875,7 @@ riscv_unspec_address_offset (rtx base, rtx offset,
enum riscv_symbol_type symbol_type)
{
base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
- UNSPEC_ADDRESS_FIRST + symbol_type);
+ UNSPEC_ADDRESS_FIRST + (int) symbol_type);
if (offset != const0_rtx)
base = gen_rtx_PLUS (Pmode, base, offset);
return gen_rtx_CONST (Pmode, base);
@@ -3731,8 +3841,7 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
/* The low-part must be zero-extended when ELEN == 32 and
mode == 64. */
if (num == 2 && i == 0)
- emit_insn (gen_extend_insn (int_reg, result, mode, smode,
- true));
+ int_reg = convert_modes (mode, smode, result, true);
if (i == 1)
{
@@ -3779,6 +3888,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
temp_reg = gen_reg_rtx (word_mode);
zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
+ /* SRC is a MEM, so we can always extend it directly; there is no
+ need to go through convert_modes. */
emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
zero_extend_p));
riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
@@ -3833,9 +3944,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
{
rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
rtx temp = gen_reg_rtx (word_mode);
- emit_insn (gen_extend_insn (temp,
- gen_lowpart (HImode, src),
- word_mode, HImode, 1));
+ temp = convert_modes (word_mode, HImode,
+ gen_lowpart (HImode, src), true);
if (word_mode == SImode)
emit_insn (gen_iorsi3 (temp, mask, temp));
else
@@ -4722,6 +4832,13 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq,
if (last_dest)
last_dest = dest;
}
+ else if (REG_P (dest) && src == CONST0_RTX (GET_MODE (dest)))
+ {
+ /* A GPR set to zero can always be replaced with x0, so any
+ insn that sets a GPR to zero will eventually be eliminated. */
+ riscv_if_info.original_cost += COSTS_N_INSNS (1);
+ riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
+ }
else
last_dest = NULL_RTX;
@@ -4908,7 +5025,7 @@ riscv_output_move (rtx dest, rtx src)
if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
return "fmv.x.h\t%0,%1";
/* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
- return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
+ return "fmv.x.s\t%0,%1\n\tslli\t%0,%0,16\n\tsrai\t%0,%0,16";
case 4:
return "fmv.x.s\t%0,%1";
case 8:
@@ -5886,11 +6003,47 @@ static int
riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields,
int n, HOST_WIDE_INT offset,
bool ignore_zero_width_bit_field_p,
+ bool ignore_empty_union_and_zero_len_array_p,
bool vls_p = false, unsigned abi_vlen = 0)
{
int max_aggregate_field = vls_p ? 8 : 2;
switch (TREE_CODE (type))
{
+ case UNION_TYPE:
+ {
+ if (!ignore_empty_union_and_zero_len_array_p)
+ return -1;
+ /* An empty union should be ignored. */
+ if (TYPE_SIZE (type) == NULL || integer_zerop (TYPE_SIZE (type)))
+ return n;
+ /* Likewise, ignore a union whose members are all empty unions or structs. */
+ for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+ {
+ if (TREE_CODE (f) != FIELD_DECL)
+ continue;
+ int m;
+ HOST_WIDE_INT pos = offset + int_byte_position (f);
+ switch (TREE_CODE (TREE_TYPE (f)))
+ {
+ case ARRAY_TYPE:
+ case UNION_TYPE:
+ case RECORD_TYPE:
+ m = riscv_flatten_aggregate_field (
+ TREE_TYPE (f), fields, n, pos,
+ ignore_zero_width_bit_field_p,
+ true);
+ /* Any non-empty struct/union/array stops the flattening. */
+ if (m != n)
+ return -1;
+ break;
+ default:
+ /* Any member that is not a struct, union or array stops the
+ flattening. */
+ return -1;
+ }
+ }
+ return n;
+ }
case RECORD_TYPE:
/* Can't handle incomplete types nor sizes that are not fixed. */
if (!COMPLETE_TYPE_P (type)
@@ -5916,7 +6069,9 @@ riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields,
{
HOST_WIDE_INT pos = offset + int_byte_position (f);
n = riscv_flatten_aggregate_field (
- TREE_TYPE (f), fields, n, pos, ignore_zero_width_bit_field_p,
+ TREE_TYPE (f), fields, n, pos,
+ ignore_zero_width_bit_field_p,
+ ignore_empty_union_and_zero_len_array_p,
vls_p, abi_vlen);
}
if (n < 0)
@@ -5930,14 +6085,20 @@ riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields,
riscv_aggregate_field subfields[8];
tree index = TYPE_DOMAIN (type);
tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
+
+ /* An array whose element type has zero size should be ignored. */
+ if (ignore_empty_union_and_zero_len_array_p && integer_zerop (elt_size))
+ return n;
+
int n_subfields
- = riscv_flatten_aggregate_field (TREE_TYPE (type), subfields, 0,
- offset,
- ignore_zero_width_bit_field_p, vls_p,
- abi_vlen);
+ = riscv_flatten_aggregate_field (
+ TREE_TYPE (type), subfields, 0,
+ offset,
+ ignore_zero_width_bit_field_p,
+ ignore_empty_union_and_zero_len_array_p,
+ vls_p, abi_vlen);
/* Can't handle incomplete types nor sizes that are not fixed. */
- if (n_subfields <= 0
- || !COMPLETE_TYPE_P (type)
+ if (!COMPLETE_TYPE_P (type)
|| TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
|| !index
|| !TYPE_MAX_VALUE (index)
@@ -5947,6 +6108,15 @@ riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields,
|| !tree_fits_uhwi_p (elt_size))
return -1;
+ /* A zero-length array of empty unions/structs should be ignored. */
+ if (ignore_empty_union_and_zero_len_array_p && n_subfields == 0
+ && integer_zerop (TYPE_MIN_VALUE (index))
+ && integer_all_onesp (TYPE_MAX_VALUE (index)))
+ return n;
+
+ if (n_subfields <= 0)
+ return -1;
+
n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
- tree_to_uhwi (TYPE_MIN_VALUE (index));
gcc_assert (n_elts >= 0);
@@ -6026,14 +6196,25 @@ static int
riscv_flatten_aggregate_argument (const_tree type,
riscv_aggregate_field *fields,
bool ignore_zero_width_bit_field_p,
+ bool ignore_empty_union_and_zero_len_array_p,
bool vls_p = false, unsigned abi_vlen = 0)
{
if (!type || TREE_CODE (type) != RECORD_TYPE)
return -1;
return riscv_flatten_aggregate_field (type, fields, 0, 0,
- ignore_zero_width_bit_field_p, vls_p,
- abi_vlen);
+ ignore_zero_width_bit_field_p,
+ ignore_empty_union_and_zero_len_array_p,
+ vls_p, abi_vlen);
+}
+
+static bool
+riscv_any_non_float_type_field (riscv_aggregate_field *fields, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
+ return true;
+ return false;
}
/* See whether TYPE is a record whose fields should be returned in one or
@@ -6044,24 +6225,18 @@ riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
riscv_aggregate_field fields[2])
{
static int warned = 0;
+ if (!type)
+ return 0;
/* This is the old ABI, which differs for C++ and C. */
- int n_old = riscv_flatten_aggregate_argument (type, fields, false);
- for (int i = 0; i < n_old; i++)
- if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
- {
- n_old = -1;
- break;
- }
+ int n_old = riscv_flatten_aggregate_argument (type, fields, false, false);
+ if (riscv_any_non_float_type_field (fields, n_old))
+ n_old = -1;
/* This is the new ABI, which is the same for C++ and C. */
- int n_new = riscv_flatten_aggregate_argument (type, fields, true);
- for (int i = 0; i < n_new; i++)
- if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
- {
- n_new = -1;
- break;
- }
+ int n_new = riscv_flatten_aggregate_argument (type, fields, true, false);
+ if (riscv_any_non_float_type_field (fields, n_new))
+ n_new = -1;
if ((n_old != n_new) && (warned == 0))
{
@@ -6070,7 +6245,58 @@ riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
warned = 1;
}
- return n_new > 0 ? n_new : 0;
+ /* This is the ABI after fixing the flattening of empty unions and
+ zero-length arrays. */
+ int n_new2 = riscv_flatten_aggregate_argument (type, fields, true, true);
+ if (riscv_any_non_float_type_field (fields, n_new2))
+ n_new2 = -1;
+
+ bool num_fpr = riscv_pass_mode_in_fpr_p (TYPE_MODE (type));
+
+ /* There is a special case: a struct containing a zero-length array of
+ an empty struct plus a floating-point member, e.g.:
+
+ struct S0ae_1f {
+ struct {
+ } e1[0];
+ float f;
+ };
+
+ Here the new handling gets 1, while the legacy ABI gets -1 at this
+ point but ends up at 1 in the later logic, so treat this case as
+ compatible. */
+ bool compatible_p = n_new2 == 1 && n_new == -1 && num_fpr == 1;
+
+ if ((n_new2 != n_new)
+ && !compatible_p && (warned == 0))
+ {
+ warning (OPT_Wpsabi, "ABI for flattened empty union and zero-"
+ "length array changed in GCC 16");
+ warned = 1;
+ }
+
+ return n_new2 > 0 ? n_new2 : 0;
+}
+
+struct riscv_aggregate_field_info_t {
+ unsigned num_fpr;
+ unsigned num_gpr;
+
+ riscv_aggregate_field_info_t ()
+ : num_fpr (0), num_gpr (0)
+ {}
+};
+
+static riscv_aggregate_field_info_t
+riscv_parse_aggregate_field_info (riscv_aggregate_field *fields, int n)
+{
+ riscv_aggregate_field_info_t info;
+ for (int i = 0; i < n; i++)
+ {
+ info.num_fpr += SCALAR_FLOAT_TYPE_P (fields[i].type);
+ info.num_gpr += INTEGRAL_TYPE_P (fields[i].type);
+ }
+
+ return info;
}
/* See whether TYPE is a record whose fields should be returned in one or
@@ -6084,35 +6310,48 @@ riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
static int warned = 0;
/* This is the old ABI, which differs for C++ and C. */
- unsigned num_int_old = 0, num_float_old = 0;
- int n_old = riscv_flatten_aggregate_argument (type, fields, false);
- for (int i = 0; i < n_old; i++)
- {
- num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
- num_int_old += INTEGRAL_TYPE_P (fields[i].type);
- }
+ int n_old = riscv_flatten_aggregate_argument (type, fields, false, false);
+ riscv_aggregate_field_info_t old_info;
+ old_info = riscv_parse_aggregate_field_info (fields, n_old);
/* This is the new ABI, which is the same for C++ and C. */
- unsigned num_int_new = 0, num_float_new = 0;
- int n_new = riscv_flatten_aggregate_argument (type, fields, true);
- for (int i = 0; i < n_new; i++)
- {
- num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
- num_int_new += INTEGRAL_TYPE_P (fields[i].type);
- }
+ int n_new = riscv_flatten_aggregate_argument (type, fields, true, false);
+ riscv_aggregate_field_info_t new_info;
+ new_info = riscv_parse_aggregate_field_info (fields, n_new);
+
+ bool values_changed = old_info.num_fpr != new_info.num_fpr
+ || old_info.num_gpr != new_info.num_gpr;
+ bool old_is_one_one = old_info.num_fpr == 1 && old_info.num_gpr == 1;
+ bool new_is_one_one = new_info.num_fpr == 1 && new_info.num_gpr == 1;
- if (((num_int_old == 1 && num_float_old == 1
- && (num_int_old != num_int_new || num_float_old != num_float_new))
- || (num_int_new == 1 && num_float_new == 1
- && (num_int_old != num_int_new || num_float_old != num_float_new)))
- && (warned == 0))
+ if (values_changed
+ && (old_is_one_one || new_is_one_one)
+ && warned == 0)
{
warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
"bit-fields changed in GCC 10");
warned = 1;
}
- return num_int_new == 1 && num_float_new == 1;
+ /* This is the ABI after fixing the flattening of empty unions and
+ zero-length arrays. */
+ int n_new2 = riscv_flatten_aggregate_argument (type, fields, true, true);
+ riscv_aggregate_field_info_t new2_info;
+ new2_info = riscv_parse_aggregate_field_info (fields, n_new2);
+
+ values_changed = new_info.num_fpr != new2_info.num_fpr
+ || new_info.num_gpr != new2_info.num_gpr;
+ bool new2_is_one_one = new2_info.num_fpr == 1 && new2_info.num_gpr == 1;
+
+ if (values_changed
+ && (new_is_one_one || new2_is_one_one)
+ && warned == 0)
+ {
+ warning (OPT_Wpsabi, "ABI for flattened empty union and zero-"
+ "length array changed in GCC 16");
+ warned = 1;
+ }
+
+ return new2_is_one_one;
}
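To make the user-visible effect of the flattening fix concrete, here is a hypothetical source-level example (the type names are illustrative and not taken from the patch or its testsuite; GNU extensions are needed for the empty aggregates and zero-length arrays). With the empty members ignored, the first two structs can be flattened and passed in FP (or mixed FP/GPR) registers on a hard-float ABI instead of falling back to the integer convention, while a union with a non-empty member still blocks flattening; -Wpsabi flags the cases where GCC 16 now differs from earlier releases.

/* Hypothetical illustration; compile for a hard-float RISC-V target,
   e.g. -march=rv64gc -mabi=lp64d, with GNU extensions enabled.  */

struct two_fp {          /* the empty union member is now ignored, so  */
  union {} pad;          /* this flattens to two floats and can be     */
  float a;               /* passed in a pair of FPRs                   */
  float b;
};

struct fp_and_int {      /* the zero-length array of an empty struct   */
  struct {} z[0];        /* is ignored; one float plus one int can     */
  float f;               /* use one FPR and one GPR                    */
  int i;
};

struct blocked {         /* a union with a non-empty member still      */
  union { int x; } u;    /* stops flattening; this falls back to the   */
  float f;               /* integer calling convention                 */
};

extern float use_two_fp (struct two_fp);
extern float use_fp_and_int (struct fp_and_int);
extern float use_blocked (struct blocked);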
/* Return the representation of an argument passed or returned in an FPR
@@ -6466,7 +6705,7 @@ riscv_pass_aggregate_in_vr (struct riscv_arg_info *info,
riscv_aggregate_field fields[8];
unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc);
int i;
- int n = riscv_flatten_aggregate_argument (type, fields, true,
+ int n = riscv_flatten_aggregate_argument (type, fields, true, true,
/* vls_p */ true, abi_vlen);
if (n == -1)
@@ -10576,6 +10815,71 @@ riscv_issue_rate (void)
return tune_param->issue_rate;
}
+/* Structure for very basic vector configuration tracking in the scheduler. */
+struct last_vconfig
+{
+ bool valid;
+ bool ta;
+ bool ma;
+ uint8_t sew;
+ uint8_t vlmul;
+ rtx avl;
+} last_vconfig;
+
+/* Clear LAST_VCONFIG so we have no known state. */
+static void
+clear_vconfig (void)
+{
+ memset (&last_vconfig, 0, sizeof (last_vconfig));
+}
+
+/* Return TRUE if INSN is a vector insn whose required vector
+ configuration is trivially equal to that of the last vector insn
+ issued. Return FALSE otherwise. */
+static bool
+compatible_with_last_vconfig (rtx_insn *insn)
+{
+ /* We might be able to extract the data from a preexisting vsetvl. */
+ if (vsetvl_insn_p (insn))
+ return false;
+
+ /* Nothing to do for these cases. */
+ if (!NONDEBUG_INSN_P (insn) || !has_vtype_op (insn))
+ return false;
+
+ extract_insn_cached (insn);
+
+ rtx avl = get_avl (insn);
+ if (avl != last_vconfig.avl)
+ return false;
+
+ if (get_sew (insn) != last_vconfig.sew)
+ return false;
+
+ if (get_vlmul (insn) != last_vconfig.vlmul)
+ return false;
+
+ if (tail_agnostic_p (insn) != last_vconfig.ta)
+ return false;
+
+ if (mask_agnostic_p (insn) != last_vconfig.ma)
+ return false;
+
+ /* No differences found, they're trivially compatible. */
+ return true;
+}
+
+/* Implement TARGET_SCHED_INIT, we use this to track the vector configuration
+ of the last issued vector instruction. We can then use that information
+ to potentially adjust the ready queue to issue instructions of a compatible
+ vector configuration instead of a conflicting configuration. That will
+ reduce the number of vsetvl instructions we ultimately emit. */
+static void
+riscv_sched_init (FILE *, int, int)
+{
+ clear_vconfig ();
+}
+
/* Implement TARGET_SCHED_VARIABLE_ISSUE. */
static int
riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
@@ -10600,9 +10904,88 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
an assert so we can find and fix this problem. */
gcc_assert (insn_has_dfa_reservation_p (insn));
+ /* If this is a vector insn with vl/vtype info, then record the last
+ vector configuration. */
+ if (vsetvl_insn_p (insn))
+ clear_vconfig ();
+ else if (NONDEBUG_INSN_P (insn) && has_vtype_op (insn))
+ {
+ extract_insn_cached (insn);
+
+ rtx avl = get_avl (insn);
+ if (avl == RVV_VLMAX)
+ avl = const0_rtx;
+
+ if (!avl || !CONST_INT_P (avl))
+ clear_vconfig ();
+ else
+ {
+ last_vconfig.valid = true;
+ last_vconfig.avl = avl;
+ last_vconfig.sew = get_sew (insn);
+ last_vconfig.vlmul = get_vlmul (insn);
+ last_vconfig.ta = tail_agnostic_p (insn);
+ last_vconfig.ma = mask_agnostic_p (insn);
+ }
+ }
+
return more - 1;
}
+/* Implement TARGET_SCHED_REORDER. The goal here is to look at the ready
+ queue and reorder it ever so slightly to encourage issuing an insn with
+ the same vector configuration as the most recently issued vector
+ instruction. That will reduce vsetvl instructions. */
+static int
+riscv_sched_reorder (FILE *, int, rtx_insn **ready, int *nreadyp, int)
+{
+ /* If we don't have a valid prior vector configuration, then there is
+ no point in reordering the ready queue; similarly if there is
+ just one entry in the queue. */
+ if (!last_vconfig.valid || *nreadyp == 1)
+ return riscv_issue_rate ();
+
+ int nready = *nreadyp;
+ int priority = INSN_PRIORITY (ready[nready - 1]);
+ for (int i = nready - 1; i >= 0; i--)
+ {
+ rtx_insn *insn = ready[i];
+
+ /* On a high performance core, vsetvl instructions should be
+ inexpensive. Removing them is very much a secondary concern, so
+ be extremely conservative with reordering, essentially only
+ allowing reordering within the highest priority value.
+
+ Lower end cores may benefit from more flexibility here. That
+ tuning is left to those who understand their core's behavior
+ and can thoroughly benchmark the result. Assuming such
+ designs appear, we can probably put an entry in the tuning
+ structure to indicate how much difference in priority to allow. */
+ if (INSN_PRIORITY (insn) < priority)
+ break;
+
+ if (compatible_with_last_vconfig (insn))
+ {
+ /* This entry is compatible with the last vconfig and has
+ the same priority as the most important insn. So swap
+ it so that we keep the vector configuration as-is and
+ ultimately eliminate a vsetvl.
+
+ Note no need to swap if this is the first entry in the
+ queue. */
+ if (i == nready - 1)
+ break;
+
+ std::swap (ready[i], ready[nready - 1]);
+ break;
+ }
+ }
+
+ return riscv_issue_rate ();
+}
+
+
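To see the reordering policy in isolation, here is a small standalone model (illustrative only; the types and names are made up and are not GCC internals). It mimics the loop above: scan the ready list from the issue end, stay within the top priority value, and swap forward the first insn whose vector configuration matches the last one issued:

#include <algorithm>
#include <cstdio>
#include <vector>

struct vconfig
{
  int sew, vlmul, avl;
  bool ta, ma;
  bool operator== (const vconfig &o) const
  {
    return sew == o.sew && vlmul == o.vlmul && avl == o.avl
	   && ta == o.ta && ma == o.ma;
  }
};

struct insn
{
  int priority;
  vconfig cfg;
  const char *name;
};

/* READY mimics GCC's ready list: the insn to issue next is at the back.
   Scan backwards, staying within the top priority value, and swap the
   first insn whose vector configuration matches LAST to the back.  */
static void
reorder (std::vector<insn> &ready, const vconfig &last)
{
  int top = ready.back ().priority;
  for (int i = (int) ready.size () - 1; i >= 0; i--)
    {
      if (ready[i].priority < top)
	break;
      if (ready[i].cfg == last)
	{
	  std::swap (ready[i], ready.back ());
	  break;
	}
    }
}

int main ()
{
  vconfig last = {32, 0, 0, true, true};
  std::vector<insn> ready = {
    {10, {32, 0, 0, true, true}, "vadd (same config)"},
    {10, {64, 1, 0, true, true}, "vmul (different config)"},
  };
  reorder (ready, last);
  printf ("issue next: %s\n", ready.back ().name);  /* vadd (same config) */
  return 0;
}

In GCC itself the ready list holds rtx_insn pointers and the comparison is compatible_with_last_vconfig; the model only captures the ordering decision.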
/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
instruction fusion of some sort. */
@@ -11842,6 +12225,12 @@ riscv_override_options_internal (struct gcc_options *opts)
/* Convert -march and -mrvv-vector-bits to a chunks count. */
riscv_vector_chunks = riscv_convert_vector_chunks (opts);
+ /* Set scalar costing to a high value such that we always pick
+ vectorization. Increase scalar costing by 100x. */
+ if (opts->x_riscv_max_vectorization)
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_scalar_cost_multiplier, 10000);
+
if (opts->x_flag_cf_protection != CF_NONE)
{
if ((opts->x_flag_cf_protection & CF_RETURN) == CF_RETURN
@@ -12079,6 +12468,39 @@ riscv_option_restore (struct gcc_options *opts,
static GTY (()) tree riscv_previous_fndecl;
+/* Reset the previous function declaration. */
+
+void
+riscv_reset_previous_fndecl (void)
+{
+ riscv_previous_fndecl = NULL;
+}
+
+/* Implement TARGET_OPTION_SAVE. */
+
+static void
+riscv_option_save (struct cl_target_option *ptr,
+ struct gcc_options *opts,
+ struct gcc_options * /* opts_set */)
+{
+ ptr->x_riscv_arch_string = opts->x_riscv_arch_string;
+ ptr->x_riscv_tune_string = opts->x_riscv_tune_string;
+ ptr->x_riscv_cpu_string = opts->x_riscv_cpu_string;
+}
+
+/* Implement TARGET_OPTION_PRINT. */
+
+static void
+riscv_option_print (FILE *file, int indent, struct cl_target_option *ptr)
+{
+ fprintf (file, "%*sarch = %s\n", indent, "",
+ ptr->x_riscv_arch_string ? ptr->x_riscv_arch_string : "default");
+ fprintf (file, "%*stune = %s\n", indent, "",
+ ptr->x_riscv_tune_string ? ptr->x_riscv_tune_string : "default");
+ if (ptr->x_riscv_cpu_string)
+ fprintf (file, "%*scpu = %s\n", indent, "", ptr->x_riscv_cpu_string);
+}
+
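For reference, the riscv_option_print hook above emits output of the following shape; this is a standalone reproduction of its fprintf calls with made-up option strings (the real values come from the saved cl_target_option):

#include <cstdio>

int main ()
{
  const char *arch = "rv64gcv";          /* sample values only */
  const char *tune = "generic-ooo";
  const char *cpu = nullptr;
  int indent = 2;

  fprintf (stdout, "%*sarch = %s\n", indent, "", arch ? arch : "default");
  fprintf (stdout, "%*stune = %s\n", indent, "", tune ? tune : "default");
  if (cpu)
    fprintf (stdout, "%*scpu = %s\n", indent, "", cpu);

  /* Prints:
       arch = rv64gcv
       tune = generic-ooo  */
  return 0;
}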
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
static void
@@ -12415,7 +12837,7 @@ riscv_get_interrupt_type (tree decl)
/* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions
like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
of the function, if such exists. This function may be called multiple
- times on a single function so use aarch64_previous_fndecl to avoid
+ times on a single function so use riscv_previous_fndecl to avoid
setting up identical state. */
/* Sanity checking for above function attributes. */
@@ -13871,84 +14293,14 @@ riscv_get_raw_result_mode (int regno)
return default_get_reg_raw_mode (regno);
}
-/* Generate a REG rtx of Xmode from the given rtx and mode.
- The rtx x can be REG (QI/HI/SI/DI) or const_int.
- The machine_mode mode is the original mode from define pattern.
- The rtx_code can be ZERO_EXTEND or SIGN_EXTEND.
-
- If rtx is REG:
-
- 1. If rtx Xmode, the RTX x will be returned directly.
- 2. If rtx non-Xmode, the value extended into a new REG of Xmode will be
- returned.
-
- The scalar ALU like add don't support non-Xmode like QI/HI. Then the
- gen_lowpart will have problem here. For example, when we would like
- to add -1 (0xff if QImode) and 2 (0x2 if QImode). The 0xff and 0x2 will
- be loaded to register for adding. Aka:
-
- 0xff + 0x2 = 0x101 instead of -1 + 2 = 1.
-
- Thus we need to sign extend 0xff to 0xffffffffffffffff if Xmode is DImode
- for correctness. Similar the unsigned also need zero extend.
-
- If rtx is const_int:
-
- 1. A new REG rtx will be created to hold the value of const_int.
-
- According to the gccint doc, the constants generated for modes with fewer
- bits than in HOST_WIDE_INT must be sign extended to full width. Thus there
- will be two cases here, take QImode as example.
-
- For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
- mov from const_int to the new REG rtx is good enough here.
-
- For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
- Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
- of RV64. So we need to cleanup the highest 56 bits of the new REG rtx moved
- from the (const_int -2).
-
- Then the underlying expanding can perform the code generation based on
- the REG rtx of Xmode, instead of taking care of these in expand func. */
-
+/* Force X, interpreted in MODE, into an Xmode register, extending
+ according to RCODE. */
static rtx
riscv_extend_to_xmode_reg (rtx x, machine_mode mode, enum rtx_code rcode)
{
gcc_assert (rcode == ZERO_EXTEND || rcode == SIGN_EXTEND);
- rtx xmode_reg = gen_reg_rtx (Xmode);
-
- if (CONST_INT_P (x))
- {
- if (mode == Xmode)
- emit_move_insn (xmode_reg, x);
- else if (rcode == ZERO_EXTEND)
- {
- /* Combine deliberately does not simplify extensions of constants
- (long story). So try to generate the zero extended constant
- efficiently.
-
- First extract the constant and mask off all the bits not in
- MODE. */
- HOST_WIDE_INT val = INTVAL (x);
- val &= GET_MODE_MASK (mode);
-
- /* X may need synthesis, so do not blindly copy it. */
- xmode_reg = force_reg (Xmode, gen_int_mode (val, Xmode));
- }
- else /* SIGN_EXTEND. */
- {
- rtx x_reg = gen_reg_rtx (mode);
- emit_move_insn (x_reg, x);
- riscv_emit_unary (rcode, xmode_reg, x_reg);
- }
- }
- else if (mode == Xmode)
- return x;
- else
- riscv_emit_unary (rcode, xmode_reg, x);
-
- return xmode_reg;
+ rtx t = convert_modes (Xmode, mode, x, rcode == ZERO_EXTEND);
+ return force_reg (Xmode, t);
}
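The motivation that the deleted comment spelled out can be reproduced as a standalone worked example (not GCC code): a QImode constant such as 254 is stored sign-extended as -2, so a zero-extend to Xmode must mask it back to 0xfe while a sign-extend keeps -2. convert_modes performs the equivalent handling for constants internally, which, as far as I can tell, is why the hand-written cases could be dropped:

#include <cstdint>
#include <cstdio>

int main ()
{
  /* How (const_int 254) is stored for QImode: sign-extended to -2.  */
  int64_t stored = -2;

  /* Zero-extend path: mask with GET_MODE_MASK (QImode) == 0xff.  */
  uint64_t zext = (uint64_t) stored & 0xff;            /* 0xfe == 254 */

  /* Sign-extend path: interpret the low 8 bits as signed.  */
  int64_t sext = (int64_t) (int8_t) (stored & 0xff);   /* -2 */

  printf ("zero-extended: %llu, sign-extended: %lld\n",
	  (unsigned long long) zext, (long long) sext);
  return 0;
}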
/* Implements the unsigned saturation add standard name usadd for int mode.
@@ -14295,7 +14647,7 @@ riscv_expand_ustrunc (rtx dest, rtx src)
gcc_assert (precision < 64);
uint64_t max = ((uint64_t)1u << precision) - 1u;
- rtx xmode_src = gen_lowpart (Xmode, src);
+ rtx xmode_src = riscv_extend_to_xmode_reg (src, GET_MODE (src), ZERO_EXTEND);
rtx xmode_dest = gen_reg_rtx (Xmode);
rtx xmode_lt = gen_reg_rtx (Xmode);
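In a standalone sketch (not the RTL expansion itself, and assuming a 64-bit Xmode), the saturating truncation being expanded is just a clamp against the destination's maximum; zero-extending SRC first, as the change above does, ensures the unsigned comparison sees the true source value rather than stale upper bits from a lowpart:

#include <cstdint>
#include <cstdio>

/* Saturating unsigned truncation to PRECISION bits, assuming the source
   has already been zero-extended into a 64-bit "Xmode" value.  */
static uint64_t ustrunc (uint64_t xmode_src, unsigned precision)
{
  uint64_t max = (UINT64_C (1) << precision) - 1;
  return xmode_src < max ? xmode_src : max;
}

int main ()
{
  printf ("%llu\n", (unsigned long long) ustrunc (300, 8));   /* 255 */
  printf ("%llu\n", (unsigned long long) ustrunc (200, 8));   /* 200 */
  /* If the upper bits of the source were stale, the comparison inside
     ustrunc could see a wrong value and clamp incorrectly.  */
  return 0;
}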
@@ -15598,7 +15950,8 @@ synthesize_and (rtx operands[3])
if (tmode != VOIDmode)
{
rtx tmp = gen_lowpart (tmode, operands[1]);
- emit_insn (gen_extend_insn (operands[0], tmp, word_mode, tmode, 1));
+ emit_move_insn (operands[0], convert_modes (word_mode, tmode,
+ tmp, true));
return true;
}
}
@@ -15995,9 +16348,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE riscv_option_override
+#undef TARGET_OPTION_SAVE
+#define TARGET_OPTION_SAVE riscv_option_save
+
#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE riscv_option_restore
+#undef TARGET_OPTION_PRINT
+#define TARGET_OPTION_PRINT riscv_option_print
+
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p
@@ -16011,9 +16370,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT riscv_sched_init
+
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER riscv_sched_reorder
+
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost