Diffstat (limited to 'gcc/config/riscv/riscv.cc')
| -rw-r--r-- | gcc/config/riscv/riscv.cc | 623 |
1 file changed, 494 insertions, 129 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 63404d3..96519c9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -735,6 +735,105 @@ static const struct riscv_tune_param mips_p8700_tune_info = { true, /* prefer-agnostic. */ }; +/* Costs to use when optimizing for Andes 25 series. */ +static const struct riscv_tune_param andes_25_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */ + 1, /* issue_rate */ + 3, /* branch_cost */ + 3, /* memory_cost */ + 8, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + true, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + false, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align. */ + true, /* prefer-agnostic. */ +}; + +static const struct riscv_tune_param spacemit_x60_tune_info= { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (15), COSTS_N_INSNS (22)}, /* fp_div */ + {COSTS_N_INSNS (3), COSTS_N_INSNS (6)}, /* int_mul */ + {COSTS_N_INSNS (12), COSTS_N_INSNS (20)}, /* int_div */ + 2, /* issue_rate */ + 3, /* branch_cost */ + 5, /* memory_cost */ + 6, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + false, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + false, /* use_zero_stride_load */ + true, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ + true, /* prefer-agnostic. */ +}; + +/* Costs to use when optimizing for Andes 23 series. */ +static const struct riscv_tune_param andes_23_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */ + 2, /* issue_rate */ + 3, /* branch_cost */ + 3, /* memory_cost */ + 8, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + true, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + false, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ + true, /* prefer-agnostic. */ +}; + +/* Costs to use when optimizing for Andes 45 series. 
*/ +static const struct riscv_tune_param andes_45_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */ + 2, /* issue_rate */ + 3, /* branch_cost */ + 3, /* memory_cost */ + 8, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + true, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + false, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ + true, /* prefer-agnostic. */ +}; + static bool riscv_avoid_shrink_wrapping_separate (); static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *); static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *); @@ -1741,8 +1840,19 @@ riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type) /* Nonzero offsets are only valid for references that don't use the GOT. */ switch (*symbol_type) { - case SYMBOL_ABSOLUTE: case SYMBOL_PCREL: + /* In 64-bit mode, PC-relative offsets with ranges beyond +/-1GiB are + more likely than not to end up out of range for an auipc instruction + randomly-placed within the 2GB range usable by medany, and such + offsets are quite unlikely to come up by chance, so be conservative + and separate the offset for them when in 64-bit mode, where they don't + wrap around. */ + if (TARGET_64BIT) + return sext_hwi (INTVAL (offset), 30) == INTVAL (offset); + + /* Fall through. */ + + case SYMBOL_ABSOLUTE: case SYMBOL_TLS_LE: /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */ return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); @@ -2765,7 +2875,7 @@ riscv_unspec_address_offset (rtx base, rtx offset, enum riscv_symbol_type symbol_type) { base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), - UNSPEC_ADDRESS_FIRST + symbol_type); + UNSPEC_ADDRESS_FIRST + (int) symbol_type); if (offset != const0_rtx) base = gen_rtx_PLUS (Pmode, base, offset); return gen_rtx_CONST (Pmode, base); @@ -3731,8 +3841,7 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) /* The low-part must be zero-extended when ELEN == 32 and mode == 64. */ if (num == 2 && i == 0) - emit_insn (gen_extend_insn (int_reg, result, mode, smode, - true)); + int_reg = convert_modes (mode, smode, result, true); if (i == 1) { @@ -3779,6 +3888,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) temp_reg = gen_reg_rtx (word_mode); zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND); + /* SRC is a MEM, so we can always extend it directly, so + no need to indirect through convert_modes. 
*/ emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode, zero_extend_p)); riscv_emit_move (dest, gen_lowpart (mode, temp_reg)); @@ -3833,9 +3944,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) { rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode)); rtx temp = gen_reg_rtx (word_mode); - emit_insn (gen_extend_insn (temp, - gen_lowpart (HImode, src), - word_mode, HImode, 1)); + temp = convert_modes (word_mode, HImode, + gen_lowpart (HImode, src), true); if (word_mode == SImode) emit_insn (gen_iorsi3 (temp, mask, temp)); else @@ -4722,6 +4832,13 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq, if (last_dest) last_dest = dest; } + else if (REG_P (dest) && src == CONST0_RTX (GET_MODE (dest))) + { + /* A GPR set to zero can always be replaced with x0, so any + insn that sets a GPR to zero will eventually be eliminated. */ + riscv_if_info.original_cost += COSTS_N_INSNS (1); + riscv_if_info.max_seq_cost += COSTS_N_INSNS (1); + } else last_dest = NULL_RTX; @@ -4908,7 +5025,7 @@ riscv_output_move (rtx dest, rtx src) if (TARGET_ZFHMIN || TARGET_ZFBFMIN) return "fmv.x.h\t%0,%1"; /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */ - return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16"; + return "fmv.x.s\t%0,%1\n\tslli\t%0,%0,16\n\tsrai\t%0,%0,16"; case 4: return "fmv.x.s\t%0,%1"; case 8: @@ -5886,11 +6003,47 @@ static int riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields, int n, HOST_WIDE_INT offset, bool ignore_zero_width_bit_field_p, + bool ignore_empty_union_and_zero_len_array_p, bool vls_p = false, unsigned abi_vlen = 0) { int max_aggregate_field = vls_p ? 8 : 2; switch (TREE_CODE (type)) { + case UNION_TYPE: + { + if (!ignore_empty_union_and_zero_len_array_p) + return -1; + /* Empty union should ignore. */ + if (TYPE_SIZE (type) == NULL || integer_zerop (TYPE_SIZE (type))) + return n; + /* Or all union member are empty union or empty struct. */ + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + { + if (TREE_CODE (f) != FIELD_DECL) + continue; + int m; + HOST_WIDE_INT pos = offset + int_byte_position (f); + switch (TREE_CODE (TREE_TYPE (f))) + { + case ARRAY_TYPE: + case UNION_TYPE: + case RECORD_TYPE: + m = riscv_flatten_aggregate_field ( + TREE_TYPE (f), fields, n, pos, + ignore_zero_width_bit_field_p, + true); + /* Any non-empty struct/union/array will stop the flatten. */ + if (m != n) + return -1; + break; + default: + /* Any member are not struct, union or array will stop the + flatten. */ + return -1; + } + } + return n; + } case RECORD_TYPE: /* Can't handle incomplete types nor sizes that are not fixed. */ if (!COMPLETE_TYPE_P (type) @@ -5916,7 +6069,9 @@ riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields, { HOST_WIDE_INT pos = offset + int_byte_position (f); n = riscv_flatten_aggregate_field ( - TREE_TYPE (f), fields, n, pos, ignore_zero_width_bit_field_p, + TREE_TYPE (f), fields, n, pos, + ignore_zero_width_bit_field_p, + ignore_empty_union_and_zero_len_array_p, vls_p, abi_vlen); } if (n < 0) @@ -5930,14 +6085,20 @@ riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields, riscv_aggregate_field subfields[8]; tree index = TYPE_DOMAIN (type); tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type)); + + /* Array with zero size member should be ignored. 
*/ + if (ignore_empty_union_and_zero_len_array_p && integer_zerop (elt_size)) + return n; + int n_subfields - = riscv_flatten_aggregate_field (TREE_TYPE (type), subfields, 0, - offset, - ignore_zero_width_bit_field_p, vls_p, - abi_vlen); + = riscv_flatten_aggregate_field ( + TREE_TYPE (type), subfields, 0, + offset, + ignore_zero_width_bit_field_p, + ignore_empty_union_and_zero_len_array_p, + vls_p, abi_vlen); /* Can't handle incomplete types nor sizes that are not fixed. */ - if (n_subfields <= 0 - || !COMPLETE_TYPE_P (type) + if (!COMPLETE_TYPE_P (type) || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST || !index || !TYPE_MAX_VALUE (index) @@ -5947,6 +6108,15 @@ riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields, || !tree_fits_uhwi_p (elt_size)) return -1; + /* Zero-length array with empty union/struct should be ignored. */ + if (ignore_empty_union_and_zero_len_array_p && n_subfields == 0 + && integer_zerop (TYPE_MIN_VALUE (index)) + && integer_all_onesp (TYPE_MAX_VALUE (index))) + return n; + + if (n_subfields <= 0) + return -1; + n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) - tree_to_uhwi (TYPE_MIN_VALUE (index)); gcc_assert (n_elts >= 0); @@ -6026,14 +6196,25 @@ static int riscv_flatten_aggregate_argument (const_tree type, riscv_aggregate_field *fields, bool ignore_zero_width_bit_field_p, + bool ignore_empty_union_and_zero_len_array_p, bool vls_p = false, unsigned abi_vlen = 0) { if (!type || TREE_CODE (type) != RECORD_TYPE) return -1; return riscv_flatten_aggregate_field (type, fields, 0, 0, - ignore_zero_width_bit_field_p, vls_p, - abi_vlen); + ignore_zero_width_bit_field_p, + ignore_empty_union_and_zero_len_array_p, + vls_p, abi_vlen); +} + +static bool +riscv_any_non_float_type_field (riscv_aggregate_field *fields, int n) +{ + for (int i = 0; i < n; i++) + if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) + return true; + return false; } /* See whether TYPE is a record whose fields should be returned in one or @@ -6044,24 +6225,18 @@ riscv_pass_aggregate_in_fpr_pair_p (const_tree type, riscv_aggregate_field fields[2]) { static int warned = 0; + if (!type) + return 0; /* This is the old ABI, which differs for C++ and C. */ - int n_old = riscv_flatten_aggregate_argument (type, fields, false); - for (int i = 0; i < n_old; i++) - if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) - { - n_old = -1; - break; - } + int n_old = riscv_flatten_aggregate_argument (type, fields, false, false); + if (riscv_any_non_float_type_field (fields, n_old)) + n_old = -1; /* This is the new ABI, which is the same for C++ and C. */ - int n_new = riscv_flatten_aggregate_argument (type, fields, true); - for (int i = 0; i < n_new; i++) - if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) - { - n_new = -1; - break; - } + int n_new = riscv_flatten_aggregate_argument (type, fields, true, false); + if (riscv_any_non_float_type_field (fields, n_new)) + n_new = -1; if ((n_old != n_new) && (warned == 0)) { @@ -6070,7 +6245,58 @@ riscv_pass_aggregate_in_fpr_pair_p (const_tree type, warned = 1; } - return n_new > 0 ? n_new : 0; + /* ABI with fixing flatten empty union. */ + int n_new2 = riscv_flatten_aggregate_argument (type, fields, true, true); + if (riscv_any_non_float_type_field (fields, n_new2)) + n_new2 = -1; + + bool num_fpr = riscv_pass_mode_in_fpr_p (TYPE_MODE (type)); + + /* There is a special case for struct with zero length array with struct and a + floating point member. 
+ e.g.: + struct S0ae_1f { + struct { + } e1[0]; + float f; + }; + + In this case the new logic gets 1 while the legacy flattening gets -1; however, the legacy ABI still ends up using one FPR in its later logic, so this case should be treated as compatible. + */ + bool compatible_p = n_new2 == 1 && n_new == -1 && num_fpr == 1; + + if ((n_new2 != n_new) + && !compatible_p && (warned == 0)) + { + warning (OPT_Wpsabi, "ABI for flattened empty union and zero " + "length array changed in GCC 16"); + warned = 1; + } + + return n_new2 > 0 ? n_new2 : 0; +} + +struct riscv_aggregate_field_info_t { + unsigned num_fpr; + unsigned num_gpr; + + riscv_aggregate_field_info_t () + : num_fpr (0), num_gpr (0) + {} +}; + +static riscv_aggregate_field_info_t +riscv_parse_aggregate_field_info (riscv_aggregate_field *fields, int n) +{ + riscv_aggregate_field_info_t info; + for (int i = 0; i < n; i++) + { + info.num_fpr += SCALAR_FLOAT_TYPE_P (fields[i].type); + info.num_gpr += INTEGRAL_TYPE_P (fields[i].type); + } + + return info; } /* See whether TYPE is a record whose fields should be returned in one or @@ -6084,35 +6310,48 @@ riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type, static int warned = 0; /* This is the old ABI, which differs for C++ and C. */ - unsigned num_int_old = 0, num_float_old = 0; - int n_old = riscv_flatten_aggregate_argument (type, fields, false); - for (int i = 0; i < n_old; i++) - { - num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type); - num_int_old += INTEGRAL_TYPE_P (fields[i].type); - } + int n_old = riscv_flatten_aggregate_argument (type, fields, false, false); + riscv_aggregate_field_info_t old_info; + old_info = riscv_parse_aggregate_field_info (fields, n_old); /* This is the new ABI, which is the same for C++ and C. */ - unsigned num_int_new = 0, num_float_new = 0; - int n_new = riscv_flatten_aggregate_argument (type, fields, true); - for (int i = 0; i < n_new; i++) - { - num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type); - num_int_new += INTEGRAL_TYPE_P (fields[i].type); - } + int n_new = riscv_flatten_aggregate_argument (type, fields, true, false); + riscv_aggregate_field_info_t new_info; + new_info = riscv_parse_aggregate_field_info (fields, n_new); + + bool values_changed = old_info.num_fpr != new_info.num_fpr + || old_info.num_gpr != new_info.num_gpr; + bool old_is_one_one = old_info.num_fpr == 1 && old_info.num_gpr == 1; + bool new_is_one_one = new_info.num_fpr == 1 && new_info.num_gpr == 1; - if (((num_int_old == 1 && num_float_old == 1 - && (num_int_old != num_int_new || num_float_old != num_float_new)) - || (num_int_new == 1 && num_float_new == 1 - && (num_int_old != num_int_new || num_float_old != num_float_new))) - && (warned == 0)) + if (values_changed + && (old_is_one_one || new_is_one_one) + && warned == 0) { warning (OPT_Wpsabi, "ABI for flattened struct with zero-length " "bit-fields changed in GCC 10"); warned = 1; } - return num_int_new == 1 && num_float_new == 1; + /* ABI with fixing flatten empty union. 
*/ + int n_new2 = riscv_flatten_aggregate_argument (type, fields, true, true); + riscv_aggregate_field_info_t new2_info; + new2_info = riscv_parse_aggregate_field_info (fields, n_new2); + + values_changed = new_info.num_fpr != new2_info.num_fpr + || new_info.num_gpr != new2_info.num_gpr; + bool new2_is_one_one = new2_info.num_fpr == 1 && new2_info.num_gpr == 1; + + if (values_changed + && (new_is_one_one || new2_is_one_one) + && warned == 0) + { + warning (OPT_Wpsabi, "ABI for flattened empty union and zero " + "length array changed in GCC 16"); + warned = 1; + } + + return new2_is_one_one; } /* Return the representation of an argument passed or returned in an FPR @@ -6466,7 +6705,7 @@ riscv_pass_aggregate_in_vr (struct riscv_arg_info *info, riscv_aggregate_field fields[8]; unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc); int i; - int n = riscv_flatten_aggregate_argument (type, fields, true, + int n = riscv_flatten_aggregate_argument (type, fields, true, true, /* vls_p */ true, abi_vlen); if (n == -1) @@ -10576,6 +10815,71 @@ riscv_issue_rate (void) return tune_param->issue_rate; } +/* Structure for very basic vector configuration tracking in the scheduler. */ +struct last_vconfig +{ + bool valid; + bool ta; + bool ma; + uint8_t sew; + uint8_t vlmul; + rtx avl; +} last_vconfig; + +/* Clear LAST_VCONFIG so we have no known state. */ +static void +clear_vconfig (void) +{ + memset (&last_vconfig, 0, sizeof (last_vconfig)); +} + +/* Return TRUE if INSN is a vector insn needing a particular + vector configuration that is trivially equal to the last + vector insn issued. Return FALSE otherwise. */ +static bool +compatible_with_last_vconfig (rtx_insn *insn) +{ + /* We might be able to extract the data from a preexisting vsetvl. */ + if (vsetvl_insn_p (insn)) + return false; + + /* Nothing to do for these cases. */ + if (!NONDEBUG_INSN_P (insn) || !has_vtype_op (insn)) + return false; + + extract_insn_cached (insn); + + rtx avl = get_avl (insn); + if (avl != last_vconfig.avl) + return false; + + if (get_sew (insn) != last_vconfig.sew) + return false; + + if (get_vlmul (insn) != last_vconfig.vlmul) + return false; + + if (tail_agnostic_p (insn) != last_vconfig.ta) + return false; + + if (mask_agnostic_p (insn) != last_vconfig.ma) + return false; + + /* No differences found, they're trivially compatible. */ + return true; +} + +/* Implement TARGET_SCHED_INIT, we use this to track the vector configuration + of the last issued vector instruction. We can then use that information + to potentially adjust the ready queue to issue instructions of a compatible + vector configuration instead of a conflicting configuration. That will + reduce the number of vsetvl instructions we ultimately emit. */ +static void +riscv_sched_init (FILE *, int, int) +{ + clear_vconfig (); +} + /* Implement TARGET_SCHED_VARIABLE_ISSUE. */ static int riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) @@ -10600,9 +10904,88 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) an assert so we can find and fix this problem. */ gcc_assert (insn_has_dfa_reservation_p (insn)); + /* If this is a vector insn with vl/vtype info, then record the last + vector configuration. 
*/ + if (vsetvl_insn_p (insn)) + clear_vconfig (); + else if (NONDEBUG_INSN_P (insn) && has_vtype_op (insn)) + { + extract_insn_cached (insn); + + rtx avl = get_avl (insn); + if (avl == RVV_VLMAX) + avl = const0_rtx; + + if (!avl || !CONST_INT_P (avl)) + clear_vconfig (); + else + { + last_vconfig.valid = true; + last_vconfig.avl = avl; + last_vconfig.sew = get_sew (insn); + last_vconfig.vlmul = get_vlmul (insn); + last_vconfig.ta = tail_agnostic_p (insn); + last_vconfig.ma = mask_agnostic_p (insn); + } + } + return more - 1; } +/* Implement TARGET_SCHED_REORDER. The goal here is to look at the ready + queue and reorder it ever so slightly to encourage issuing an insn with + the same vector configuration as the most recently issued vector + instruction. That will reduce vsetvl instructions. */ +static int +riscv_sched_reorder (FILE *, int, rtx_insn **ready, int *nreadyp, int) +{ + /* If we don't have a valid prior vector configuration, then there is + no point in reordering the ready queue, similarly if there is + just one entry in the queue. */ + if (!last_vconfig.valid || *nreadyp == 1) + return riscv_issue_rate (); + + int nready = *nreadyp; + int priority = INSN_PRIORITY (ready[nready - 1]); + for (int i = nready - 1; i >= 0; i--) + { + rtx_insn *insn = ready[i]; + + /* On a high performance core, vsetvl instructions should be + inexpensive. Removing them is very much a secondary concern, so + be extremely conservative with reordering, essentially only + allowing reordering within the highest priority value. + + Lower end cores may benefit from more flexibility here. That + tuning is left to those who understand their core's behavior + and can thoroughly benchmark the result. Assuming such + designs appear, we can probably put an entry in the tuning + structure to indicate how much difference in priority to allow. */ + if (INSN_PRIORITY (insn) < priority) + break; + + if (compatible_with_last_vconfig (insn)) + { + /* This entry is compatible with the last vconfig and has + the same priority as the most important insn. So swap + it so that we keep the vector configuration as-is and + ultimately eliminate a vsetvl. + + Note no need to swap if this is the first entry in the + queue. */ + if (i == nready - 1) + break; + + std::swap (ready[i], ready[nready - 1]); + break; + } + } + + return riscv_issue_rate (); +} + + /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports instruction fusion of some sort. */ @@ -11842,6 +12225,12 @@ riscv_override_options_internal (struct gcc_options *opts) /* Convert -march and -mrvv-vector-bits to a chunks count. */ riscv_vector_chunks = riscv_convert_vector_chunks (opts); + /* Set scalar costing to a high value such that we always pick + vectorization. Increase scalar costing by 100x. */ + if (opts->x_riscv_max_vectorization) + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_vect_scalar_cost_multiplier, 10000); + if (opts->x_flag_cf_protection != CF_NONE) { if ((opts->x_flag_cf_protection & CF_RETURN) == CF_RETURN @@ -12079,6 +12468,39 @@ riscv_option_restore (struct gcc_options *opts, static GTY (()) tree riscv_previous_fndecl; +/* Reset the previous function declaration. */ + +void +riscv_reset_previous_fndecl (void) +{ + riscv_previous_fndecl = NULL; +} + +/* Implement TARGET_OPTION_SAVE. 
*/ + +static void +riscv_option_save (struct cl_target_option *ptr, + struct gcc_options *opts, + struct gcc_options * /* opts_set */) +{ + ptr->x_riscv_arch_string = opts->x_riscv_arch_string; + ptr->x_riscv_tune_string = opts->x_riscv_tune_string; + ptr->x_riscv_cpu_string = opts->x_riscv_cpu_string; +} + +/* Implement TARGET_OPTION_PRINT. */ + +static void +riscv_option_print (FILE *file, int indent, struct cl_target_option *ptr) +{ + fprintf (file, "%*sarch = %s\n", indent, "", + ptr->x_riscv_arch_string ? ptr->x_riscv_arch_string : "default"); + fprintf (file, "%*stune = %s\n", indent, "", + ptr->x_riscv_tune_string ? ptr->x_riscv_tune_string : "default"); + if (ptr->x_riscv_cpu_string) + fprintf (file, "%*scpu = %s\n", indent, "", ptr->x_riscv_cpu_string); +} + /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ static void @@ -12415,7 +12837,7 @@ riscv_get_interrupt_type (tree decl) /* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET of the function, if such exists. This function may be called multiple - times on a single function so use aarch64_previous_fndecl to avoid + times on a single function so use riscv_previous_fndecl to avoid setting up identical state. */ /* Sanity checking for above function attributes. */ @@ -13871,84 +14293,14 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } -/* Generate a REG rtx of Xmode from the given rtx and mode. - The rtx x can be REG (QI/HI/SI/DI) or const_int. - The machine_mode mode is the original mode from define pattern. - The rtx_code can be ZERO_EXTEND or SIGN_EXTEND. - - If rtx is REG: - - 1. If rtx Xmode, the RTX x will be returned directly. - 2. If rtx non-Xmode, the value extended into a new REG of Xmode will be - returned. - - The scalar ALU like add don't support non-Xmode like QI/HI. Then the - gen_lowpart will have problem here. For example, when we would like - to add -1 (0xff if QImode) and 2 (0x2 if QImode). The 0xff and 0x2 will - be loaded to register for adding. Aka: - - 0xff + 0x2 = 0x101 instead of -1 + 2 = 1. - - Thus we need to sign extend 0xff to 0xffffffffffffffff if Xmode is DImode - for correctness. Similar the unsigned also need zero extend. - - If rtx is const_int: - - 1. A new REG rtx will be created to hold the value of const_int. - - According to the gccint doc, the constants generated for modes with fewer - bits than in HOST_WIDE_INT must be sign extended to full width. Thus there - will be two cases here, take QImode as example. - - For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple - mov from const_int to the new REG rtx is good enough here. - - For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand. - Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode - of RV64. So we need to cleanup the highest 56 bits of the new REG rtx moved - from the (const_int -2). - - Then the underlying expanding can perform the code generation based on - the REG rtx of Xmode, instead of taking care of these in expand func. */ - +/* Force X into an Xmode register. */ static rtx riscv_extend_to_xmode_reg (rtx x, machine_mode mode, enum rtx_code rcode) { gcc_assert (rcode == ZERO_EXTEND || rcode == SIGN_EXTEND); - rtx xmode_reg = gen_reg_rtx (Xmode); - - if (CONST_INT_P (x)) - { - if (mode == Xmode) - emit_move_insn (xmode_reg, x); - else if (rcode == ZERO_EXTEND) - { - /* Combine deliberately does not simplify extensions of constants - (long story). 
So try to generate the zero extended constant - efficiently. - - First extract the constant and mask off all the bits not in - MODE. */ - HOST_WIDE_INT val = INTVAL (x); - val &= GET_MODE_MASK (mode); - - /* X may need synthesis, so do not blindly copy it. */ - xmode_reg = force_reg (Xmode, gen_int_mode (val, Xmode)); - } - else /* SIGN_EXTEND. */ - { - rtx x_reg = gen_reg_rtx (mode); - emit_move_insn (x_reg, x); - riscv_emit_unary (rcode, xmode_reg, x_reg); - } - } - else if (mode == Xmode) - return x; - else - riscv_emit_unary (rcode, xmode_reg, x); - - return xmode_reg; + rtx t = convert_modes (Xmode, mode, x, rcode == ZERO_EXTEND); + return force_reg (Xmode, t); } /* Implements the unsigned saturation add standard name usadd for int mode. @@ -14295,7 +14647,7 @@ riscv_expand_ustrunc (rtx dest, rtx src) gcc_assert (precision < 64); uint64_t max = ((uint64_t)1u << precision) - 1u; - rtx xmode_src = gen_lowpart (Xmode, src); + rtx xmode_src = riscv_extend_to_xmode_reg (src, GET_MODE (src), ZERO_EXTEND); rtx xmode_dest = gen_reg_rtx (Xmode); rtx xmode_lt = gen_reg_rtx (Xmode); @@ -15598,7 +15950,8 @@ synthesize_and (rtx operands[3]) if (tmode != VOIDmode) { rtx tmp = gen_lowpart (tmode, operands[1]); - emit_insn (gen_extend_insn (operands[0], tmp, word_mode, tmode, 1)); + emit_move_insn (operands[0], convert_modes (word_mode, tmode, + tmp, true)); return true; } } @@ -15995,9 +16348,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE riscv_option_override +#undef TARGET_OPTION_SAVE +#define TARGET_OPTION_SAVE riscv_option_save + #undef TARGET_OPTION_RESTORE #define TARGET_OPTION_RESTORE riscv_option_restore +#undef TARGET_OPTION_PRINT +#define TARGET_OPTION_PRINT riscv_option_print + #undef TARGET_OPTION_VALID_ATTRIBUTE_P #define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p @@ -16011,9 +16370,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_SCHED_MACRO_FUSION_PAIR_P #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT riscv_sched_init + #undef TARGET_SCHED_VARIABLE_ISSUE #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER riscv_sched_reorder + #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost |
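
Two small GNU C sketches follow to make the user-visible effect of the patch easier to evaluate. They are not part of the patch or its testsuite: the function names, the empty_union_fd struct and the build options mentioned in the comments are illustrative assumptions, while S0ae_1f and the -Wpsabi warning text are taken from the new code in riscv_pass_aggregate_in_fpr_pair_p.

The first sketch exercises the tightened SYMBOL_PCREL offset check in riscv_symbolic_constant_p; it assumes a 64-bit medany build (for example -march=rv64gc -mabi=lp64d -mcmodel=medany).

/* Illustrative sketch only.  */
extern char huge_table[];

char
read_far (void)
{
  /* A symbol plus a 1 GiB constant offset no longer passes the narrower
     SYMBOL_PCREL offset check on rv64, so the compiler is expected to
     materialize the symbol address and add the offset separately rather
     than folding the offset into the auipc/addi relocation.  */
  return huge_table[1u << 30];
}

The second sketch shows the kind of aggregate whose classification the new ignore_empty_union_and_zero_len_array_p path changes; it relies on the GNU C extensions for empty structs and zero-length arrays and assumes a hard-float ABI with argument FPRs available.

/* From the comment in riscv_pass_aggregate_in_fpr_pair_p: the zero-length
   array of empty structs is now skipped, leaving a single float field.
   The legacy flattening gave up here, but it reached the same one-FPR
   result through the struct's mode, so the patch treats this case as
   compatible and does not warn about it.  */
struct S0ae_1f
{
  struct {} e1[0];
  float f;
};

/* Hypothetical case: a union whose members are all empty structs is also
   skipped, so this struct now flattens to {float, double} and becomes a
   candidate for the two-FPR convention where the old logic could not
   flatten it at all.  This is the situation covered by the new diagnostic
   "ABI for flattened empty union and zero length array changed in GCC 16".  */
struct empty_union_fd
{
  union { struct {} a; struct {} b; } u;
  float f;
  double d;
};

double
sum_fields (struct S0ae_1f x, struct empty_union_fd y)
{
  return x.f + y.f + y.d;
}

Compiling such inputs with -Wpsabi before and after the patch is presumably the quickest way to see which aggregates the new flattening reclassifies.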
