Diffstat (limited to 'gcc/config/riscv/riscv.cc')
-rw-r--r-- | gcc/config/riscv/riscv.cc | 399
1 file changed, 273 insertions, 126 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0a9fcef..591122f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3685,7 +3685,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
       /* This test can fail if (for example) we want a HF and Z[v]fh is
          not enabled.  In that case we just want to let the standard
          expansion path run.  */
-      if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
+      if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode)
+          && gen_lowpart_common (vmode, SUBREG_REG (src)))
        {
          rtx v = gen_lowpart (vmode, SUBREG_REG (src));
          rtx int_reg = dest;
@@ -3958,41 +3959,6 @@ riscv_extend_cost (rtx op, bool unsigned_p)
   return COSTS_N_INSNS (2);
 }

-/* Return the cost of the vector binary rtx like add, minus, mult.
-   The cost of scalar2vr_cost will be appended if there one of the
-   op comes from the VEC_DUPLICATE.  */
-
-static int
-get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost)
-{
-  gcc_assert (riscv_v_ext_mode_p (GET_MODE (x)));
-
-  rtx neg;
-  rtx op_0;
-  rtx op_1;
-
-  if (GET_CODE (x) == UNSPEC)
-    {
-      op_0 = XVECEXP (x, 0, 0);
-      op_1 = XVECEXP (x, 0, 1);
-    }
-  else
-    {
-      op_0 = XEXP (x, 0);
-      op_1 = XEXP (x, 1);
-    }
-
-  if (GET_CODE (op_0) == VEC_DUPLICATE
-      || GET_CODE (op_1) == VEC_DUPLICATE)
-    return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
-  else if (GET_CODE (neg = op_0) == NEG
-           && (GET_CODE (op_1) == VEC_DUPLICATE
-               || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE))
-    return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
-  else
-    return COSTS_N_INSNS (1);
-}
-
 /* Implement TARGET_RTX_COSTS.  */

 #define SINGLE_SHIFT_COST 1
@@ -4014,73 +3980,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
     {
     case SET:
       {
-       switch (GET_CODE (x))
+       if (GET_CODE (x) == VEC_DUPLICATE)
+         *total = (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
+       else
          {
-         case VEC_DUPLICATE:
-           *total = gr2vr_cost * COSTS_N_INSNS (1);
-           break;
-         case IF_THEN_ELSE:
-           {
-             rtx op = XEXP (x, 1);
+           int vec_dup_count = 0;
+           subrtx_var_iterator::array_type array;

-             switch (GET_CODE (op))
-               {
-               case DIV:
-               case UDIV:
-               case MOD:
-               case UMOD:
-               case US_PLUS:
-               case US_MINUS:
-               case SS_PLUS:
-               case SS_MINUS:
-                 *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
-                 break;
-               case UNSPEC:
-                 {
-                   switch (XINT (op, 1))
-                     {
-                     case UNSPEC_VAADDU:
-                     case UNSPEC_VAADD:
-                       *total
-                         = get_vector_binary_rtx_cost (op, scalar2vr_cost);
-                       break;
-                     default:
-                       *total = COSTS_N_INSNS (1);
-                       break;
-                     }
-                 }
-                 break;
-               default:
-                 *total = COSTS_N_INSNS (1);
-                 break;
-               }
-           }
-           break;
-         case PLUS:
-         case MINUS:
-         case AND:
-         case IOR:
-         case XOR:
-         case MULT:
-         case SMAX:
-         case UMAX:
-         case SMIN:
-         case UMIN:
-           {
-             rtx op;
-             rtx op_0 = XEXP (x, 0);
-             rtx op_1 = XEXP (x, 1);
+           FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
+             if (GET_CODE (*iter) == VEC_DUPLICATE)
+               vec_dup_count++;

-             if (GET_CODE (op = op_0) == MULT
-                 || GET_CODE (op = op_1) == MULT)
-               *total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
-             else
-               *total = get_vector_binary_rtx_cost (x, scalar2vr_cost);
-           }
-           break;
-         default:
-           *total = COSTS_N_INSNS (1);
-           break;
+           int total_vec_dup_cost = vec_dup_count * scalar2vr_cost;
+
+           *total = COSTS_N_INSNS (1) * (total_vec_dup_cost + 1);
          }
       }
       break;
@@ -5532,9 +5445,9 @@ canonicalize_comparands (rtx_code code, rtx *op0, rtx *op1)
   /* We might have been handed back a SUBREG.  Just to make things
      easy, force it into a REG.  */
-  if (!REG_P (*op0) && !CONST_INT_P (*op0))
+  if (!REG_P (*op0) && !CONST_INT_P (*op0) && INTEGRAL_MODE_P (GET_MODE (*op0)))
     *op0 = force_reg (word_mode, *op0);
-  if (!REG_P (*op1) && !CONST_INT_P (*op1))
+  if (!REG_P (*op1) && !CONST_INT_P (*op1) && INTEGRAL_MODE_P (GET_MODE (*op1)))
     *op1 = force_reg (word_mode, *op1);
 }
@@ -6213,7 +6126,8 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
    For a library call, FNTYPE is 0.  */

 void
-riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
+riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype,
+                            rtx, tree, int)
 {
   memset (cum, 0, sizeof (*cum));
@@ -6494,30 +6408,44 @@ riscv_arg_partial_bytes (cumulative_args_t cum,
   return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
 }

-/* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
-   VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
-   VALTYPE is null and MODE is the mode of the return value.  */
+/* Implements hook TARGET_FUNCTION_VALUE.  */

 rtx
-riscv_function_value (const_tree type, const_tree func, machine_mode mode)
+riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type,
+                      bool)
 {
   struct riscv_arg_info info;
   CUMULATIVE_ARGS args;

-  if (type)
+  if (fn_decl_or_type)
     {
-      int unsigned_p = TYPE_UNSIGNED (type);
+      const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ?
+        TREE_TYPE (fn_decl_or_type) : fn_decl_or_type;
+      riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0);
+    }
+  else
+    memset (&args, 0, sizeof args);

-      mode = TYPE_MODE (type);
+  int unsigned_p = TYPE_UNSIGNED (ret_type);

-      /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes,
-         return values, promote the mode here too.  */
-      mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
-    }
+  machine_mode mode = TYPE_MODE (ret_type);

-  memset (&args, 0, sizeof args);
+  /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes,
+     return values, promote the mode here too.  */
+  mode = promote_function_mode (ret_type, mode, &unsigned_p, fn_decl_or_type, 1);

-  return riscv_get_arg_info (&info, &args, mode, type, true, true);
+  return riscv_get_arg_info (&info, &args, mode, ret_type, true, true);
+}
+
+/* Implements hook TARGET_LIBCALL_VALUE.  */
+
+rtx
+riscv_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+  struct riscv_arg_info info;
+  CUMULATIVE_ARGS args;
+  memset (&args, 0, sizeof args);
+  return riscv_get_arg_info (&info, &args, mode, NULL_TREE, true, true);
 }

 /* Implement TARGET_PASS_BY_REFERENCE.  */
@@ -13867,9 +13795,9 @@ riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y)
   riscv_emit_binary (MULT, mul, x, y);

   if (TARGET_64BIT)
-    emit_insn (gen_usmuldi3_highpart (mulhu, x, y));
+    emit_insn (gen_umuldi3_highpart (mulhu, x, y));
   else
-    emit_insn (gen_usmulsi3_highpart (mulhu, x, y));
+    emit_insn (gen_umulsi3_highpart (mulhu, x, y));

   riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode));
   riscv_emit_unary (NEG, overflow_p, overflow_p);
@@ -14037,10 +13965,13 @@ riscv_c_mode_for_floating_type (enum tree_index ti)
   return default_mode_for_floating_type (ti);
 }

-/* This parses the attribute arguments to target_version in DECL and modifies
-   the feature mask and priority required to select those targets.  */
+/* Parse the attribute arguments to target_version in DECL and modify
+   the feature mask and priority required to select those targets.
+   If LOC is nonnull, report diagnostics against *LOC, otherwise
+   remain silent.  */
 static void
 parse_features_for_version (tree decl,
+                            location_t *loc,
                             struct riscv_feature_bits &res,
                             int &priority)
 {
@@ -14071,14 +14002,12 @@ parse_features_for_version (tree decl,
   cl_target_option_restore (&global_options, &global_options_set,
                             default_opts);

-  riscv_process_target_version_attr (TREE_VALUE (version_attr),
-                                     DECL_SOURCE_LOCATION (decl));
+  riscv_process_target_version_attr (TREE_VALUE (version_attr), loc);

   priority = global_options.x_riscv_fmv_priority;
   const char *arch_string = global_options.x_riscv_arch_string;
   bool parse_res
-    = riscv_minimal_hwprobe_feature_bits (arch_string, &res,
-                                          DECL_SOURCE_LOCATION (decl));
+    = riscv_minimal_hwprobe_feature_bits (arch_string, &res, loc);
   gcc_assert (parse_res);

   cl_target_option_restore (&global_options, &global_options_set,
@@ -14135,8 +14064,8 @@ riscv_compare_version_priority (tree decl1, tree decl2)
   struct riscv_feature_bits mask1, mask2;
   int prio1, prio2;

-  parse_features_for_version (decl1, mask1, prio1);
-  parse_features_for_version (decl2, mask2, prio2);
+  parse_features_for_version (decl1, nullptr, mask1, prio1);
+  parse_features_for_version (decl2, nullptr, mask2, prio2);

   return compare_fmv_features (mask1, mask2, prio1, prio2);
 }
@@ -14439,6 +14368,7 @@ dispatch_function_versions (tree dispatch_decl,
       version_info.version_decl = version_decl;
       // Get attribute string, parse it and find the right features.
       parse_features_for_version (version_decl,
+                                  &DECL_SOURCE_LOCATION (version_decl),
                                   version_info.features,
                                   version_info.prio);
       function_versions.push_back (version_info);
@@ -15441,6 +15371,217 @@ synthesize_and (rtx operands[3])
   return true;
 }

+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+   OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+   REG.
+
+   OPERANDS[2] is a CONST_INT.
+
+   Return TRUE if the operation was fully synthesized and the caller
+   need not generate additional code.  Return FALSE if the operation
+   was not synthesized and the caller is responsible for emitting the
+   proper sequence.  */
+
+bool
+synthesize_add (rtx operands[3])
+{
+  /* Trivial cases that don't need synthesis.  */
+  if (SMALL_OPERAND (INTVAL (operands[2])))
+    return false;
+
+  int budget1 = riscv_const_insns (operands[2], true);
+  int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+  HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+  /* If we can emit two addi insns then that's better than synthesizing
+     the constant into a temporary, then adding the temporary to the
+     other input.  The exception is when the constant can be loaded
+     in a single instruction which can issue whenever its convenient.  */
+  if (SUM_OF_TWO_S12 (ival) && budget1 >= 2)
+    {
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (ival >= 0)
+        saturated = ~saturated;
+
+      ival -= saturated;
+
+      rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
+      emit_insn (gen_rtx_SET (operands[0], x));
+      rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival));
+      emit_insn (gen_rtx_SET (operands[0], output));
+      return true;
+    }
+
+  /* If we can shift the constant by 1, 2, or 3 bit positions
+     and the result is a cheaper constant, then do so.  */
+  ival = INTVAL (operands[2]);
+  if (TARGET_ZBA
+      && (((ival % 2) == 0 && budget1
+           > riscv_const_insns (GEN_INT (ival >> 1), true))
+          || ((ival % 4) == 0 && budget1
+              > riscv_const_insns (GEN_INT (ival >> 2), true))
+          || ((ival % 8) == 0 && budget1
+              > riscv_const_insns (GEN_INT (ival >> 3), true))))
+    {
+      // Load the shifted constant into a temporary
+      int shct = ctz_hwi (ival);
+
+      /* We can handle shifting up to 3 bit positions via shNadd.  */
+      if (shct > 3)
+        shct = 3;
+
+      /* The adjusted constant may still need synthesis, so do not copy
+         it directly into register.  Let the expander handle it.  */
+      rtx tmp = force_reg (word_mode, GEN_INT (ival >> shct));
+
+      /* Generate shift-add of temporary and operands[1]
+         into the final destination.  */
+      rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (shct));
+      rtx output = gen_rtx_PLUS (word_mode, x, operands[1]);
+      emit_insn (gen_rtx_SET (operands[0], output));
+      return true;
+    }
+
+  /* If the negated constant is cheaper than the original, then negate
+     the constant and use sub.  */
+  if (budget2 < budget1)
+    {
+      // load -INTVAL (operands[2]) into a temporary
+      rtx tmp = force_reg (word_mode, GEN_INT (-INTVAL (operands[2])));
+
+      // subtract operads[2] from operands[1]
+      rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp);
+      emit_insn (gen_rtx_SET (operands[0], output));
+      return true;
+    }
+
+  /* No add synthesis was found.  Synthesize the constant into
+     a temporary and use that.  */
+  rtx x = force_reg (word_mode, operands[2]);
+  x = gen_rtx_PLUS (word_mode, operands[1], x);
+  emit_insn (gen_rtx_SET (operands[0], x));
+  return true;
+}
+
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+   For 32-bit object cases with a 64-bit target.
+
+   OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+   REG.
+
+   OPERANDS[2] is a CONST_INT.
+
+   Return TRUE if the operation was fully synthesized and the caller
+   need not generate additional code.  Return FALSE if the operation
+   was not synthesized and the caller is responsible for emitting the
+   proper sequence.  */
+
+
+bool
+synthesize_add_extended (rtx operands[3])
+{
+
+/* If operands[2] is a 12-bit signed immediate,
+   no synthesis needs to be done.  */
+
+  if (SMALL_OPERAND (INTVAL (operands[2])))
+    return false;
+
+  HOST_WIDE_INT ival = INTVAL (operands[2]);
+  int budget1 = riscv_const_insns (operands[2], true);
+  int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+/* If operands[2] can be split into two 12-bit signed immediates,
+   split add into two adds.  */
+
+  if (SUM_OF_TWO_S12 (ival))
+    {
+      HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+      if (ival >= 0)
+        saturated = ~saturated;
+
+      ival -= saturated;
+
+      rtx temp = gen_reg_rtx (DImode);
+      emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (saturated)));
+      temp = gen_lowpart (SImode, temp);
+      SUBREG_PROMOTED_VAR_P (temp) = 1;
+      SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
+      emit_insn (gen_rtx_SET (operands[0], temp));
+      rtx t = gen_reg_rtx (DImode);
+      emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival)));
+      t = gen_lowpart (SImode, t);
+      SUBREG_PROMOTED_VAR_P (t) = 1;
+      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+      emit_move_insn (operands[0], t);
+      return true;
+    }
+
+
+/* If the negated value is cheaper to synthesize, subtract that from
+   operands[1].  */
+
+  if (budget2 < budget1)
+    {
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_rtx_SET (tmp, GEN_INT (-INTVAL (operands[2]))));
+
+      rtx t = gen_reg_rtx (DImode);
+      emit_insn (gen_subsi3_extended (t, operands[1], tmp));
+      t = gen_lowpart (SImode, t);
+      SUBREG_PROMOTED_VAR_P (t) = 1;
+      SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+      emit_move_insn (operands[0], t);
+      return true;
+    }
+
+  rtx tsrc = force_reg (SImode, operands[2]);
+  rtx tdest = gen_reg_rtx (DImode);
+  emit_insn (gen_addsi3_extended (tdest, operands[1], tsrc));
+  tdest = gen_lowpart (SImode, tdest);
+  SUBREG_PROMOTED_VAR_P (tdest) = 1;
+  SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+  emit_move_insn (operands[0], tdest);
+  return true;
+
+}
+
+
+/*
+   HINT : argument specify the target cache
+
+   TODO : LOCALITY is unused.
+
+   Return the first operand of the associated PREF or PREFX insn.  */
+rtx
+riscv_prefetch_cookie (rtx hint, rtx locality)
+{
+  return (GEN_INT (INTVAL (hint)
+                   + CacheHint::DCACHE_HINT + INTVAL (locality) * 0));
+}
+
+/* Return true if X is a legitimate address with offset for prefetch.
+   MODE is the mode of the value being accessed.  */
+bool
+riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
+{
+  struct riscv_address_info addr;
+
+  if (riscv_classify_address (&addr, x, mode, false)
+      && addr.type == ADDRESS_REG)
+    {
+      if (TARGET_XMIPSCBOP)
+        return (CONST_INT_P (addr.offset)
+                && MIPS_RISCV_9BIT_OFFSET_P (INTVAL (addr.offset)));
+    }
+
+  return true;
+}

 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
@@ -15804,6 +15945,12 @@ synthesize_and (rtx operands[3])
 #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
 #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p

+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE riscv_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE riscv_libcall_value
+
 #undef TARGET_FUNCTION_VALUE_REGNO_P
 #define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p
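
Note (not part of the patch): the SUM_OF_TWO_S12 paths in synthesize_add and
synthesize_add_extended above split an out-of-range addend into two values
that each fit the signed 12-bit addi immediate. The standalone C sketch below
illustrates that split; IMM_BITS, the sample value 3000 and the register names
are illustrative assumptions, not taken from the patch.

  #include <inttypes.h>
  #include <stdio.h>

  #define IMM_BITS 12  /* RISC-V addi immediates are signed 12-bit: [-2048, 2047] */

  int main (void)
  {
    int64_t ival = 3000;  /* sample addend too large for one addi */

    /* Saturate toward the sign of ival: -2048 or +2047, mirroring the
       saturated/remainder split used in synthesize_add.  */
    int64_t saturated = -(INT64_C (1) << (IMM_BITS - 1));
    if (ival >= 0)
      saturated = ~saturated;

    int64_t rest = ival - saturated;  /* 3000 = 2047 + 953; both parts fit */

    printf ("addi t0, a0, %" PRId64 "\n", saturated);
    printf ("addi t0, t0, %" PRId64 "\n", rest);
    return 0;
  }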