Diffstat (limited to 'gcc')
26 files changed, 532 insertions, 583 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3ab7f56..1b8058d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2025-04-26 Jan Hubicka <hubicka@ucw.cz> + + PR target/105275 + * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Fix cost of FP scalar + MAX_EXPR and MIN_EXPR + +2025-04-26 Iain Buclaw <ibuclaw@gdcproject.org> + + * config.gcc (LIST): --enable-obsolete for m32c-elf. + +2025-04-26 Andrew Pinski <quic_apinski@quicinc.com> + + * simplify-rtx.cc (simplify_context::simplify_unary_operation_1) <case ZERO_EXTEND>: + Add simplifcation for and with a constant. + 2025-04-25 Dimitar Dimitrov <dimitar@dinux.eu> * doc/sourcebuild.texi: Document variadic_mi_thunk effective diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 61358eb..bcfb36f 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250426 +20250427 diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc index 9212827..23737c3 100644 --- a/gcc/ada/gcc-interface/utils.cc +++ b/gcc/ada/gcc-interface/utils.cc @@ -3286,30 +3286,6 @@ tree create_param_decl (tree name, tree type) { tree param_decl = build_decl (input_location, PARM_DECL, name, type); - - /* Honor TARGET_PROMOTE_PROTOTYPES like the C compiler, as not doing so - can lead to various ABI violations. */ - if (targetm.calls.promote_prototypes (NULL_TREE) - && INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) - { - /* We have to be careful about biased types here. Make a subtype - of integer_type_node with the proper biasing. */ - if (TREE_CODE (type) == INTEGER_TYPE - && TYPE_BIASED_REPRESENTATION_P (type)) - { - tree subtype - = make_unsigned_type (TYPE_PRECISION (integer_type_node)); - TREE_TYPE (subtype) = integer_type_node; - TYPE_BIASED_REPRESENTATION_P (subtype) = 1; - SET_TYPE_RM_MIN_VALUE (subtype, TYPE_MIN_VALUE (type)); - SET_TYPE_RM_MAX_VALUE (subtype, TYPE_MAX_VALUE (type)); - type = subtype; - } - else - type = integer_type_node; - } - DECL_ARG_TYPE (param_decl) = type; return param_decl; } diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index 8c420f2..e7aee8a 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -5721,26 +5721,6 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs, } if (TREE_CODE (decl) == FUNCTION_DECL - && targetm.calls.promote_prototypes (TREE_TYPE (decl))) - { - struct c_declarator *ce = declarator; - - if (ce->kind == cdk_pointer) - ce = declarator->declarator; - if (ce->kind == cdk_function) - { - tree args = ce->u.arg_info->parms; - for (; args; args = DECL_CHAIN (args)) - { - tree type = TREE_TYPE (args); - if (type && INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) - DECL_ARG_TYPE (args) = c_type_promotes_to (type); - } - } - } - - if (TREE_CODE (decl) == FUNCTION_DECL && DECL_DECLARED_INLINE_P (decl) && DECL_UNINLINABLE (decl) && lookup_attribute ("noinline", DECL_ATTRIBUTES (decl))) @@ -11179,13 +11159,6 @@ store_parm_decls_oldstyle (tree fndecl, const struct c_arg_info *arg_info) useful for argument types like uid_t. */ DECL_ARG_TYPE (parm) = TREE_TYPE (parm); - if (targetm.calls.promote_prototypes (TREE_TYPE (current_function_decl)) - && INTEGRAL_TYPE_P (TREE_TYPE (parm)) - && (TYPE_PRECISION (TREE_TYPE (parm)) - < TYPE_PRECISION (integer_type_node))) - DECL_ARG_TYPE (parm) - = c_type_promotes_to (TREE_TYPE (parm)); - /* ??? 
Is it possible to get here with a built-in prototype or will it always have been diagnosed as conflicting with an @@ -11413,19 +11386,6 @@ finish_function (location_t end_loc) if (c_dialect_objc ()) objc_finish_function (); - if (TREE_CODE (fndecl) == FUNCTION_DECL - && targetm.calls.promote_prototypes (TREE_TYPE (fndecl))) - { - tree args = DECL_ARGUMENTS (fndecl); - for (; args; args = DECL_CHAIN (args)) - { - tree type = TREE_TYPE (args); - if (INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) - DECL_ARG_TYPE (args) = c_type_promotes_to (type); - } - } - if (DECL_INITIAL (fndecl) && DECL_INITIAL (fndecl) != error_mark_node) BLOCK_SUPERCONTEXT (DECL_INITIAL (fndecl)) = fndecl; diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 55d896e..d94ecb5 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -4163,12 +4163,6 @@ convert_argument (location_t ploc, tree function, tree fundecl, val, origtype, ic_argpass, npc, fundecl, function, parmnum + 1, warnopt); - - if (targetm.calls.promote_prototypes (fundecl ? TREE_TYPE (fundecl) : 0) - && INTEGRAL_TYPE_P (type) - && (TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))) - parmval = default_conversion (parmval); - return parmval; } @@ -6744,17 +6738,12 @@ c_safe_arg_type_equiv_p (tree t1, tree t2) && TREE_CODE (t2) == POINTER_TYPE) return true; - /* The signedness of the parameter matters only when an integral - type smaller than int is promoted to int, otherwise only the - precision of the parameter matters. - This check should make sure that the callee does not see - undefined values in argument registers. */ + /* Only the precision of the parameter matters. This check should + make sure that the callee does not see undefined values in argument + registers. */ if (INTEGRAL_TYPE_P (t1) && INTEGRAL_TYPE_P (t2) - && TYPE_PRECISION (t1) == TYPE_PRECISION (t2) - && (TYPE_UNSIGNED (t1) == TYPE_UNSIGNED (t2) - || !targetm.calls.promote_prototypes (NULL_TREE) - || TYPE_PRECISION (t1) >= TYPE_PRECISION (integer_type_node))) + && TYPE_PRECISION (t1) == TYPE_PRECISION (t2)) return true; return comptypes (t1, t2); diff --git a/gcc/calls.cc b/gcc/calls.cc index 076e046..676f0f9 100644 --- a/gcc/calls.cc +++ b/gcc/calls.cc @@ -1382,6 +1382,11 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, } } + bool promote_p + = targetm.calls.promote_prototypes (fndecl + ? TREE_TYPE (fndecl) + : fntype); + /* I counts args in order (to be) pushed; ARGPOS counts in order written. */ for (argpos = 0; argpos < num_actuals; i--, argpos++) { @@ -1391,6 +1396,10 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, /* Replace erroneous argument with constant zero. */ if (type == error_mark_node || !COMPLETE_TYPE_P (type)) args[i].tree_value = integer_zero_node, type = integer_type_node; + else if (promote_p + && INTEGRAL_TYPE_P (type) + && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) + type = integer_type_node; /* If TYPE is a transparent union or record, pass things the way we would pass the first field of the union or record. 
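A note on the promotion that calls.cc now applies once (instead of each frontend doing it): when the target's promote_prototypes hook is true and an argument has integral type narrower than int, the argument is passed as int. A standalone C sketch of that rule only; not GCC code, and all names here are illustrative:

#include <stdbool.h>
#include <stdio.h>

struct arg_type { const char *name; unsigned precision; bool is_integral; };

/* Widen sub-int integral arguments to int when the target asks for
   prototype promotion; otherwise leave the type alone.  */
static struct arg_type
promote_arg (struct arg_type t, bool promote_p, unsigned int_precision)
{
  if (promote_p && t.is_integral && t.precision < int_precision)
    return (struct arg_type) { "int", int_precision, true };
  return t;
}

int main (void)
{
  struct arg_type c = { "char", 8, true };
  struct arg_type s = { "short", 16, true };
  struct arg_type l = { "long", 64, true };
  printf ("%s -> %s\n", c.name, promote_arg (c, true, 32).name);  /* char -> int  */
  printf ("%s -> %s\n", s.name, promote_arg (s, true, 32).name);  /* short -> int */
  printf ("%s -> %s\n", l.name, promote_arg (l, true, 32).name);  /* long -> long */
  return 0;
}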
We have diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 4815fbc..296df3b 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -1519,17 +1519,18 @@ ix86_handle_option (struct gcc_options *opts, return true; case OPT_msse4: - gcc_assert (value != 0); - opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET; - return true; - - case OPT_mno_sse4: - gcc_assert (value != 0); - opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET; - opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_UNSET; - opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_UNSET; + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET; + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_UNSET; + } return true; case OPT_msse4a: diff --git a/gcc/config.gcc b/gcc/config.gcc index d98df88..6dbe880 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -273,6 +273,7 @@ esac # Obsolete configurations. case ${target} in ia64*-*-hpux* | ia64*-*-*vms* | ia64*-*-elf* \ + | m32c*-*-* \ ) if test "x$enable_obsolete" != xyes; then echo "*** Configuration ${target} is obsolete." >&2 diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 964449f..45aa9b4 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1271,13 +1271,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], } } - /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */ - if (opt == OPT_msse4 && !opt_set_p) - { - opt = OPT_mno_sse4; - opt_set_p = true; - } - /* Process the option. */ if (opt == N_OPTS) { diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 78df3d9..3171d6e 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -25420,7 +25420,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case MAX_EXPR: if (fp) { - if (X87_FLOAT_MODE_P (mode)) + if (X87_FLOAT_MODE_P (mode) + && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) /* x87 requires conditional branch. We don't have cost for that. */ ; @@ -25457,7 +25458,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case ABSU_EXPR: if (fp) { - if (X87_FLOAT_MODE_P (mode)) + if (X87_FLOAT_MODE_P (mode) + && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) /* fabs. */ stmt_cost = ix86_cost->fabs; else diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 27d34bd..0abf134 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -721,13 +721,9 @@ Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation. msse4 -Target RejectNegative Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save +Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation. -mno-sse4 -Target RejectNegative InverseMask(ISA_SSE4_1) Var(ix86_isa_flags) Save -Do not support SSE4.1 and SSE4.2 built-in functions and code generation. 
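The i386-common.cc hunk above folds the separate OPT_msse4 / OPT_mno_sse4 cases into one case that branches on the option's value, which is what lets the RejectNegative mno-sse4 entry below be dropped. A minimal standalone sketch of that shape, using hypothetical mask values rather than the real ISA bits:

#include <stdbool.h>
#include <stdio.h>

#define MASK_SSE4_SET    0x1fu   /* hypothetical bits enabled by -msse4      */
#define MASK_SSE4_UNSET  0x3fu   /* hypothetical bits cleared by -mno-sse4   */

struct opts { unsigned isa_flags, isa_flags_explicit; };

/* One handler for both forms: VALUE is true for -msse4, false for -mno-sse4. */
static void
handle_msse4 (struct opts *o, bool value)
{
  if (value)
    {
      o->isa_flags |= MASK_SSE4_SET;
      o->isa_flags_explicit |= MASK_SSE4_SET;
    }
  else
    {
      o->isa_flags &= ~MASK_SSE4_UNSET;
      o->isa_flags_explicit |= MASK_SSE4_UNSET;
    }
}

int main (void)
{
  struct opts o = { 0, 0 };
  handle_msse4 (&o, true);    /* -msse4 */
  handle_msse4 (&o, false);   /* -mno-sse4, now simply the negated form */
  printf ("flags=%#x explicit=%#x\n", o.isa_flags, o.isa_flags_explicit);
  return 0;
}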
- msse5 Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed) ;; Deprecated diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index aae2d27..66c8b29 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1171,461 +1171,511 @@ expand_vector_init_trailing_same_elem (rtx target, } static void -expand_const_vector (rtx target, rtx src) +expand_const_vec_duplicate (rtx target, rtx src, rtx elt) { machine_mode mode = GET_MODE (target); rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); - rtx elt; - if (const_vec_duplicate_p (src, &elt)) + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + gcc_assert (rtx_equal_p (elt, const0_rtx) + || rtx_equal_p (elt, const1_rtx)); + + rtx ops[] = {result, src}; + emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops); + } + else if (valid_vec_immediate_p (src)) { - if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) - { - gcc_assert (rtx_equal_p (elt, const0_rtx) - || rtx_equal_p (elt, const1_rtx)); - rtx ops[] = {result, src}; - emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops); - } /* Element in range -16 ~ 15 integer or 0.0 floating-point, we use vmv.v.i instruction. */ - else if (valid_vec_immediate_p (src)) + rtx ops[] = {result, src}; + emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops); + } + else + { + /* Emit vec_duplicate<mode> split pattern before RA so that + we could have a better optimization opportunity in LICM + which will hoist vmv.v.x outside the loop and in fwprop && combine + which will transform 'vv' into 'vx' instruction. + + The reason we don't emit vec_duplicate<mode> split pattern during + RA since the split stage after RA is a too late stage to generate + RVV instruction which need an additional register (We can't + allocate a new register after RA) for VL operand of vsetvl + instruction (vsetvl a5, zero). */ + if (lra_in_progress) { - rtx ops[] = {result, src}; - emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops); + rtx ops[] = {result, elt}; + emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops); } else { - /* Emit vec_duplicate<mode> split pattern before RA so that - we could have a better optimization opportunity in LICM - which will hoist vmv.v.x outside the loop and in fwprop && combine - which will transform 'vv' into 'vx' instruction. - - The reason we don't emit vec_duplicate<mode> split pattern during - RA since the split stage after RA is a too late stage to generate - RVV instruction which need an additional register (We can't - allocate a new register after RA) for VL operand of vsetvl - instruction (vsetvl a5, zero). */ - if (lra_in_progress) - { - rtx ops[] = {result, elt}; - emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops); - } - else - { - struct expand_operand ops[2]; - enum insn_code icode = optab_handler (vec_duplicate_optab, mode); - gcc_assert (icode != CODE_FOR_nothing); - create_output_operand (&ops[0], result, mode); - create_input_operand (&ops[1], elt, GET_MODE_INNER (mode)); - expand_insn (icode, 2, ops); - result = ops[0].value; - } + struct expand_operand ops[2]; + enum insn_code icode = optab_handler (vec_duplicate_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + create_output_operand (&ops[0], result, mode); + create_input_operand (&ops[1], elt, GET_MODE_INNER (mode)); + expand_insn (icode, 2, ops); + result = ops[0].value; } - - if (result != target) - emit_move_insn (target, result); - return; } - /* Support scalable const series vector. 
*/ - rtx base, step; - if (const_vec_series_p (src, &base, &step)) - { - expand_vec_series (result, base, step); + if (result != target) + emit_move_insn (target, result); +} - if (result != target) - emit_move_insn (target, result); - return; +static void +expand_const_vec_series (rtx target, rtx base, rtx step) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + + expand_vec_series (result, base, step); + + if (result != target) + emit_move_insn (target, result); +} + + +/* We handle the case that we can find a vector container to hold + element bitsize = NPATTERNS * ele_bitsize. + + NPATTERNS = 8, element width = 8 + v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } + In this case, we can combine NPATTERNS element into a larger + element. Use element width = 64 and broadcast a vector with + all element equal to 0x0706050403020100. */ + +static void +expand_const_vector_duplicate_repeating (rtx target, rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + rtx ele = builder->get_merged_repeating_sequence (); + rtx dup; + + if (lra_in_progress) + { + dup = gen_reg_rtx (builder->new_mode ()); + rtx ops[] = {dup, ele}; + emit_vlmax_insn (code_for_pred_broadcast (builder->new_mode ()), + UNARY_OP, ops); } + else + dup = expand_vector_broadcast (builder->new_mode (), ele); - /* Handle variable-length vector. */ - unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src); - unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); - rvv_builder builder (mode, npatterns, nelts_per_pattern); - for (unsigned int i = 0; i < nelts_per_pattern; i++) + emit_move_insn (result, gen_lowpart (mode, dup)); + + if (result != target) + emit_move_insn (target, result); +} + +/* We handle the case that we can't find a vector container to hold + element bitsize = NPATTERNS * ele_bitsize. + + NPATTERNS = 8, element width = 16 + v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } + Since NPATTERNS * element width = 128, we can't find a container + to hold it. + + In this case, we use NPATTERNS merge operations to generate such + vector. */ + +static void +expand_const_vector_duplicate_default (rtx target, rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + unsigned int nbits = builder->npatterns () - 1; + + /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ + rtx vid = gen_reg_rtx (builder->int_mode ()); + rtx op[] = {vid}; + emit_vlmax_insn (code_for_pred_series (builder->int_mode ()), NULLARY_OP, op); + + /* Generate vid_repeat = { 0, 1, ... nbits, ... } */ + rtx vid_repeat = gen_reg_rtx (builder->int_mode ()); + rtx and_ops[] = {vid_repeat, vid, + gen_int_mode (nbits, builder->inner_int_mode ())}; + emit_vlmax_insn (code_for_pred_scalar (AND, builder->int_mode ()), BINARY_OP, + and_ops); + + rtx tmp1 = gen_reg_rtx (builder->mode ()); + rtx dup_ops[] = {tmp1, builder->elt (0)}; + emit_vlmax_insn (code_for_pred_broadcast (builder->mode ()), UNARY_OP, + dup_ops); + + for (unsigned int i = 1; i < builder->npatterns (); i++) { - for (unsigned int j = 0; j < npatterns; j++) - builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j)); + /* Generate mask according to i. */ + rtx mask = gen_reg_rtx (builder->mask_mode ()); + rtx const_vec = gen_const_vector_dup (builder->int_mode (), i); + expand_vec_cmp (mask, EQ, vid_repeat, const_vec); + + /* Merge scalar to each i. 
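A scalar model of the merge-based path in expand_const_vector_duplicate_default above: broadcast elt(0), then do one masked merge per remaining pattern index, overwriting the lanes where vid & (NPATTERNS - 1) equals i. Illustrative C only, assuming NPATTERNS is a power of two; the element values are the { 0, 2, 6, 7, ... } example from the comment:

#include <stdio.h>

int main (void)
{
  const int elts[4] = { 0, 2, 6, 7 };   /* the repeating pattern */
  const int npatterns = 4, n = 12;
  int result[12];

  for (int lane = 0; lane < n; lane++)  /* broadcast elt[0] */
    result[lane] = elts[0];
  for (int i = 1; i < npatterns; i++)   /* one merge per pattern index */
    for (int lane = 0; lane < n; lane++)
      if ((lane & (npatterns - 1)) == i)
        result[lane] = elts[i];

  for (int lane = 0; lane < n; lane++)
    printf ("%d ", result[lane]);       /* 0 2 6 7 0 2 6 7 0 2 6 7 */
  printf ("\n");
  return 0;
}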
*/ + rtx tmp2 = gen_reg_rtx (builder->mode ()); + rtx merge_ops[] = {tmp2, tmp1, builder->elt (i), mask}; + insn_code icode = code_for_pred_merge_scalar (builder->mode ()); + emit_vlmax_insn (icode, MERGE_OP, merge_ops); + tmp1 = tmp2; } - builder.finalize (); - if (CONST_VECTOR_DUPLICATE_P (src)) + emit_move_insn (result, tmp1); + + if (result != target) + emit_move_insn (target, result); +} + +/* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1 + E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... } + NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... } + The elements within NPATTERNS are not necessary regular. */ +static void +expand_const_vector_duplicate (rtx target, rvv_builder *builder) +{ + if (builder->can_duplicate_repeating_sequence_p ()) + return expand_const_vector_duplicate_repeating (target, builder); + else + return expand_const_vector_duplicate_default (target, builder); +} + +static void +expand_const_vector_single_step_npatterns (rtx target, rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + + /* Describe the case by choosing NPATTERNS = 4 as an example. */ + insn_code icode; + + /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ + rtx vid = gen_reg_rtx (builder->mode ()); + rtx vid_ops[] = {vid}; + icode = code_for_pred_series (builder->mode ()); + emit_vlmax_insn (icode, NULLARY_OP, vid_ops); + + if (builder->npatterns_all_equal_p ()) { - /* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1 - E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... } - NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... } - The elements within NPATTERNS are not necessary regular. */ - if (builder.can_duplicate_repeating_sequence_p ()) + /* Generate the variable-length vector following this rule: + { a, a, a + step, a + step, a + step * 2, a + step * 2, ...} + E.g. { 0, 0, 8, 8, 16, 16, ... } */ + + /* We want to create a pattern where value[idx] = floor (idx / + NPATTERNS). As NPATTERNS is always a power of two we can + rewrite this as = idx & -NPATTERNS. */ + /* Step 2: VID AND -NPATTERNS: + { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... } */ + rtx imm = gen_int_mode (-builder->npatterns (), builder->inner_mode ()); + rtx tmp1 = gen_reg_rtx (builder->mode ()); + rtx and_ops[] = {tmp1, vid, imm}; + icode = code_for_pred_scalar (AND, builder->mode ()); + emit_vlmax_insn (icode, BINARY_OP, and_ops); + + /* Step 3: Convert to step size 1. */ + rtx tmp2 = gen_reg_rtx (builder->mode ()); + /* log2 (npatterns) to get the shift amount to convert + Eg. { 0, 0, 0, 0, 4, 4, ... } + into { 0, 0, 0, 0, 1, 1, ... }. */ + HOST_WIDE_INT shift_amt = exact_log2 (builder->npatterns ()); + rtx shift = gen_int_mode (shift_amt, builder->inner_mode ()); + rtx shift_ops[] = {tmp2, tmp1, shift}; + icode = code_for_pred_scalar (ASHIFTRT, builder->mode ()); + emit_vlmax_insn (icode, BINARY_OP, shift_ops); + + /* Step 4: Multiply to step size n. */ + HOST_WIDE_INT step_size = + INTVAL (builder->elt (builder->npatterns ())) + - INTVAL (builder->elt (0)); + rtx tmp3 = gen_reg_rtx (builder->mode ()); + if (pow2p_hwi (step_size)) { - /* We handle the case that we can find a vector container to hold - element bitsize = NPATTERNS * ele_bitsize. - - NPATTERNS = 8, element width = 8 - v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } - In this case, we can combine NPATTERNS element into a larger - element. Use element width = 64 and broadcast a vector with - all element equal to 0x0706050403020100. 
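The merged-element trick described just above can be checked directly: on a little-endian target such as RISC-V, the repeating byte pattern { 0, 1, ..., 7 } is bit-for-bit the single 64-bit value 0x0706050403020100, so broadcasting that one wide element reproduces the whole vector. A small standalone check, assuming little-endian byte order:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main (void)
{
  uint8_t pattern[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  uint64_t merged;
  memcpy (&merged, pattern, sizeof merged);
  /* Prints 0x0706050403020100 on a little-endian host.  */
  printf ("0x%016llx\n", (unsigned long long) merged);
  return 0;
}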
*/ - rtx ele = builder.get_merged_repeating_sequence (); - rtx dup; - if (lra_in_progress) - { - dup = gen_reg_rtx (builder.new_mode ()); - rtx ops[] = {dup, ele}; - emit_vlmax_insn (code_for_pred_broadcast - (builder.new_mode ()), UNARY_OP, ops); - } - else - dup = expand_vector_broadcast (builder.new_mode (), ele); - emit_move_insn (result, gen_lowpart (mode, dup)); + /* Power of 2 can be handled with a left shift. */ + HOST_WIDE_INT shift = exact_log2 (step_size); + rtx shift_amount = gen_int_mode (shift, Pmode); + insn_code icode = code_for_pred_scalar (ASHIFT, mode); + rtx ops[] = {tmp3, tmp2, shift_amount}; + emit_vlmax_insn (icode, BINARY_OP, ops); } else { - /* We handle the case that we can't find a vector container to hold - element bitsize = NPATTERNS * ele_bitsize. - - NPATTERNS = 8, element width = 16 - v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } - Since NPATTERNS * element width = 128, we can't find a container - to hold it. - - In this case, we use NPATTERNS merge operations to generate such - vector. */ - unsigned int nbits = npatterns - 1; - - /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ - rtx vid = gen_reg_rtx (builder.int_mode ()); - rtx op[] = {vid}; - emit_vlmax_insn (code_for_pred_series (builder.int_mode ()), - NULLARY_OP, op); - - /* Generate vid_repeat = { 0, 1, ... nbits, ... } */ - rtx vid_repeat = gen_reg_rtx (builder.int_mode ()); - rtx and_ops[] = {vid_repeat, vid, - gen_int_mode (nbits, builder.inner_int_mode ())}; - emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()), - BINARY_OP, and_ops); - - rtx tmp1 = gen_reg_rtx (builder.mode ()); - rtx dup_ops[] = {tmp1, builder.elt (0)}; - emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), UNARY_OP, - dup_ops); - for (unsigned int i = 1; i < builder.npatterns (); i++) - { - /* Generate mask according to i. */ - rtx mask = gen_reg_rtx (builder.mask_mode ()); - rtx const_vec = gen_const_vector_dup (builder.int_mode (), i); - expand_vec_cmp (mask, EQ, vid_repeat, const_vec); - - /* Merge scalar to each i. */ - rtx tmp2 = gen_reg_rtx (builder.mode ()); - rtx merge_ops[] = {tmp2, tmp1, builder.elt (i), mask}; - insn_code icode = code_for_pred_merge_scalar (builder.mode ()); - emit_vlmax_insn (icode, MERGE_OP, merge_ops); - tmp1 = tmp2; - } - emit_move_insn (result, tmp1); + rtx mult_amt = gen_int_mode (step_size, builder->inner_mode ()); + insn_code icode = code_for_pred_scalar (MULT, builder->mode ()); + rtx ops[] = {tmp3, tmp2, mult_amt}; + emit_vlmax_insn (icode, BINARY_OP, ops); + } + + /* Step 5: Add starting value to all elements. */ + HOST_WIDE_INT init_val = INTVAL (builder->elt (0)); + if (init_val == 0) + emit_move_insn (result, tmp3); + else + { + rtx dup = gen_const_vector_dup (builder->mode (), init_val); + rtx add_ops[] = {result, tmp3, dup}; + icode = code_for_pred (PLUS, builder->mode ()); + emit_vlmax_insn (icode, BINARY_OP, add_ops); } } - else if (CONST_VECTOR_STEPPED_P (src)) + else { - gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); - if (builder.single_step_npatterns_p ()) + /* Generate the variable-length vector following this rule: + { a, b, a + step, b + step, a + step*2, b + step*2, ... } */ + if (builder->npatterns_vid_diff_repeated_p ()) { - /* Describe the case by choosing NPATTERNS = 4 as an example. */ - insn_code icode; - - /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. 
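For the single-step case where all patterns are equal, the construction shown above (vid & -NPATTERNS, arithmetic shift right by log2 NPATTERNS, multiply by the step, add the starting value) has a direct scalar reading. An illustrative C loop for NPATTERNS = 4, start 0 and step 8, which produces { 0, 0, 0, 0, 8, 8, 8, 8, 16, ... }:

#include <stdio.h>

int main (void)
{
  const int npatterns = 4, base = 0, step = 8, n = 16;
  for (int vid = 0; vid < n; vid++)
    {
      int v = vid & -npatterns;   /* { 0, 0, 0, 0, 4, 4, 4, 4, 8, ... }  */
      v >>= 2;                    /* log2 (npatterns) == 2               */
      v = v * step + base;        /* { 0, 0, 0, 0, 8, 8, 8, 8, 16, ... } */
      printf ("%d ", v);
    }
  printf ("\n");
  return 0;
}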
*/ - rtx vid = gen_reg_rtx (builder.mode ()); - rtx vid_ops[] = {vid}; - icode = code_for_pred_series (builder.mode ()); - emit_vlmax_insn (icode, NULLARY_OP, vid_ops); - - if (builder.npatterns_all_equal_p ()) + /* Case 1: For example as below: + {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... } + We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is + repeated as below after minus vid. + {3, 1, -1, -3, 3, 1, -1, -3...} + Then we can simplify the diff code gen to at most + npatterns(). */ + rvv_builder v (builder->mode (), builder->npatterns (), 1); + + /* Step 1: Generate diff = TARGET - VID. */ + for (unsigned int i = 0; i < v.npatterns (); ++i) { - /* Generate the variable-length vector following this rule: - { a, a, a + step, a + step, a + step * 2, a + step * 2, ...} - E.g. { 0, 0, 8, 8, 16, 16, ... } */ - - /* We want to create a pattern where value[idx] = floor (idx / - NPATTERNS). As NPATTERNS is always a power of two we can - rewrite this as = idx & -NPATTERNS. */ - /* Step 2: VID AND -NPATTERNS: - { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... } - */ - rtx imm - = gen_int_mode (-builder.npatterns (), builder.inner_mode ()); - rtx tmp1 = gen_reg_rtx (builder.mode ()); - rtx and_ops[] = {tmp1, vid, imm}; - icode = code_for_pred_scalar (AND, builder.mode ()); - emit_vlmax_insn (icode, BINARY_OP, and_ops); - - /* Step 3: Convert to step size 1. */ - rtx tmp2 = gen_reg_rtx (builder.mode ()); - /* log2 (npatterns) to get the shift amount to convert - Eg. { 0, 0, 0, 0, 4, 4, ... } - into { 0, 0, 0, 0, 1, 1, ... }. */ - HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ()) ; - rtx shift = gen_int_mode (shift_amt, builder.inner_mode ()); - rtx shift_ops[] = {tmp2, tmp1, shift}; - icode = code_for_pred_scalar (ASHIFTRT, builder.mode ()); - emit_vlmax_insn (icode, BINARY_OP, shift_ops); - - /* Step 4: Multiply to step size n. */ - HOST_WIDE_INT step_size = - INTVAL (builder.elt (builder.npatterns ())) - - INTVAL (builder.elt (0)); - rtx tmp3 = gen_reg_rtx (builder.mode ()); - if (pow2p_hwi (step_size)) - { - /* Power of 2 can be handled with a left shift. */ - HOST_WIDE_INT shift = exact_log2 (step_size); - rtx shift_amount = gen_int_mode (shift, Pmode); - insn_code icode = code_for_pred_scalar (ASHIFT, mode); - rtx ops[] = {tmp3, tmp2, shift_amount}; - emit_vlmax_insn (icode, BINARY_OP, ops); - } - else - { - rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ()); - insn_code icode = code_for_pred_scalar (MULT, builder.mode ()); - rtx ops[] = {tmp3, tmp2, mult_amt}; - emit_vlmax_insn (icode, BINARY_OP, ops); - } - - /* Step 5: Add starting value to all elements. */ - HOST_WIDE_INT init_val = INTVAL (builder.elt (0)); - if (init_val == 0) - emit_move_insn (result, tmp3); - else - { - rtx dup = gen_const_vector_dup (builder.mode (), init_val); - rtx add_ops[] = {result, tmp3, dup}; - icode = code_for_pred (PLUS, builder.mode ()); - emit_vlmax_insn (icode, BINARY_OP, add_ops); - } + poly_int64 diff = rtx_to_poly_int64 (builder->elt (i)) - i; + v.quick_push (gen_int_mode (diff, v.inner_mode ())); } - else - { - /* Generate the variable-length vector following this rule: - { a, b, a + step, b + step, a + step*2, b + step*2, ... } */ - if (builder.npatterns_vid_diff_repeated_p ()) - { - /* Case 1: For example as below: - {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... } - We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is - repeated as below after minus vid. - {3, 1, -1, -3, 3, 1, -1, -3...} - Then we can simplify the diff code gen to at most - npatterns(). 
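Case 1 above has an equally simple scalar reading: when v[i] - i repeats with period NPATTERNS, the whole vector is vid plus that repeating difference, so only NPATTERNS constants are needed. An illustrative C check for the { 3, 2, 1, 0, 7, 6, 5, 4, ... } example from the comment:

#include <stdio.h>

int main (void)
{
  const int v[12] = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 };
  const int npatterns = 4;
  int diff[4];

  for (int i = 0; i < npatterns; i++)
    diff[i] = v[i] - i;                        /* { 3, 1, -1, -3 } */
  for (int i = 0; i < 12; i++)
    printf ("%d ", i + diff[i % npatterns]);   /* reproduces v[]   */
  printf ("\n");
  return 0;
}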
*/ - rvv_builder v (builder.mode (), builder.npatterns (), 1); - - /* Step 1: Generate diff = TARGET - VID. */ - for (unsigned int i = 0; i < v.npatterns (); ++i) - { - poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i; - v.quick_push (gen_int_mode (diff, v.inner_mode ())); - } - - /* Step 2: Generate result = VID + diff. */ - rtx vec = v.build (); - rtx add_ops[] = {result, vid, vec}; - emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()), - BINARY_OP, add_ops); - } - else - { - /* Case 2: For example as below: - { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... } - */ - rvv_builder v (builder.mode (), builder.npatterns (), 1); - - /* Step 1: Generate { a, b, a, b, ... } */ - for (unsigned int i = 0; i < v.npatterns (); ++i) - v.quick_push (builder.elt (i)); - rtx new_base = v.build (); - - /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS). */ - rtx shift_count - = gen_int_mode (exact_log2 (builder.npatterns ()), - builder.inner_mode ()); - rtx tmp1 = gen_reg_rtx (builder.mode ()); - rtx shift_ops[] = {tmp1, vid, shift_count}; - emit_vlmax_insn (code_for_pred_scalar - (LSHIFTRT, builder.mode ()), BINARY_OP, - shift_ops); - - /* Step 3: Generate tmp2 = tmp1 * step. */ - rtx tmp2 = gen_reg_rtx (builder.mode ()); - rtx step - = simplify_binary_operation (MINUS, builder.inner_mode (), - builder.elt (v.npatterns()), - builder.elt (0)); - expand_vec_series (tmp2, const0_rtx, step, tmp1); - - /* Step 4: Generate result = tmp2 + new_base. */ - rtx add_ops[] = {result, tmp2, new_base}; - emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()), - BINARY_OP, add_ops); - } - } + /* Step 2: Generate result = VID + diff. */ + rtx vec = v.build (); + rtx add_ops[] = {result, vid, vec}; + emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP, + add_ops); } - else if (builder.interleaved_stepped_npatterns_p ()) + else { - rtx base1 = builder.elt (0); - rtx base2 = builder.elt (1); - poly_int64 step1 - = rtx_to_poly_int64 (builder.elt (builder.npatterns ())) - - rtx_to_poly_int64 (base1); - poly_int64 step2 - = rtx_to_poly_int64 (builder.elt (builder.npatterns () + 1)) - - rtx_to_poly_int64 (base2); + /* Case 2: For example as below: + { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... } + */ + rvv_builder v (builder->mode (), builder->npatterns (), 1); + + /* Step 1: Generate { a, b, a, b, ... } */ + for (unsigned int i = 0; i < v.npatterns (); ++i) + v.quick_push (builder->elt (i)); + rtx new_base = v.build (); + + /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS). */ + rtx shift_count = gen_int_mode (exact_log2 (builder->npatterns ()), + builder->inner_mode ()); + rtx tmp1 = gen_reg_rtx (builder->mode ()); + rtx shift_ops[] = {tmp1, vid, shift_count}; + emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder->mode ()), + BINARY_OP, shift_ops); + + /* Step 3: Generate tmp2 = tmp1 * step. */ + rtx tmp2 = gen_reg_rtx (builder->mode ()); + rtx step + = simplify_binary_operation (MINUS, builder->inner_mode (), + builder->elt (v.npatterns()), + builder->elt (0)); + expand_vec_series (tmp2, const0_rtx, step, tmp1); + + /* Step 4: Generate result = tmp2 + new_base. */ + rtx add_ops[] = {result, tmp2, new_base}; + emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP, + add_ops); + } + } - /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW - integer vector mode to generate such vector efficiently. + if (result != target) + emit_move_insn (target, result); +} - E.g. EEW = 16, { 2, 0, 4, 0, ... 
} +static void +expand_const_vector_interleaved_stepped_npatterns (rtx target, rtx src, + rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + rtx base1 = builder->elt (0); + rtx base2 = builder->elt (1); - can be interpreted into: + poly_int64 step1 = rtx_to_poly_int64 (builder->elt (builder->npatterns ())) + - rtx_to_poly_int64 (base1); + poly_int64 step2 = + rtx_to_poly_int64 (builder->elt (builder->npatterns () + 1)) + - rtx_to_poly_int64 (base2); - EEW = 32, { 2, 4, ... }. + /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW + integer vector mode to generate such vector efficiently. - Both the series1 and series2 may overflow before taking the IOR - to generate the final result. However, only series1 matters - because the series2 will shift before IOR, thus the overflow - bits will never pollute the final result. + E.g. EEW = 16, { 2, 0, 4, 0, ... } - For now we forbid the negative steps and overflow, and they - will fall back to the default merge way to generate the - const_vector. */ + can be interpreted into: - unsigned int new_smode_bitsize = builder.inner_bits_size () * 2; - scalar_int_mode new_smode; - machine_mode new_mode; - poly_uint64 new_nunits - = exact_div (GET_MODE_NUNITS (builder.mode ()), 2); + EEW = 32, { 2, 4, ... }. - poly_int64 base1_poly = rtx_to_poly_int64 (base1); - bool overflow_smode_p = false; + Both the series1 and series2 may overflow before taking the IOR + to generate the final result. However, only series1 matters + because the series2 will shift before IOR, thus the overflow + bits will never pollute the final result. - if (!step1.is_constant ()) - overflow_smode_p = true; - else - { - int elem_count = XVECLEN (src, 0); - uint64_t step1_val = step1.to_constant (); - uint64_t base1_val = base1_poly.to_constant (); - uint64_t elem_val = base1_val + (elem_count - 1) * step1_val; + For now we forbid the negative steps and overflow, and they + will fall back to the default merge way to generate the + const_vector. */ - if ((elem_val >> builder.inner_bits_size ()) != 0) - overflow_smode_p = true; - } + unsigned int new_smode_bitsize = builder->inner_bits_size () * 2; + scalar_int_mode new_smode; + machine_mode new_mode; + poly_uint64 new_nunits = exact_div (GET_MODE_NUNITS (builder->mode ()), 2); + + poly_int64 base1_poly = rtx_to_poly_int64 (base1); + bool overflow_smode_p = false; + + if (!step1.is_constant ()) + overflow_smode_p = true; + else + { + int elem_count = XVECLEN (src, 0); + uint64_t step1_val = step1.to_constant (); + uint64_t base1_val = base1_poly.to_constant (); + uint64_t elem_val = base1_val + (elem_count - 1) * step1_val; - if (known_ge (step1, 0) && known_ge (step2, 0) - && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode) - && get_vector_mode (new_smode, new_nunits).exists (&new_mode) - && !overflow_smode_p) + if ((elem_val >> builder->inner_bits_size ()) != 0) + overflow_smode_p = true; + } + + if (known_ge (step1, 0) && known_ge (step2, 0) + && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode) + && get_vector_mode (new_smode, new_nunits).exists (&new_mode) + && !overflow_smode_p) + { + rtx tmp1 = gen_reg_rtx (new_mode); + base1 = gen_int_mode (base1_poly, new_smode); + expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode)); + + if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0)) + /* { 1, 0, 2, 0, ... }. 
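The { 1, 0, 2, 0, ... } branch above is the pure reinterpretation case described in the function comment: with the odd lanes zero, the narrow pattern is bit-for-bit a series at twice the element width, so one series plus a lowpart move suffices. A standalone little-endian check for the EEW = 16 example { 2, 0, 4, 0, 6, 0 }:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main (void)
{
  uint16_t narrow[6] = { 2, 0, 4, 0, 6, 0 };
  uint32_t wide[3];
  memcpy (wide, narrow, sizeof wide);
  /* Prints "2 4 6" on a little-endian host: the EEW = 32 series.  */
  printf ("%u %u %u\n", (unsigned) wide[0], (unsigned) wide[1], (unsigned) wide[2]);
  return 0;
}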
*/ + emit_move_insn (result, gen_lowpart (mode, tmp1)); + else if (known_eq (step2, 0)) + { + /* { 1, 1, 2, 1, ... }. */ + rtx scalar = expand_simple_binop ( + Xmode, ASHIFT, gen_int_mode (rtx_to_poly_int64 (base2), Xmode), + gen_int_mode (builder->inner_bits_size (), Xmode), NULL_RTX, false, + OPTAB_DIRECT); + scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0); + rtx tmp2 = gen_reg_rtx (new_mode); + rtx ior_ops[] = {tmp2, tmp1, scalar}; + emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode), BINARY_OP, + ior_ops); + emit_move_insn (result, gen_lowpart (mode, tmp2)); + } + else + { + /* { 1, 3, 2, 6, ... }. */ + rtx tmp2 = gen_reg_rtx (new_mode); + base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode); + expand_vec_series (tmp2, base2, gen_int_mode (step2, new_smode)); + rtx shifted_tmp2; + rtx shift = gen_int_mode (builder->inner_bits_size (), Xmode); + if (lra_in_progress) { - rtx tmp1 = gen_reg_rtx (new_mode); - base1 = gen_int_mode (base1_poly, new_smode); - expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode)); - - if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0)) - /* { 1, 0, 2, 0, ... }. */ - emit_move_insn (result, gen_lowpart (mode, tmp1)); - else if (known_eq (step2, 0)) - { - /* { 1, 1, 2, 1, ... }. */ - rtx scalar = expand_simple_binop ( - Xmode, ASHIFT, - gen_int_mode (rtx_to_poly_int64 (base2), Xmode), - gen_int_mode (builder.inner_bits_size (), Xmode), - NULL_RTX, false, OPTAB_DIRECT); - scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0); - rtx tmp2 = gen_reg_rtx (new_mode); - rtx ior_ops[] = {tmp2, tmp1, scalar}; - emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode), - BINARY_OP, ior_ops); - emit_move_insn (result, gen_lowpart (mode, tmp2)); - } - else - { - /* { 1, 3, 2, 6, ... }. */ - rtx tmp2 = gen_reg_rtx (new_mode); - base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode); - expand_vec_series (tmp2, base2, - gen_int_mode (step2, new_smode)); - rtx shifted_tmp2; - rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode); - if (lra_in_progress) - { - shifted_tmp2 = gen_reg_rtx (new_mode); - rtx shift_ops[] = {shifted_tmp2, tmp2, shift}; - emit_vlmax_insn (code_for_pred_scalar - (ASHIFT, new_mode), BINARY_OP, - shift_ops); - } - else - shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2, - shift, NULL_RTX, false, - OPTAB_DIRECT); - rtx tmp3 = gen_reg_rtx (new_mode); - rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2}; - emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, - ior_ops); - emit_move_insn (result, gen_lowpart (mode, tmp3)); - } + shifted_tmp2 = gen_reg_rtx (new_mode); + rtx shift_ops[] = {shifted_tmp2, tmp2, shift}; + emit_vlmax_insn (code_for_pred_scalar (ASHIFT, new_mode), + BINARY_OP, shift_ops); } else - { - rtx vid = gen_reg_rtx (mode); - expand_vec_series (vid, const0_rtx, const1_rtx); - /* Transform into { 0, 0, 1, 1, 2, 2, ... }. 
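The fallback above (a shifted vid plus a parity-mask merge) reads, lane by lane, as: element i takes series1 or series2 evaluated at index i >> 1 depending on whether i is even or odd. An illustrative scalar version with made-up values base1 = -4, base2 = 4 and both steps equal to 1:

#include <stdio.h>

int main (void)
{
  const int base1 = -4, step1 = 1, base2 = 4, step2 = 1, n = 12;
  for (int vid = 0; vid < n; vid++)
    {
      int idx = vid >> 1;                        /* { 0, 0, 1, 1, 2, 2, ... } */
      int val = (vid & 1) ? base2 + step2 * idx  /* odd lanes:  series2 */
                          : base1 + step1 * idx; /* even lanes: series1 */
      printf ("%d ", val);                       /* -4 4 -3 5 -2 6 ...  */
    }
  printf ("\n");
  return 0;
}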
*/ - rtx shifted_vid; - if (lra_in_progress) - { - shifted_vid = gen_reg_rtx (mode); - rtx shift = gen_int_mode (1, Xmode); - rtx shift_ops[] = {shifted_vid, vid, shift}; - emit_vlmax_insn (code_for_pred_scalar - (ASHIFT, mode), BINARY_OP, - shift_ops); - } - else - shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid, - const1_rtx, NULL_RTX, - false, OPTAB_DIRECT); - rtx tmp1 = gen_reg_rtx (mode); - rtx tmp2 = gen_reg_rtx (mode); - expand_vec_series (tmp1, base1, - gen_int_mode (step1, builder.inner_mode ()), - shifted_vid); - expand_vec_series (tmp2, base2, - gen_int_mode (step2, builder.inner_mode ()), - shifted_vid); - - /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */ - rtx and_vid = gen_reg_rtx (mode); - rtx and_ops[] = {and_vid, vid, const1_rtx}; - emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, - and_ops); - rtx mask = gen_reg_rtx (builder.mask_mode ()); - expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode)); - - rtx ops[] = {result, tmp1, tmp2, mask}; - emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops); - } + shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2, shift, + NULL_RTX, false, OPTAB_DIRECT); + rtx tmp3 = gen_reg_rtx (new_mode); + rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2}; + emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, ior_ops); + emit_move_insn (result, gen_lowpart (mode, tmp3)); } - else - /* TODO: We will enable more variable-length vector in the future. */ - gcc_unreachable (); } else - gcc_unreachable (); + { + rtx vid = gen_reg_rtx (mode); + expand_vec_series (vid, const0_rtx, const1_rtx); + /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */ + rtx shifted_vid; + if (lra_in_progress) + { + shifted_vid = gen_reg_rtx (mode); + rtx shift = gen_int_mode (1, Xmode); + rtx shift_ops[] = {shifted_vid, vid, shift}; + emit_vlmax_insn (code_for_pred_scalar (ASHIFT, mode), BINARY_OP, + shift_ops); + } + else + shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx, + NULL_RTX, false, OPTAB_DIRECT); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + expand_vec_series (tmp1, base1, + gen_int_mode (step1, builder->inner_mode ()), + shifted_vid); + expand_vec_series (tmp2, base2, + gen_int_mode (step2, builder->inner_mode ()), + shifted_vid); + + /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */ + rtx and_vid = gen_reg_rtx (mode); + rtx and_ops[] = {and_vid, vid, const1_rtx}; + emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops); + rtx mask = gen_reg_rtx (builder->mask_mode ()); + expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode)); + + rtx ops[] = {result, tmp1, tmp2, mask}; + emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops); + } if (result != target) emit_move_insn (target, result); } +static void +expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder) +{ + gcc_assert (GET_MODE_CLASS (GET_MODE (target)) == MODE_VECTOR_INT); + + if (builder->single_step_npatterns_p ()) + return expand_const_vector_single_step_npatterns (target, builder); + else if (builder->interleaved_stepped_npatterns_p ()) + return expand_const_vector_interleaved_stepped_npatterns (target, src, + builder); + + /* TODO: We will enable more variable-length vector in the future. */ + gcc_unreachable (); +} + +static void +expand_const_vector (rtx target, rtx src) +{ + rtx elt; + if (const_vec_duplicate_p (src, &elt)) + return expand_const_vec_duplicate (target, src, elt); + + /* Support scalable const series vector. 
*/ + rtx base, step; + if (const_vec_series_p (src, &base, &step)) + return expand_const_vec_series(target, base, step); + + /* Handle variable-length vector. */ + unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src); + unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); + rvv_builder builder (GET_MODE (target), npatterns, nelts_per_pattern); + + for (unsigned int i = 0; i < nelts_per_pattern; i++) + { + for (unsigned int j = 0; j < npatterns; j++) + builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j)); + } + + builder.finalize (); + + if (CONST_VECTOR_DUPLICATE_P (src)) + return expand_const_vector_duplicate (target, &builder); + else if (CONST_VECTOR_STEPPED_P (src)) + return expand_const_vector_stepped (target, src, &builder); + + gcc_unreachable (); +} + /* Get the frm mode with given CONST_INT rtx, the default mode is FRM_DYN. */ enum floating_point_rounding_mode diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 6caac89..2c3ef3d 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -9707,11 +9707,6 @@ type_passed_as (tree type) /* Pass classes with copy ctors by invisible reference. */ if (TREE_ADDRESSABLE (type)) type = build_reference_type (type); - else if (targetm.calls.promote_prototypes (NULL_TREE) - && INTEGRAL_TYPE_P (type) - && COMPLETE_TYPE_P (type) - && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (integer_type_node))) - type = integer_type_node; return type; } @@ -9747,11 +9742,6 @@ convert_for_arg_passing (tree type, tree val, tsubst_flags_t complain) /* Pass classes with copy ctors by invisible reference. */ else if (TREE_ADDRESSABLE (type)) val = build1 (ADDR_EXPR, build_reference_type (type), val); - else if (targetm.calls.promote_prototypes (NULL_TREE) - && INTEGRAL_TYPE_P (type) - && COMPLETE_TYPE_P (type) - && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (integer_type_node))) - val = cp_perform_integral_promotions (val, complain); if (complain & tf_warning) maybe_warn_parm_abi (type, cp_expr_loc_or_input_loc (val)); diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 88f8f34..1b9fdf5 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -1372,17 +1372,12 @@ cxx_safe_arg_type_equiv_p (tree t1, tree t2) && TYPE_PTR_P (t2)) return true; - /* The signedness of the parameter matters only when an integral - type smaller than int is promoted to int, otherwise only the - precision of the parameter matters. - This check should make sure that the callee does not see - undefined values in argument registers. */ + /* Only the precision of the parameter matters. This check should + make sure that the callee does not see undefined values in argument + registers. */ if (INTEGRAL_TYPE_P (t1) && INTEGRAL_TYPE_P (t2) - && TYPE_PRECISION (t1) == TYPE_PRECISION (t2) - && (TYPE_UNSIGNED (t1) == TYPE_UNSIGNED (t2) - || !targetm.calls.promote_prototypes (NULL_TREE) - || TYPE_PRECISION (t1) >= TYPE_PRECISION (integer_type_node))) + && TYPE_PRECISION (t1) == TYPE_PRECISION (t2)) return true; return same_type_p (t1, t2); diff --git a/gcc/gimple.cc b/gcc/gimple.cc index 9acfa38..77b2e50 100644 --- a/gcc/gimple.cc +++ b/gcc/gimple.cc @@ -2916,15 +2916,7 @@ gimple_builtin_call_types_compatible_p (const gimple *stmt, tree fndecl) return true; tree arg = gimple_call_arg (stmt, i); tree type = TREE_VALUE (targs); - if (!useless_type_conversion_p (type, TREE_TYPE (arg)) - /* char/short integral arguments are promoted to int - by several frontends if targetm.calls.promote_prototypes - is true. Allow such promotion too. 
*/ - && !(INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node) - && targetm.calls.promote_prototypes (TREE_TYPE (fndecl)) - && useless_type_conversion_p (integer_type_node, - TREE_TYPE (arg)))) + if (!useless_type_conversion_p (type, TREE_TYPE (arg))) return false; targs = TREE_CHAIN (targs); } diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c index c569523..469e493 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c @@ -1,6 +1,5 @@ /* A test for various conversions of chrecs. */ -/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ void blas (signed char xxx); @@ -22,6 +21,6 @@ void tst(void) blau ((unsigned char) i); } -/* { dg-final { scan-tree-dump-times "& 255" 1 "optimized" } } */ -/* { dg-final { scan-tree-dump-times "= \\(signed char\\)" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "= \\(unsigned char\\)" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "= \\(signed char\\)" 3 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c index 5a7588f..246fea3 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c @@ -1,6 +1,4 @@ -/* If the target returns false for TARGET_PROMOTE_PROTOTYPES, then there - will be no casts for FRE to eliminate and the test will fail. */ -/* { dg-do compile { target i?86-*-* x86_64-*-* hppa*-*-* m68k*-*-* } } */ +/* { dg-do compile } */ /* { dg-options "-O -fno-tree-ccp -fno-tree-forwprop -fdump-tree-fre1-details" } */ /* From PR21608. */ @@ -11,4 +9,4 @@ char bar(char f) return wrap(f); } -/* { dg-final { scan-tree-dump "Replaced \\\(char\\\) .*with " "fre1" } } */ +/* { dg-final { scan-tree-dump-not " = \\\(\[^)\]*\\\)" "fre1" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c index 4fdf25d..628d457 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c @@ -7,11 +7,8 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=short. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ - /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c index 55d3c0a..d1f85b0 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c @@ -7,11 +7,9 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! 
{ x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=char. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c index 6afa2fd..6148abe 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c @@ -7,11 +7,8 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=short. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ - /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c index 5617788..6368798 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c @@ -7,11 +7,8 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=char. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ - /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c index e56e6bc..ee049e7 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c @@ -7,11 +7,8 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! 
{ x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=short. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ - /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c index 87e7379..bad9bcb 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c @@ -7,11 +7,8 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=char. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ - /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index ce77630..2b2f4fc 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -188,16 +188,13 @@ FOO2 (int64_t, imul, *) /* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 1 } } */ /* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 3 } } */ -/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ -/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "orb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 2} } */ /* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 6 } } */ -/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 4 } } */ -/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ +/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 8 } } */ /* { dg-final { scan-assembler-times "xorb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 1 } } */ /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 3 } } */ -/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ -/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ 
/* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */ diff --git a/gcc/testsuite/gfortran.dg/pr112877-1.f90 b/gcc/testsuite/gfortran.dg/pr112877-1.f90 new file mode 100644 index 0000000..f5596f0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr112877-1.f90 @@ -0,0 +1,17 @@ +! { dg-do compile } +! { dg-options "-Os" } + +program test + use iso_c_binding, only: c_short + interface + subroutine foo(a) bind(c) + import c_short + integer(kind=c_short), intent(in), value :: a + end subroutine foo + end interface + integer(kind=c_short) a(5); + call foo (a(3)) +end + +! { dg-final { scan-assembler "movswl\t10\\(%rsp\\), %edi" { target { { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* x86_64-*-gnu* } && { ! ia32 } } } } } +! { dg-final { scan-assembler "movswl\t-14\\(%ebp\\), %eax" { target { { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* x86_64-*-gnu* } && { ia32 } } } } } diff --git a/gcc/tree.cc b/gcc/tree.cc index eccfcc8..98575a5 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -8770,20 +8770,6 @@ tree_builtin_call_types_compatible_p (const_tree call, tree fndecl) && POINTER_TYPE_P (TREE_TYPE (arg)) && tree_nop_conversion_p (type, TREE_TYPE (arg))) continue; - /* char/short integral arguments are promoted to int - by several frontends if targetm.calls.promote_prototypes - is true. Allow such promotion too. */ - if (INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node) - && INTEGRAL_TYPE_P (TREE_TYPE (arg)) - && !TYPE_UNSIGNED (TREE_TYPE (arg)) - && targetm.calls.promote_prototypes (TREE_TYPE (fndecl)) - && (gimple_form - ? useless_type_conversion_p (integer_type_node, - TREE_TYPE (arg)) - : tree_nop_conversion_p (integer_type_node, - TREE_TYPE (arg)))) - continue; return false; } } |