Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                                     |  15
-rw-r--r--  gcc/DATESTAMP                                     |   2
-rw-r--r--  gcc/ada/gcc-interface/utils.cc                    |  24
-rw-r--r--  gcc/c/c-decl.cc                                   |  40
-rw-r--r--  gcc/c/c-typeck.cc                                 |  19
-rw-r--r--  gcc/calls.cc                                      |   9
-rw-r--r--  gcc/common/config/i386/i386-common.cc             |  23
-rw-r--r--  gcc/config.gcc                                    |   1
-rw-r--r--  gcc/config/i386/i386-options.cc                   |   7
-rw-r--r--  gcc/config/i386/i386.cc                           |   6
-rw-r--r--  gcc/config/i386/i386.opt                          |   6
-rw-r--r--  gcc/config/riscv/riscv-v.cc                       | 850
-rw-r--r--  gcc/cp/call.cc                                    |  10
-rw-r--r--  gcc/cp/typeck.cc                                  |  13
-rw-r--r--  gcc/gimple.cc                                     |  10
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c         |   5
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c         |   6
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c   |   5
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c   |   4
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c   |   5
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c   |   5
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c   |   5
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c   |   5
-rw-r--r--  gcc/testsuite/gcc.target/i386/apx-ndd.c           |   9
-rw-r--r--  gcc/testsuite/gfortran.dg/pr112877-1.f90          |  17
-rw-r--r--  gcc/tree.cc                                       |  14
26 files changed, 532 insertions, 583 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3ab7f56..1b8058d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2025-04-26 Jan Hubicka <hubicka@ucw.cz>
+
+ PR target/105275
+ * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Fix cost of FP scalar
+	MAX_EXPR and MIN_EXPR.
+
+2025-04-26 Iain Buclaw <ibuclaw@gdcproject.org>
+
+	* config.gcc: Add m32c*-*-* to the list of obsolete configurations.
+
+2025-04-26 Andrew Pinski <quic_apinski@quicinc.com>
+
+ * simplify-rtx.cc (simplify_context::simplify_unary_operation_1) <case ZERO_EXTEND>:
+	Add simplification for AND with a constant.
+
2025-04-25 Dimitar Dimitrov <dimitar@dinux.eu>
* doc/sourcebuild.texi: Document variadic_mi_thunk effective
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 61358eb..bcfb36f 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20250426
+20250427
diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
index 9212827..23737c3 100644
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -3286,30 +3286,6 @@ tree
create_param_decl (tree name, tree type)
{
tree param_decl = build_decl (input_location, PARM_DECL, name, type);
-
- /* Honor TARGET_PROMOTE_PROTOTYPES like the C compiler, as not doing so
- can lead to various ABI violations. */
- if (targetm.calls.promote_prototypes (NULL_TREE)
- && INTEGRAL_TYPE_P (type)
- && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
- {
- /* We have to be careful about biased types here. Make a subtype
- of integer_type_node with the proper biasing. */
- if (TREE_CODE (type) == INTEGER_TYPE
- && TYPE_BIASED_REPRESENTATION_P (type))
- {
- tree subtype
- = make_unsigned_type (TYPE_PRECISION (integer_type_node));
- TREE_TYPE (subtype) = integer_type_node;
- TYPE_BIASED_REPRESENTATION_P (subtype) = 1;
- SET_TYPE_RM_MIN_VALUE (subtype, TYPE_MIN_VALUE (type));
- SET_TYPE_RM_MAX_VALUE (subtype, TYPE_MAX_VALUE (type));
- type = subtype;
- }
- else
- type = integer_type_node;
- }
-
DECL_ARG_TYPE (param_decl) = type;
return param_decl;
}
diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 8c420f2..e7aee8a 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -5721,26 +5721,6 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs,
}
if (TREE_CODE (decl) == FUNCTION_DECL
- && targetm.calls.promote_prototypes (TREE_TYPE (decl)))
- {
- struct c_declarator *ce = declarator;
-
- if (ce->kind == cdk_pointer)
- ce = declarator->declarator;
- if (ce->kind == cdk_function)
- {
- tree args = ce->u.arg_info->parms;
- for (; args; args = DECL_CHAIN (args))
- {
- tree type = TREE_TYPE (args);
- if (type && INTEGRAL_TYPE_P (type)
- && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
- DECL_ARG_TYPE (args) = c_type_promotes_to (type);
- }
- }
- }
-
- if (TREE_CODE (decl) == FUNCTION_DECL
&& DECL_DECLARED_INLINE_P (decl)
&& DECL_UNINLINABLE (decl)
&& lookup_attribute ("noinline", DECL_ATTRIBUTES (decl)))
@@ -11179,13 +11159,6 @@ store_parm_decls_oldstyle (tree fndecl, const struct c_arg_info *arg_info)
useful for argument types like uid_t. */
DECL_ARG_TYPE (parm) = TREE_TYPE (parm);
- if (targetm.calls.promote_prototypes (TREE_TYPE (current_function_decl))
- && INTEGRAL_TYPE_P (TREE_TYPE (parm))
- && (TYPE_PRECISION (TREE_TYPE (parm))
- < TYPE_PRECISION (integer_type_node)))
- DECL_ARG_TYPE (parm)
- = c_type_promotes_to (TREE_TYPE (parm));
-
/* ??? Is it possible to get here with a
built-in prototype or will it always have
been diagnosed as conflicting with an
@@ -11413,19 +11386,6 @@ finish_function (location_t end_loc)
if (c_dialect_objc ())
objc_finish_function ();
- if (TREE_CODE (fndecl) == FUNCTION_DECL
- && targetm.calls.promote_prototypes (TREE_TYPE (fndecl)))
- {
- tree args = DECL_ARGUMENTS (fndecl);
- for (; args; args = DECL_CHAIN (args))
- {
- tree type = TREE_TYPE (args);
- if (INTEGRAL_TYPE_P (type)
- && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
- DECL_ARG_TYPE (args) = c_type_promotes_to (type);
- }
- }
-
if (DECL_INITIAL (fndecl) && DECL_INITIAL (fndecl) != error_mark_node)
BLOCK_SUPERCONTEXT (DECL_INITIAL (fndecl)) = fndecl;
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 55d896e..d94ecb5 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -4163,12 +4163,6 @@ convert_argument (location_t ploc, tree function, tree fundecl,
val, origtype, ic_argpass,
npc, fundecl, function,
parmnum + 1, warnopt);
-
- if (targetm.calls.promote_prototypes (fundecl ? TREE_TYPE (fundecl) : 0)
- && INTEGRAL_TYPE_P (type)
- && (TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)))
- parmval = default_conversion (parmval);
-
return parmval;
}
@@ -6744,17 +6738,12 @@ c_safe_arg_type_equiv_p (tree t1, tree t2)
&& TREE_CODE (t2) == POINTER_TYPE)
return true;
- /* The signedness of the parameter matters only when an integral
- type smaller than int is promoted to int, otherwise only the
- precision of the parameter matters.
- This check should make sure that the callee does not see
- undefined values in argument registers. */
+ /* Only the precision of the parameter matters. This check should
+ make sure that the callee does not see undefined values in argument
+ registers. */
if (INTEGRAL_TYPE_P (t1)
&& INTEGRAL_TYPE_P (t2)
- && TYPE_PRECISION (t1) == TYPE_PRECISION (t2)
- && (TYPE_UNSIGNED (t1) == TYPE_UNSIGNED (t2)
- || !targetm.calls.promote_prototypes (NULL_TREE)
- || TYPE_PRECISION (t1) >= TYPE_PRECISION (integer_type_node)))
+ && TYPE_PRECISION (t1) == TYPE_PRECISION (t2))
return true;
return comptypes (t1, t2);
diff --git a/gcc/calls.cc b/gcc/calls.cc
index 076e046..676f0f9 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -1382,6 +1382,11 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
}
}
+ bool promote_p
+ = targetm.calls.promote_prototypes (fndecl
+ ? TREE_TYPE (fndecl)
+ : fntype);
+
/* I counts args in order (to be) pushed; ARGPOS counts in order written. */
for (argpos = 0; argpos < num_actuals; i--, argpos++)
{
@@ -1391,6 +1396,10 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
/* Replace erroneous argument with constant zero. */
if (type == error_mark_node || !COMPLETE_TYPE_P (type))
args[i].tree_value = integer_zero_node, type = integer_type_node;
+ else if (promote_p
+ && INTEGRAL_TYPE_P (type)
+ && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
+ type = integer_type_node;
/* If TYPE is a transparent union or record, pass things the way
we would pass the first field of the union or record. We have
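The effect of the promote_prototypes change above is easiest to see at a call with a narrow prototyped argument. A minimal sketch in C, with hypothetical function names, of what initialize_argument_information now arranges on targets whose TARGET_PROMOTE_PROTOTYPES hook returns true (the per-front-end copies of this logic removed elsewhere in this patch had the same effect):

/* Sketch only: callee/caller are hypothetical names, not from the patch.  */
void callee (short x);

void
caller (short s)
{
  /* On a promote_prototypes target the narrow argument is widened to
     integer_type_node before being passed, conceptually as if the call
     were callee ((int) s); with this patch the widening happens centrally
     in initialize_argument_information rather than in each front end.  */
  callee (s);
}
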
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index 4815fbc..296df3b 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1519,17 +1519,18 @@ ix86_handle_option (struct gcc_options *opts,
return true;
case OPT_msse4:
- gcc_assert (value != 0);
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
- return true;
-
- case OPT_mno_sse4:
- gcc_assert (value != 0);
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
- opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_UNSET;
- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_UNSET;
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_UNSET;
+ }
return true;
case OPT_msse4a:
diff --git a/gcc/config.gcc b/gcc/config.gcc
index d98df88..6dbe880 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -273,6 +273,7 @@ esac
# Obsolete configurations.
case ${target} in
ia64*-*-hpux* | ia64*-*-*vms* | ia64*-*-elf* \
+ | m32c*-*-* \
)
if test "x$enable_obsolete" != xyes; then
echo "*** Configuration ${target} is obsolete." >&2
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 964449f..45aa9b4 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1271,13 +1271,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
}
}
- /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */
- if (opt == OPT_msse4 && !opt_set_p)
- {
- opt = OPT_mno_sse4;
- opt_set_p = true;
- }
-
/* Process the option. */
if (opt == N_OPTS)
{
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 78df3d9..3171d6e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25420,7 +25420,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
case MAX_EXPR:
if (fp)
{
- if (X87_FLOAT_MODE_P (mode))
+ if (X87_FLOAT_MODE_P (mode)
+ && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
/* x87 requires conditional branch. We don't have cost for
that. */
;
@@ -25457,7 +25458,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
case ABSU_EXPR:
if (fp)
{
- if (X87_FLOAT_MODE_P (mode))
+ if (X87_FLOAT_MODE_P (mode)
+ && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
/* fabs. */
stmt_cost = ix86_cost->fabs;
else
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 27d34bd..0abf134 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -721,13 +721,9 @@ Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation.
msse4
-Target RejectNegative Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
+Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation.
-mno-sse4
-Target RejectNegative InverseMask(ISA_SSE4_1) Var(ix86_isa_flags) Save
-Do not support SSE4.1 and SSE4.2 built-in functions and code generation.
-
msse5
Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed)
;; Deprecated
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index aae2d27..66c8b29 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1171,461 +1171,511 @@ expand_vector_init_trailing_same_elem (rtx target,
}
static void
-expand_const_vector (rtx target, rtx src)
+expand_const_vec_duplicate (rtx target, rtx src, rtx elt)
{
machine_mode mode = GET_MODE (target);
rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
- rtx elt;
- if (const_vec_duplicate_p (src, &elt))
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ gcc_assert (rtx_equal_p (elt, const0_rtx)
+ || rtx_equal_p (elt, const1_rtx));
+
+ rtx ops[] = {result, src};
+ emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops);
+ }
+ else if (valid_vec_immediate_p (src))
{
- if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
- {
- gcc_assert (rtx_equal_p (elt, const0_rtx)
- || rtx_equal_p (elt, const1_rtx));
- rtx ops[] = {result, src};
- emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops);
- }
/* Element in range -16 ~ 15 integer or 0.0 floating-point,
we use vmv.v.i instruction. */
- else if (valid_vec_immediate_p (src))
+ rtx ops[] = {result, src};
+ emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
+ }
+ else
+ {
+ /* Emit vec_duplicate<mode> split pattern before RA so that
+ we could have a better optimization opportunity in LICM
+ which will hoist vmv.v.x outside the loop and in fwprop && combine
+ which will transform 'vv' into 'vx' instruction.
+
+ The reason we don't emit vec_duplicate<mode> split pattern during
+ RA since the split stage after RA is a too late stage to generate
+ RVV instruction which need an additional register (We can't
+ allocate a new register after RA) for VL operand of vsetvl
+ instruction (vsetvl a5, zero). */
+ if (lra_in_progress)
{
- rtx ops[] = {result, src};
- emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
+ rtx ops[] = {result, elt};
+ emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops);
}
else
{
- /* Emit vec_duplicate<mode> split pattern before RA so that
- we could have a better optimization opportunity in LICM
- which will hoist vmv.v.x outside the loop and in fwprop && combine
- which will transform 'vv' into 'vx' instruction.
-
- The reason we don't emit vec_duplicate<mode> split pattern during
- RA since the split stage after RA is a too late stage to generate
- RVV instruction which need an additional register (We can't
- allocate a new register after RA) for VL operand of vsetvl
- instruction (vsetvl a5, zero). */
- if (lra_in_progress)
- {
- rtx ops[] = {result, elt};
- emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops);
- }
- else
- {
- struct expand_operand ops[2];
- enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
- gcc_assert (icode != CODE_FOR_nothing);
- create_output_operand (&ops[0], result, mode);
- create_input_operand (&ops[1], elt, GET_MODE_INNER (mode));
- expand_insn (icode, 2, ops);
- result = ops[0].value;
- }
+ struct expand_operand ops[2];
+ enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
+ gcc_assert (icode != CODE_FOR_nothing);
+ create_output_operand (&ops[0], result, mode);
+ create_input_operand (&ops[1], elt, GET_MODE_INNER (mode));
+ expand_insn (icode, 2, ops);
+ result = ops[0].value;
}
-
- if (result != target)
- emit_move_insn (target, result);
- return;
}
- /* Support scalable const series vector. */
- rtx base, step;
- if (const_vec_series_p (src, &base, &step))
- {
- expand_vec_series (result, base, step);
+ if (result != target)
+ emit_move_insn (target, result);
+}
- if (result != target)
- emit_move_insn (target, result);
- return;
+static void
+expand_const_vec_series (rtx target, rtx base, rtx step)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+
+ expand_vec_series (result, base, step);
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+
+/* We handle the case that we can find a vector container to hold
+ element bitsize = NPATTERNS * ele_bitsize.
+
+ NPATTERNS = 8, element width = 8
+ v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+ In this case, we can combine NPATTERNS element into a larger
+ element. Use element width = 64 and broadcast a vector with
+ all element equal to 0x0706050403020100. */
+
+static void
+expand_const_vector_duplicate_repeating (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ rtx ele = builder->get_merged_repeating_sequence ();
+ rtx dup;
+
+ if (lra_in_progress)
+ {
+ dup = gen_reg_rtx (builder->new_mode ());
+ rtx ops[] = {dup, ele};
+ emit_vlmax_insn (code_for_pred_broadcast (builder->new_mode ()),
+ UNARY_OP, ops);
}
+ else
+ dup = expand_vector_broadcast (builder->new_mode (), ele);
- /* Handle variable-length vector. */
- unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
- unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
- rvv_builder builder (mode, npatterns, nelts_per_pattern);
- for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ emit_move_insn (result, gen_lowpart (mode, dup));
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+/* We handle the case that we can't find a vector container to hold
+ element bitsize = NPATTERNS * ele_bitsize.
+
+ NPATTERNS = 8, element width = 16
+ v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+ Since NPATTERNS * element width = 128, we can't find a container
+ to hold it.
+
+ In this case, we use NPATTERNS merge operations to generate such
+ vector. */
+
+static void
+expand_const_vector_duplicate_default (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ unsigned int nbits = builder->npatterns () - 1;
+
+ /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
+ rtx vid = gen_reg_rtx (builder->int_mode ());
+ rtx op[] = {vid};
+ emit_vlmax_insn (code_for_pred_series (builder->int_mode ()), NULLARY_OP, op);
+
+ /* Generate vid_repeat = { 0, 1, ... nbits, ... } */
+ rtx vid_repeat = gen_reg_rtx (builder->int_mode ());
+ rtx and_ops[] = {vid_repeat, vid,
+ gen_int_mode (nbits, builder->inner_int_mode ())};
+ emit_vlmax_insn (code_for_pred_scalar (AND, builder->int_mode ()), BINARY_OP,
+ and_ops);
+
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx dup_ops[] = {tmp1, builder->elt (0)};
+ emit_vlmax_insn (code_for_pred_broadcast (builder->mode ()), UNARY_OP,
+ dup_ops);
+
+ for (unsigned int i = 1; i < builder->npatterns (); i++)
{
- for (unsigned int j = 0; j < npatterns; j++)
- builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
+ /* Generate mask according to i. */
+ rtx mask = gen_reg_rtx (builder->mask_mode ());
+ rtx const_vec = gen_const_vector_dup (builder->int_mode (), i);
+ expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
+
+ /* Merge scalar to each i. */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ rtx merge_ops[] = {tmp2, tmp1, builder->elt (i), mask};
+ insn_code icode = code_for_pred_merge_scalar (builder->mode ());
+ emit_vlmax_insn (icode, MERGE_OP, merge_ops);
+ tmp1 = tmp2;
}
- builder.finalize ();
- if (CONST_VECTOR_DUPLICATE_P (src))
+ emit_move_insn (result, tmp1);
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+/* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
+ E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... }
+ NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... }
+ The elements within NPATTERNS are not necessary regular. */
+static void
+expand_const_vector_duplicate (rtx target, rvv_builder *builder)
+{
+ if (builder->can_duplicate_repeating_sequence_p ())
+ return expand_const_vector_duplicate_repeating (target, builder);
+ else
+ return expand_const_vector_duplicate_default (target, builder);
+}
+
+static void
+expand_const_vector_single_step_npatterns (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+
+ /* Describe the case by choosing NPATTERNS = 4 as an example. */
+ insn_code icode;
+
+ /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
+ rtx vid = gen_reg_rtx (builder->mode ());
+ rtx vid_ops[] = {vid};
+ icode = code_for_pred_series (builder->mode ());
+ emit_vlmax_insn (icode, NULLARY_OP, vid_ops);
+
+ if (builder->npatterns_all_equal_p ())
{
- /* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
- E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... }
- NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... }
- The elements within NPATTERNS are not necessary regular. */
- if (builder.can_duplicate_repeating_sequence_p ())
+ /* Generate the variable-length vector following this rule:
+ { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
+ E.g. { 0, 0, 8, 8, 16, 16, ... } */
+
+ /* We want to create a pattern where value[idx] = floor (idx /
+ NPATTERNS). As NPATTERNS is always a power of two we can
+ rewrite this as = idx & -NPATTERNS. */
+ /* Step 2: VID AND -NPATTERNS:
+ { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... } */
+ rtx imm = gen_int_mode (-builder->npatterns (), builder->inner_mode ());
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx and_ops[] = {tmp1, vid, imm};
+ icode = code_for_pred_scalar (AND, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, and_ops);
+
+ /* Step 3: Convert to step size 1. */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ /* log2 (npatterns) to get the shift amount to convert
+ Eg. { 0, 0, 0, 0, 4, 4, ... }
+ into { 0, 0, 0, 0, 1, 1, ... }. */
+ HOST_WIDE_INT shift_amt = exact_log2 (builder->npatterns ());
+ rtx shift = gen_int_mode (shift_amt, builder->inner_mode ());
+ rtx shift_ops[] = {tmp2, tmp1, shift};
+ icode = code_for_pred_scalar (ASHIFTRT, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, shift_ops);
+
+ /* Step 4: Multiply to step size n. */
+ HOST_WIDE_INT step_size =
+ INTVAL (builder->elt (builder->npatterns ()))
+ - INTVAL (builder->elt (0));
+ rtx tmp3 = gen_reg_rtx (builder->mode ());
+ if (pow2p_hwi (step_size))
{
- /* We handle the case that we can find a vector container to hold
- element bitsize = NPATTERNS * ele_bitsize.
-
- NPATTERNS = 8, element width = 8
- v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
- In this case, we can combine NPATTERNS element into a larger
- element. Use element width = 64 and broadcast a vector with
- all element equal to 0x0706050403020100. */
- rtx ele = builder.get_merged_repeating_sequence ();
- rtx dup;
- if (lra_in_progress)
- {
- dup = gen_reg_rtx (builder.new_mode ());
- rtx ops[] = {dup, ele};
- emit_vlmax_insn (code_for_pred_broadcast
- (builder.new_mode ()), UNARY_OP, ops);
- }
- else
- dup = expand_vector_broadcast (builder.new_mode (), ele);
- emit_move_insn (result, gen_lowpart (mode, dup));
+ /* Power of 2 can be handled with a left shift. */
+ HOST_WIDE_INT shift = exact_log2 (step_size);
+ rtx shift_amount = gen_int_mode (shift, Pmode);
+ insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+ rtx ops[] = {tmp3, tmp2, shift_amount};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
}
else
{
- /* We handle the case that we can't find a vector container to hold
- element bitsize = NPATTERNS * ele_bitsize.
-
- NPATTERNS = 8, element width = 16
- v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
- Since NPATTERNS * element width = 128, we can't find a container
- to hold it.
-
- In this case, we use NPATTERNS merge operations to generate such
- vector. */
- unsigned int nbits = npatterns - 1;
-
- /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
- rtx vid = gen_reg_rtx (builder.int_mode ());
- rtx op[] = {vid};
- emit_vlmax_insn (code_for_pred_series (builder.int_mode ()),
- NULLARY_OP, op);
-
- /* Generate vid_repeat = { 0, 1, ... nbits, ... } */
- rtx vid_repeat = gen_reg_rtx (builder.int_mode ());
- rtx and_ops[] = {vid_repeat, vid,
- gen_int_mode (nbits, builder.inner_int_mode ())};
- emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()),
- BINARY_OP, and_ops);
-
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx dup_ops[] = {tmp1, builder.elt (0)};
- emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), UNARY_OP,
- dup_ops);
- for (unsigned int i = 1; i < builder.npatterns (); i++)
- {
- /* Generate mask according to i. */
- rtx mask = gen_reg_rtx (builder.mask_mode ());
- rtx const_vec = gen_const_vector_dup (builder.int_mode (), i);
- expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
-
- /* Merge scalar to each i. */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- rtx merge_ops[] = {tmp2, tmp1, builder.elt (i), mask};
- insn_code icode = code_for_pred_merge_scalar (builder.mode ());
- emit_vlmax_insn (icode, MERGE_OP, merge_ops);
- tmp1 = tmp2;
- }
- emit_move_insn (result, tmp1);
+ rtx mult_amt = gen_int_mode (step_size, builder->inner_mode ());
+ insn_code icode = code_for_pred_scalar (MULT, builder->mode ());
+ rtx ops[] = {tmp3, tmp2, mult_amt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+
+ /* Step 5: Add starting value to all elements. */
+ HOST_WIDE_INT init_val = INTVAL (builder->elt (0));
+ if (init_val == 0)
+ emit_move_insn (result, tmp3);
+ else
+ {
+ rtx dup = gen_const_vector_dup (builder->mode (), init_val);
+ rtx add_ops[] = {result, tmp3, dup};
+ icode = code_for_pred (PLUS, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, add_ops);
}
}
- else if (CONST_VECTOR_STEPPED_P (src))
+ else
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
- if (builder.single_step_npatterns_p ())
+ /* Generate the variable-length vector following this rule:
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
+ if (builder->npatterns_vid_diff_repeated_p ())
{
- /* Describe the case by choosing NPATTERNS = 4 as an example. */
- insn_code icode;
-
- /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
- rtx vid = gen_reg_rtx (builder.mode ());
- rtx vid_ops[] = {vid};
- icode = code_for_pred_series (builder.mode ());
- emit_vlmax_insn (icode, NULLARY_OP, vid_ops);
-
- if (builder.npatterns_all_equal_p ())
+ /* Case 1: For example as below:
+ {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+ We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+ repeated as below after minus vid.
+ {3, 1, -1, -3, 3, 1, -1, -3...}
+ Then we can simplify the diff code gen to at most
+ npatterns(). */
+ rvv_builder v (builder->mode (), builder->npatterns (), 1);
+
+ /* Step 1: Generate diff = TARGET - VID. */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
{
- /* Generate the variable-length vector following this rule:
- { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
- E.g. { 0, 0, 8, 8, 16, 16, ... } */
-
- /* We want to create a pattern where value[idx] = floor (idx /
- NPATTERNS). As NPATTERNS is always a power of two we can
- rewrite this as = idx & -NPATTERNS. */
- /* Step 2: VID AND -NPATTERNS:
- { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
- */
- rtx imm
- = gen_int_mode (-builder.npatterns (), builder.inner_mode ());
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx and_ops[] = {tmp1, vid, imm};
- icode = code_for_pred_scalar (AND, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, and_ops);
-
- /* Step 3: Convert to step size 1. */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- /* log2 (npatterns) to get the shift amount to convert
- Eg. { 0, 0, 0, 0, 4, 4, ... }
- into { 0, 0, 0, 0, 1, 1, ... }. */
- HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ()) ;
- rtx shift = gen_int_mode (shift_amt, builder.inner_mode ());
- rtx shift_ops[] = {tmp2, tmp1, shift};
- icode = code_for_pred_scalar (ASHIFTRT, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, shift_ops);
-
- /* Step 4: Multiply to step size n. */
- HOST_WIDE_INT step_size =
- INTVAL (builder.elt (builder.npatterns ()))
- - INTVAL (builder.elt (0));
- rtx tmp3 = gen_reg_rtx (builder.mode ());
- if (pow2p_hwi (step_size))
- {
- /* Power of 2 can be handled with a left shift. */
- HOST_WIDE_INT shift = exact_log2 (step_size);
- rtx shift_amount = gen_int_mode (shift, Pmode);
- insn_code icode = code_for_pred_scalar (ASHIFT, mode);
- rtx ops[] = {tmp3, tmp2, shift_amount};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
- else
- {
- rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ());
- insn_code icode = code_for_pred_scalar (MULT, builder.mode ());
- rtx ops[] = {tmp3, tmp2, mult_amt};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
-
- /* Step 5: Add starting value to all elements. */
- HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
- if (init_val == 0)
- emit_move_insn (result, tmp3);
- else
- {
- rtx dup = gen_const_vector_dup (builder.mode (), init_val);
- rtx add_ops[] = {result, tmp3, dup};
- icode = code_for_pred (PLUS, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, add_ops);
- }
+ poly_int64 diff = rtx_to_poly_int64 (builder->elt (i)) - i;
+ v.quick_push (gen_int_mode (diff, v.inner_mode ()));
}
- else
- {
- /* Generate the variable-length vector following this rule:
- { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
- if (builder.npatterns_vid_diff_repeated_p ())
- {
- /* Case 1: For example as below:
- {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
- We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
- repeated as below after minus vid.
- {3, 1, -1, -3, 3, 1, -1, -3...}
- Then we can simplify the diff code gen to at most
- npatterns(). */
- rvv_builder v (builder.mode (), builder.npatterns (), 1);
-
- /* Step 1: Generate diff = TARGET - VID. */
- for (unsigned int i = 0; i < v.npatterns (); ++i)
- {
- poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
- v.quick_push (gen_int_mode (diff, v.inner_mode ()));
- }
-
- /* Step 2: Generate result = VID + diff. */
- rtx vec = v.build ();
- rtx add_ops[] = {result, vid, vec};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
- }
- else
- {
- /* Case 2: For example as below:
- { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
- */
- rvv_builder v (builder.mode (), builder.npatterns (), 1);
-
- /* Step 1: Generate { a, b, a, b, ... } */
- for (unsigned int i = 0; i < v.npatterns (); ++i)
- v.quick_push (builder.elt (i));
- rtx new_base = v.build ();
-
- /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS).  */
- rtx shift_count
- = gen_int_mode (exact_log2 (builder.npatterns ()),
- builder.inner_mode ());
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx shift_ops[] = {tmp1, vid, shift_count};
- emit_vlmax_insn (code_for_pred_scalar
- (LSHIFTRT, builder.mode ()), BINARY_OP,
- shift_ops);
-
- /* Step 3: Generate tmp2 = tmp1 * step.  */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- rtx step
- = simplify_binary_operation (MINUS, builder.inner_mode (),
- builder.elt (v.npatterns()),
- builder.elt (0));
- expand_vec_series (tmp2, const0_rtx, step, tmp1);
-
- /* Step 4: Generate result = tmp2 + new_base.  */
- rtx add_ops[] = {result, tmp2, new_base};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
- }
- }
+ /* Step 2: Generate result = VID + diff. */
+ rtx vec = v.build ();
+ rtx add_ops[] = {result, vid, vec};
+ emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP,
+ add_ops);
}
- else if (builder.interleaved_stepped_npatterns_p ())
+ else
{
- rtx base1 = builder.elt (0);
- rtx base2 = builder.elt (1);
- poly_int64 step1
- = rtx_to_poly_int64 (builder.elt (builder.npatterns ()))
- - rtx_to_poly_int64 (base1);
- poly_int64 step2
- = rtx_to_poly_int64 (builder.elt (builder.npatterns () + 1))
- - rtx_to_poly_int64 (base2);
+ /* Case 2: For example as below:
+ { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+ */
+ rvv_builder v (builder->mode (), builder->npatterns (), 1);
+
+ /* Step 1: Generate { a, b, a, b, ... } */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ v.quick_push (builder->elt (i));
+ rtx new_base = v.build ();
+
+ /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS).  */
+ rtx shift_count = gen_int_mode (exact_log2 (builder->npatterns ()),
+ builder->inner_mode ());
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx shift_ops[] = {tmp1, vid, shift_count};
+ emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder->mode ()),
+ BINARY_OP, shift_ops);
+
+ /* Step 3: Generate tmp2 = tmp1 * step.  */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ rtx step
+ = simplify_binary_operation (MINUS, builder->inner_mode (),
+ builder->elt (v.npatterns()),
+ builder->elt (0));
+ expand_vec_series (tmp2, const0_rtx, step, tmp1);
+
+ /* Step 4: Generate result = tmp2 + new_base.  */
+ rtx add_ops[] = {result, tmp2, new_base};
+ emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP,
+ add_ops);
+ }
+ }
- /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW
- integer vector mode to generate such vector efficiently.
+ if (result != target)
+ emit_move_insn (target, result);
+}
- E.g. EEW = 16, { 2, 0, 4, 0, ... }
+static void
+expand_const_vector_interleaved_stepped_npatterns (rtx target, rtx src,
+ rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ rtx base1 = builder->elt (0);
+ rtx base2 = builder->elt (1);
- can be interpreted into:
+ poly_int64 step1 = rtx_to_poly_int64 (builder->elt (builder->npatterns ()))
+ - rtx_to_poly_int64 (base1);
+ poly_int64 step2 =
+ rtx_to_poly_int64 (builder->elt (builder->npatterns () + 1))
+ - rtx_to_poly_int64 (base2);
- EEW = 32, { 2, 4, ... }.
+ /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW
+ integer vector mode to generate such vector efficiently.
- Both the series1 and series2 may overflow before taking the IOR
- to generate the final result. However, only series1 matters
- because the series2 will shift before IOR, thus the overflow
- bits will never pollute the final result.
+ E.g. EEW = 16, { 2, 0, 4, 0, ... }
- For now we forbid the negative steps and overflow, and they
- will fall back to the default merge way to generate the
- const_vector. */
+ can be interpreted into:
- unsigned int new_smode_bitsize = builder.inner_bits_size () * 2;
- scalar_int_mode new_smode;
- machine_mode new_mode;
- poly_uint64 new_nunits
- = exact_div (GET_MODE_NUNITS (builder.mode ()), 2);
+ EEW = 32, { 2, 4, ... }.
- poly_int64 base1_poly = rtx_to_poly_int64 (base1);
- bool overflow_smode_p = false;
+ Both the series1 and series2 may overflow before taking the IOR
+ to generate the final result. However, only series1 matters
+ because the series2 will shift before IOR, thus the overflow
+ bits will never pollute the final result.
- if (!step1.is_constant ())
- overflow_smode_p = true;
- else
- {
- int elem_count = XVECLEN (src, 0);
- uint64_t step1_val = step1.to_constant ();
- uint64_t base1_val = base1_poly.to_constant ();
- uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+ For now we forbid the negative steps and overflow, and they
+ will fall back to the default merge way to generate the
+ const_vector. */
- if ((elem_val >> builder.inner_bits_size ()) != 0)
- overflow_smode_p = true;
- }
+ unsigned int new_smode_bitsize = builder->inner_bits_size () * 2;
+ scalar_int_mode new_smode;
+ machine_mode new_mode;
+ poly_uint64 new_nunits = exact_div (GET_MODE_NUNITS (builder->mode ()), 2);
+
+ poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+ bool overflow_smode_p = false;
+
+ if (!step1.is_constant ())
+ overflow_smode_p = true;
+ else
+ {
+ int elem_count = XVECLEN (src, 0);
+ uint64_t step1_val = step1.to_constant ();
+ uint64_t base1_val = base1_poly.to_constant ();
+ uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
- if (known_ge (step1, 0) && known_ge (step2, 0)
- && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
- && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
- && !overflow_smode_p)
+ if ((elem_val >> builder->inner_bits_size ()) != 0)
+ overflow_smode_p = true;
+ }
+
+ if (known_ge (step1, 0) && known_ge (step2, 0)
+ && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
+ && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
+ && !overflow_smode_p)
+ {
+ rtx tmp1 = gen_reg_rtx (new_mode);
+ base1 = gen_int_mode (base1_poly, new_smode);
+ expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
+
+ if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
+ /* { 1, 0, 2, 0, ... }. */
+ emit_move_insn (result, gen_lowpart (mode, tmp1));
+ else if (known_eq (step2, 0))
+ {
+ /* { 1, 1, 2, 1, ... }. */
+ rtx scalar = expand_simple_binop (
+ Xmode, ASHIFT, gen_int_mode (rtx_to_poly_int64 (base2), Xmode),
+ gen_int_mode (builder->inner_bits_size (), Xmode), NULL_RTX, false,
+ OPTAB_DIRECT);
+ scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0);
+ rtx tmp2 = gen_reg_rtx (new_mode);
+ rtx ior_ops[] = {tmp2, tmp1, scalar};
+ emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode), BINARY_OP,
+ ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp2));
+ }
+ else
+ {
+ /* { 1, 3, 2, 6, ... }. */
+ rtx tmp2 = gen_reg_rtx (new_mode);
+ base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
+ expand_vec_series (tmp2, base2, gen_int_mode (step2, new_smode));
+ rtx shifted_tmp2;
+ rtx shift = gen_int_mode (builder->inner_bits_size (), Xmode);
+ if (lra_in_progress)
{
- rtx tmp1 = gen_reg_rtx (new_mode);
- base1 = gen_int_mode (base1_poly, new_smode);
- expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
-
- if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
- /* { 1, 0, 2, 0, ... }. */
- emit_move_insn (result, gen_lowpart (mode, tmp1));
- else if (known_eq (step2, 0))
- {
- /* { 1, 1, 2, 1, ... }. */
- rtx scalar = expand_simple_binop (
- Xmode, ASHIFT,
- gen_int_mode (rtx_to_poly_int64 (base2), Xmode),
- gen_int_mode (builder.inner_bits_size (), Xmode),
- NULL_RTX, false, OPTAB_DIRECT);
- scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0);
- rtx tmp2 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp2, tmp1, scalar};
- emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode),
- BINARY_OP, ior_ops);
- emit_move_insn (result, gen_lowpart (mode, tmp2));
- }
- else
- {
- /* { 1, 3, 2, 6, ... }. */
- rtx tmp2 = gen_reg_rtx (new_mode);
- base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
- expand_vec_series (tmp2, base2,
- gen_int_mode (step2, new_smode));
- rtx shifted_tmp2;
- rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode);
- if (lra_in_progress)
- {
- shifted_tmp2 = gen_reg_rtx (new_mode);
- rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
- emit_vlmax_insn (code_for_pred_scalar
- (ASHIFT, new_mode), BINARY_OP,
- shift_ops);
- }
- else
- shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2,
- shift, NULL_RTX, false,
- OPTAB_DIRECT);
- rtx tmp3 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
- emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP,
- ior_ops);
- emit_move_insn (result, gen_lowpart (mode, tmp3));
- }
+ shifted_tmp2 = gen_reg_rtx (new_mode);
+ rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
+ emit_vlmax_insn (code_for_pred_scalar (ASHIFT, new_mode),
+ BINARY_OP, shift_ops);
}
else
- {
- rtx vid = gen_reg_rtx (mode);
- expand_vec_series (vid, const0_rtx, const1_rtx);
- /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */
- rtx shifted_vid;
- if (lra_in_progress)
- {
- shifted_vid = gen_reg_rtx (mode);
- rtx shift = gen_int_mode (1, Xmode);
- rtx shift_ops[] = {shifted_vid, vid, shift};
- emit_vlmax_insn (code_for_pred_scalar
- (ASHIFT, mode), BINARY_OP,
- shift_ops);
- }
- else
- shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid,
- const1_rtx, NULL_RTX,
- false, OPTAB_DIRECT);
- rtx tmp1 = gen_reg_rtx (mode);
- rtx tmp2 = gen_reg_rtx (mode);
- expand_vec_series (tmp1, base1,
- gen_int_mode (step1, builder.inner_mode ()),
- shifted_vid);
- expand_vec_series (tmp2, base2,
- gen_int_mode (step2, builder.inner_mode ()),
- shifted_vid);
-
- /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */
- rtx and_vid = gen_reg_rtx (mode);
- rtx and_ops[] = {and_vid, vid, const1_rtx};
- emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP,
- and_ops);
- rtx mask = gen_reg_rtx (builder.mask_mode ());
- expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
-
- rtx ops[] = {result, tmp1, tmp2, mask};
- emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
- }
+ shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2, shift,
+ NULL_RTX, false, OPTAB_DIRECT);
+ rtx tmp3 = gen_reg_rtx (new_mode);
+ rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
+ emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp3));
}
- else
- /* TODO: We will enable more variable-length vector in the future. */
- gcc_unreachable ();
}
else
- gcc_unreachable ();
+ {
+ rtx vid = gen_reg_rtx (mode);
+ expand_vec_series (vid, const0_rtx, const1_rtx);
+ /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */
+ rtx shifted_vid;
+ if (lra_in_progress)
+ {
+ shifted_vid = gen_reg_rtx (mode);
+ rtx shift = gen_int_mode (1, Xmode);
+ rtx shift_ops[] = {shifted_vid, vid, shift};
+ emit_vlmax_insn (code_for_pred_scalar (ASHIFT, mode), BINARY_OP,
+ shift_ops);
+ }
+ else
+ shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx,
+ NULL_RTX, false, OPTAB_DIRECT);
+ rtx tmp1 = gen_reg_rtx (mode);
+ rtx tmp2 = gen_reg_rtx (mode);
+ expand_vec_series (tmp1, base1,
+ gen_int_mode (step1, builder->inner_mode ()),
+ shifted_vid);
+ expand_vec_series (tmp2, base2,
+ gen_int_mode (step2, builder->inner_mode ()),
+ shifted_vid);
+
+ /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */
+ rtx and_vid = gen_reg_rtx (mode);
+ rtx and_ops[] = {and_vid, vid, const1_rtx};
+ emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops);
+ rtx mask = gen_reg_rtx (builder->mask_mode ());
+ expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
+
+ rtx ops[] = {result, tmp1, tmp2, mask};
+ emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
+ }
if (result != target)
emit_move_insn (target, result);
}
+static void
+expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder)
+{
+ gcc_assert (GET_MODE_CLASS (GET_MODE (target)) == MODE_VECTOR_INT);
+
+ if (builder->single_step_npatterns_p ())
+ return expand_const_vector_single_step_npatterns (target, builder);
+ else if (builder->interleaved_stepped_npatterns_p ())
+ return expand_const_vector_interleaved_stepped_npatterns (target, src,
+ builder);
+
+ /* TODO: We will enable more variable-length vector in the future. */
+ gcc_unreachable ();
+}
+
+static void
+expand_const_vector (rtx target, rtx src)
+{
+ rtx elt;
+ if (const_vec_duplicate_p (src, &elt))
+ return expand_const_vec_duplicate (target, src, elt);
+
+ /* Support scalable const series vector. */
+ rtx base, step;
+ if (const_vec_series_p (src, &base, &step))
+    return expand_const_vec_series (target, base, step);
+
+ /* Handle variable-length vector. */
+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
+ unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
+ rvv_builder builder (GET_MODE (target), npatterns, nelts_per_pattern);
+
+ for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ {
+ for (unsigned int j = 0; j < npatterns; j++)
+ builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
+ }
+
+ builder.finalize ();
+
+ if (CONST_VECTOR_DUPLICATE_P (src))
+ return expand_const_vector_duplicate (target, &builder);
+ else if (CONST_VECTOR_STEPPED_P (src))
+ return expand_const_vector_stepped (target, src, &builder);
+
+ gcc_unreachable ();
+}
+
/* Get the frm mode with given CONST_INT rtx, the default mode is
FRM_DYN. */
enum floating_point_rounding_mode
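The arithmetic used by expand_const_vector_single_step_npatterns for the npatterns_all_equal_p case can be checked with a scalar model: element i of { a, a, ..., a + step, a + step, ... } equals ((i & -NPATTERNS) >> log2 (NPATTERNS)) * step + a, relying on NPATTERNS being a power of two as the comments above note. A minimal C sketch with hypothetical example values:

#include <stdio.h>

int
main (void)
{
  const int npatterns = 4;   /* power of two, per the comments above */
  const int log2_np = 2;     /* exact_log2 (npatterns) */
  const int step = 8;        /* elt (npatterns) - elt (0) */
  const int init = 0;        /* elt (0) */

  for (int i = 0; i < 12; i++)
    {
      /* Mirrors steps 2-5: vid & -NPATTERNS, >> log2, * step, + init.  */
      int v = (((i & -npatterns) >> log2_np) * step) + init;
      printf ("%d ", v);     /* prints 0 0 0 0 8 8 8 8 16 16 16 16 */
    }
  printf ("\n");
  return 0;
}
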
diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 6caac89..2c3ef3d 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -9707,11 +9707,6 @@ type_passed_as (tree type)
/* Pass classes with copy ctors by invisible reference. */
if (TREE_ADDRESSABLE (type))
type = build_reference_type (type);
- else if (targetm.calls.promote_prototypes (NULL_TREE)
- && INTEGRAL_TYPE_P (type)
- && COMPLETE_TYPE_P (type)
- && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (integer_type_node)))
- type = integer_type_node;
return type;
}
@@ -9747,11 +9742,6 @@ convert_for_arg_passing (tree type, tree val, tsubst_flags_t complain)
/* Pass classes with copy ctors by invisible reference. */
else if (TREE_ADDRESSABLE (type))
val = build1 (ADDR_EXPR, build_reference_type (type), val);
- else if (targetm.calls.promote_prototypes (NULL_TREE)
- && INTEGRAL_TYPE_P (type)
- && COMPLETE_TYPE_P (type)
- && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (integer_type_node)))
- val = cp_perform_integral_promotions (val, complain);
if (complain & tf_warning)
maybe_warn_parm_abi (type, cp_expr_loc_or_input_loc (val));
diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 88f8f34..1b9fdf5 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -1372,17 +1372,12 @@ cxx_safe_arg_type_equiv_p (tree t1, tree t2)
&& TYPE_PTR_P (t2))
return true;
- /* The signedness of the parameter matters only when an integral
- type smaller than int is promoted to int, otherwise only the
- precision of the parameter matters.
- This check should make sure that the callee does not see
- undefined values in argument registers. */
+ /* Only the precision of the parameter matters. This check should
+ make sure that the callee does not see undefined values in argument
+ registers. */
if (INTEGRAL_TYPE_P (t1)
&& INTEGRAL_TYPE_P (t2)
- && TYPE_PRECISION (t1) == TYPE_PRECISION (t2)
- && (TYPE_UNSIGNED (t1) == TYPE_UNSIGNED (t2)
- || !targetm.calls.promote_prototypes (NULL_TREE)
- || TYPE_PRECISION (t1) >= TYPE_PRECISION (integer_type_node)))
+ && TYPE_PRECISION (t1) == TYPE_PRECISION (t2))
return true;
return same_type_p (t1, t2);
diff --git a/gcc/gimple.cc b/gcc/gimple.cc
index 9acfa38..77b2e50 100644
--- a/gcc/gimple.cc
+++ b/gcc/gimple.cc
@@ -2916,15 +2916,7 @@ gimple_builtin_call_types_compatible_p (const gimple *stmt, tree fndecl)
return true;
tree arg = gimple_call_arg (stmt, i);
tree type = TREE_VALUE (targs);
- if (!useless_type_conversion_p (type, TREE_TYPE (arg))
- /* char/short integral arguments are promoted to int
- by several frontends if targetm.calls.promote_prototypes
- is true. Allow such promotion too. */
- && !(INTEGRAL_TYPE_P (type)
- && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)
- && targetm.calls.promote_prototypes (TREE_TYPE (fndecl))
- && useless_type_conversion_p (integer_type_node,
- TREE_TYPE (arg))))
+ if (!useless_type_conversion_p (type, TREE_TYPE (arg)))
return false;
targs = TREE_CHAIN (targs);
}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c
index c569523..469e493 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c
@@ -1,6 +1,5 @@
/* A test for various conversions of chrecs. */
-/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -fdump-tree-optimized" } */
void blas (signed char xxx);
@@ -22,6 +21,6 @@ void tst(void)
blau ((unsigned char) i);
}
-/* { dg-final { scan-tree-dump-times "& 255" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "= \\(signed char\\)" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "= \\(unsigned char\\)" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "= \\(signed char\\)" 3 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c
index 5a7588f..246fea3 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c
@@ -1,6 +1,4 @@
-/* If the target returns false for TARGET_PROMOTE_PROTOTYPES, then there
- will be no casts for FRE to eliminate and the test will fail. */
-/* { dg-do compile { target i?86-*-* x86_64-*-* hppa*-*-* m68k*-*-* } } */
+/* { dg-do compile } */
/* { dg-options "-O -fno-tree-ccp -fno-tree-forwprop -fdump-tree-fre1-details" } */
/* From PR21608. */
@@ -11,4 +9,4 @@ char bar(char f)
return wrap(f);
}
-/* { dg-final { scan-tree-dump "Replaced \\\(char\\\) .*with " "fre1" } } */
+/* { dg-final { scan-tree-dump-not " = \\\(\[^)\]*\\\)" "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c
index 4fdf25d..628d457 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c
@@ -7,11 +7,8 @@
/* Ensure the the in-branch simd clones are used on targets that support them.
Some targets use another call for the epilogue loops. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
-/* x86_64 fails to use in-branch clones for TYPE=short. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */
-
/* The LTO test produces two dump files and we scan the wrong one. */
/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c
index 55d3c0a..d1f85b0 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c
@@ -7,11 +7,9 @@
/* Ensure the the in-branch simd clones are used on targets that support them.
Some targets use another call for the epilogue loops. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
-/* x86_64 fails to use in-branch clones for TYPE=char. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */
/* The LTO test produces two dump files and we scan the wrong one. */
/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c
index 6afa2fd..6148abe 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c
@@ -7,11 +7,8 @@
/* Ensure the the in-branch simd clones are used on targets that support them.
Some targets use another call for the epilogue loops. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
-/* x86_64 fails to use in-branch clones for TYPE=short. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */
-
/* The LTO test produces two dump files and we scan the wrong one. */
/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c
index 5617788..6368798 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c
@@ -7,11 +7,8 @@
/* Ensure the the in-branch simd clones are used on targets that support them.
Some targets use another call for the epilogue loops. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
-/* x86_64 fails to use in-branch clones for TYPE=char. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */
-
/* The LTO test produces two dump files and we scan the wrong one. */
/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c
index e56e6bc..ee049e7 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c
@@ -7,11 +7,8 @@
/* Ensure the the in-branch simd clones are used on targets that support them.
Some targets use another call for the epilogue loops. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
-/* x86_64 fails to use in-branch clones for TYPE=short. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */
-
/* The LTO test produces two dump files and we scan the wrong one. */
/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c
index 87e7379..bad9bcb 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c
@@ -7,11 +7,8 @@
/* Ensure the the in-branch simd clones are used on targets that support them.
Some targets use another call for the epilogue loops. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
-/* x86_64 fails to use in-branch clones for TYPE=char. */
-/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */
-
/* The LTO test produces two dump files and we scan the wrong one. */
/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index ce77630..2b2f4fc 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -188,16 +188,13 @@ FOO2 (int64_t, imul, *)
/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 1 } } */
/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 3 } } */
-/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
-/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "orb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 2} } */
/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 6 } } */
-/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 4 } } */
-/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
+/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 8 } } */
/* { dg-final { scan-assembler-times "xorb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 1 } } */
/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 3 } } */
-/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
-/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
diff --git a/gcc/testsuite/gfortran.dg/pr112877-1.f90 b/gcc/testsuite/gfortran.dg/pr112877-1.f90
new file mode 100644
index 0000000..f5596f0
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr112877-1.f90
@@ -0,0 +1,17 @@
+! { dg-do compile }
+! { dg-options "-Os" }
+
+program test
+ use iso_c_binding, only: c_short
+ interface
+ subroutine foo(a) bind(c)
+ import c_short
+ integer(kind=c_short), intent(in), value :: a
+ end subroutine foo
+ end interface
+ integer(kind=c_short) a(5);
+ call foo (a(3))
+end
+
+! { dg-final { scan-assembler "movswl\t10\\(%rsp\\), %edi" { target { { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* x86_64-*-gnu* } && { ! ia32 } } } } }
+! { dg-final { scan-assembler "movswl\t-14\\(%ebp\\), %eax" { target { { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* x86_64-*-gnu* } && { ia32 } } } } }
diff --git a/gcc/tree.cc b/gcc/tree.cc
index eccfcc8..98575a5 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -8770,20 +8770,6 @@ tree_builtin_call_types_compatible_p (const_tree call, tree fndecl)
&& POINTER_TYPE_P (TREE_TYPE (arg))
&& tree_nop_conversion_p (type, TREE_TYPE (arg)))
continue;
- /* char/short integral arguments are promoted to int
- by several frontends if targetm.calls.promote_prototypes
- is true. Allow such promotion too. */
- if (INTEGRAL_TYPE_P (type)
- && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)
- && INTEGRAL_TYPE_P (TREE_TYPE (arg))
- && !TYPE_UNSIGNED (TREE_TYPE (arg))
- && targetm.calls.promote_prototypes (TREE_TYPE (fndecl))
- && (gimple_form
- ? useless_type_conversion_p (integer_type_node,
- TREE_TYPE (arg))
- : tree_nop_conversion_p (integer_type_node,
- TREE_TYPE (arg))))
- continue;
return false;
}
}