Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/i386/cygming.h | 9
-rw-r--r--  gcc/config/i386/i386-features.cc | 118
-rw-r--r--  gcc/config/i386/i386-features.h | 2
-rw-r--r--  gcc/config/i386/i386.cc | 9
-rw-r--r--  gcc/config/i386/i386.h | 1
-rw-r--r--  gcc/config/i386/x86-tune-costs.h | 43
-rw-r--r--  gcc/config/riscv/bitmanip.md | 5
-rw-r--r--  gcc/config/riscv/iterators.md | 3
-rw-r--r--  gcc/config/riscv/predicates.md | 12
-rw-r--r--  gcc/config/riscv/riscv-protos.h | 1
-rw-r--r--  gcc/config/riscv/riscv.cc | 307
-rw-r--r--  gcc/config/riscv/riscv.md | 11
-rw-r--r--  gcc/fortran/dependency.cc | 6
-rw-r--r--  gcc/fortran/trans-types.cc | 31
-rw-r--r--  gcc/testsuite/gcc.dg/pr87600.h | 2
-rw-r--r--  gcc/testsuite/gcc.dg/pr89313.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/pr120080.c | 26
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/pr99988.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/i386/minmax-6.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/i386/minmax-7.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/riscv/ior-synthesis-1.c | 8
-rw-r--r--  gcc/testsuite/gcc.target/riscv/ior-synthesis-2.c | 8
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/crc-builtin-zvbc.c | 66
-rw-r--r--  gcc/testsuite/gcc.target/riscv/xor-synthesis-1.c | 8
-rw-r--r--  gcc/testsuite/gcc.target/riscv/xor-synthesis-2.c | 10
-rw-r--r--  gcc/testsuite/gcc.target/riscv/xor-synthesis-3.c | 8
-rw-r--r--  gcc/testsuite/gfortran.dg/guality/pr120193.f90 | 26
-rw-r--r--  gcc/testsuite/gfortran.dg/transfer_array_subref.f90 | 48
-rw-r--r--  gcc/tree-switch-conversion.cc | 8
29 files changed, 677 insertions, 107 deletions
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index d587d25..743cc38 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -28,16 +28,15 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_SEH
#define TARGET_SEH (TARGET_64BIT_MS_ABI && flag_unwind_tables)
+#undef PREFERRED_STACK_BOUNDARY_DEFAULT
+#define PREFERRED_STACK_BOUNDARY_DEFAULT \
+ (TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY)
+
/* Win64 with SEH cannot represent DRAP stack frames. Disable its use.
Force the use of different mechanisms to allocate aligned local data. */
#undef MAX_STACK_ALIGNMENT
#define MAX_STACK_ALIGNMENT (TARGET_SEH ? 128 : MAX_OFILE_ALIGNMENT)
-/* 32-bit Windows aligns the stack on a 4-byte boundary but SSE instructions
- may require 16-byte alignment. */
-#undef STACK_REALIGN_DEFAULT
-#define STACK_REALIGN_DEFAULT (TARGET_64BIT ? 0 : 1)
-
/* Support hooks for SEH. */
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 1ba5ac4..54b3f6d 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -518,15 +518,17 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
instead of using a scalar one. */
int
-general_scalar_chain::vector_const_cost (rtx exp)
+general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
{
gcc_assert (CONST_INT_P (exp));
if (standard_sse_constant_p (exp, vmode))
return ix86_cost->sse_op;
+ if (optimize_bb_for_size_p (bb))
+ return COSTS_N_BYTES (8);
/* We have separate costs for SImode and DImode, use SImode costs
for smaller modes. */
- return ix86_cost->sse_load[smode == DImode ? 1 : 0];
+ return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
}
/* Compute a gain for chain conversion. */
@@ -547,7 +549,7 @@ general_scalar_chain::compute_convert_gain ()
smaller modes than SImode the int load/store costs need to be
adjusted as well. */
unsigned sse_cost_idx = smode == DImode ? 1 : 0;
- unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
+ int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
{
@@ -555,26 +557,55 @@ general_scalar_chain::compute_convert_gain ()
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
+ basic_block bb = BLOCK_FOR_INSN (insn);
int igain = 0;
if (REG_P (src) && REG_P (dst))
- igain += 2 * m - ix86_cost->xmm_move;
+ {
+ if (optimize_bb_for_size_p (bb))
+ /* reg-reg move is 2 bytes, while SSE 3. */
+ igain += COSTS_N_BYTES (2 * m - 3);
+ else
+ /* Move costs are normalized to reg-reg move having cost 2. */
+ igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
+ }
else if (REG_P (src) && MEM_P (dst))
- igain
- += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
+ {
+ if (optimize_bb_for_size_p (bb))
+ /* Integer load/store is 3+ bytes and SSE 4+. */
+ igain += COSTS_N_BYTES (3 * m - 4);
+ else
+ igain
+ += COSTS_N_INSNS (m * ix86_cost->int_store[2]
+ - ix86_cost->sse_store[sse_cost_idx]) / 2;
+ }
else if (MEM_P (src) && REG_P (dst))
- igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
+ {
+ if (optimize_bb_for_size_p (bb))
+ igain += COSTS_N_BYTES (3 * m - 4);
+ else
+ igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
+ - ix86_cost->sse_load[sse_cost_idx]) / 2;
+ }
else
{
/* For operations on memory operands, include the overhead
of explicit load and store instructions. */
if (MEM_P (dst))
- igain += optimize_insn_for_size_p ()
- ? -COSTS_N_BYTES (8)
- : (m * (ix86_cost->int_load[2]
- + ix86_cost->int_store[2])
- - (ix86_cost->sse_load[sse_cost_idx] +
- ix86_cost->sse_store[sse_cost_idx]));
+ {
+ if (optimize_bb_for_size_p (bb))
+	      /* ??? This probably should account for the size difference
+		 of SSE and integer loads rather than the full SSE load.  */
+ igain -= COSTS_N_BYTES (8);
+ else
+ {
+ int cost = (m * (ix86_cost->int_load[2]
+ + ix86_cost->int_store[2])
+ - (ix86_cost->sse_load[sse_cost_idx] +
+ ix86_cost->sse_store[sse_cost_idx]));
+ igain += COSTS_N_INSNS (cost) / 2;
+ }
+ }
switch (GET_CODE (src))
{
@@ -595,7 +626,7 @@ general_scalar_chain::compute_convert_gain ()
igain += ix86_cost->shift_const - ix86_cost->sse_op;
if (CONST_INT_P (XEXP (src, 0)))
- igain -= vector_const_cost (XEXP (src, 0));
+ igain -= vector_const_cost (XEXP (src, 0), bb);
break;
case ROTATE:
@@ -631,16 +662,17 @@ general_scalar_chain::compute_convert_gain ()
igain += m * ix86_cost->add;
if (CONST_INT_P (XEXP (src, 0)))
- igain -= vector_const_cost (XEXP (src, 0));
+ igain -= vector_const_cost (XEXP (src, 0), bb);
if (CONST_INT_P (XEXP (src, 1)))
- igain -= vector_const_cost (XEXP (src, 1));
+ igain -= vector_const_cost (XEXP (src, 1), bb);
if (MEM_P (XEXP (src, 1)))
{
- if (optimize_insn_for_size_p ())
+ if (optimize_bb_for_size_p (bb))
igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
else
- igain += m * ix86_cost->int_load[2]
- - ix86_cost->sse_load[sse_cost_idx];
+ igain += COSTS_N_INSNS
+ (m * ix86_cost->int_load[2]
+ - ix86_cost->sse_load[sse_cost_idx]) / 2;
}
break;
@@ -698,7 +730,7 @@ general_scalar_chain::compute_convert_gain ()
case CONST_INT:
if (REG_P (dst))
{
- if (optimize_insn_for_size_p ())
+ if (optimize_bb_for_size_p (bb))
{
/* xor (2 bytes) vs. xorps (3 bytes). */
if (src == const0_rtx)
@@ -722,14 +754,14 @@ general_scalar_chain::compute_convert_gain ()
/* DImode can be immediate for TARGET_64BIT
and SImode always. */
igain += m * COSTS_N_INSNS (1);
- igain -= vector_const_cost (src);
+ igain -= vector_const_cost (src, bb);
}
}
else if (MEM_P (dst))
{
igain += (m * ix86_cost->int_store[2]
- ix86_cost->sse_store[sse_cost_idx]);
- igain -= vector_const_cost (src);
+ igain -= vector_const_cost (src, bb);
}
break;
@@ -737,13 +769,14 @@ general_scalar_chain::compute_convert_gain ()
if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
{
// movd (4 bytes) replaced with movdqa (4 bytes).
- if (!optimize_insn_for_size_p ())
- igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move;
+ if (!optimize_bb_for_size_p (bb))
+ igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
+ - ix86_cost->xmm_move) / 2;
}
else
{
// pshufd; movd replaced with pshufd.
- if (optimize_insn_for_size_p ())
+ if (optimize_bb_for_size_p (bb))
igain += COSTS_N_BYTES (4);
else
igain += ix86_cost->sse_to_integer;
@@ -769,11 +802,11 @@ general_scalar_chain::compute_convert_gain ()
/* Cost the integer to sse and sse to integer moves. */
if (!optimize_function_for_size_p (cfun))
{
- cost += n_sse_to_integer * ix86_cost->sse_to_integer;
+ cost += n_sse_to_integer * COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
/* ??? integer_to_sse but we only have that in the RA cost table.
Assume sse_to_integer/integer_to_sse are the same which they
are at the moment. */
- cost += n_integer_to_sse * ix86_cost->sse_to_integer;
+ cost += n_integer_to_sse * COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
}
else if (TARGET_64BIT || smode == SImode)
{
@@ -1508,13 +1541,13 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
with numerous special cases. */
static int
-timode_immed_const_gain (rtx cst)
+timode_immed_const_gain (rtx cst, basic_block bb)
{
/* movabsq vs. movabsq+vmovq+vunpacklqdq. */
if (CONST_WIDE_INT_P (cst)
&& CONST_WIDE_INT_NUNITS (cst) == 2
&& CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
- return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9)
+ return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
: -COSTS_N_INSNS (2);
/* 2x movabsq ~ vmovdqa. */
return 0;
@@ -1546,33 +1579,34 @@ timode_scalar_chain::compute_convert_gain ()
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
HOST_WIDE_INT op1val;
+ basic_block bb = BLOCK_FOR_INSN (insn);
int scost, vcost;
int igain = 0;
switch (GET_CODE (src))
{
case REG:
- if (optimize_insn_for_size_p ())
+ if (optimize_bb_for_size_p (bb))
igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
else
igain = COSTS_N_INSNS (1);
break;
case MEM:
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7)
+ igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (7)
: COSTS_N_INSNS (1);
break;
case CONST_INT:
if (MEM_P (dst)
&& standard_sse_constant_p (src, V1TImode))
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1;
+ igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (11) : 1;
break;
case CONST_WIDE_INT:
/* 2 x mov vs. vmovdqa. */
if (MEM_P (dst))
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3)
+ igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (3)
: COSTS_N_INSNS (1);
break;
@@ -1587,14 +1621,14 @@ timode_scalar_chain::compute_convert_gain ()
if (!MEM_P (dst))
igain = COSTS_N_INSNS (1);
if (CONST_SCALAR_INT_P (XEXP (src, 1)))
- igain += timode_immed_const_gain (XEXP (src, 1));
+ igain += timode_immed_const_gain (XEXP (src, 1), bb);
break;
case ASHIFT:
case LSHIFTRT:
/* See ix86_expand_v1ti_shift. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (optimize_bb_for_size_p (bb))
{
if (op1val == 64 || op1val == 65)
scost = COSTS_N_BYTES (5);
@@ -1628,7 +1662,7 @@ timode_scalar_chain::compute_convert_gain ()
case ASHIFTRT:
/* See ix86_expand_v1ti_ashiftrt. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (optimize_bb_for_size_p (bb))
{
if (op1val == 64 || op1val == 127)
scost = COSTS_N_BYTES (7);
@@ -1706,7 +1740,7 @@ timode_scalar_chain::compute_convert_gain ()
case ROTATERT:
/* See ix86_expand_v1ti_rotate. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (optimize_bb_for_size_p (bb))
{
scost = COSTS_N_BYTES (13);
if ((op1val & 31) == 0)
@@ -1738,16 +1772,16 @@ timode_scalar_chain::compute_convert_gain ()
{
if (GET_CODE (XEXP (src, 0)) == AND)
/* and;and;or (9 bytes) vs. ptest (5 bytes). */
- igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4)
- : COSTS_N_INSNS (2);
+ igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (4)
+ : COSTS_N_INSNS (2);
/* or (3 bytes) vs. ptest (5 bytes). */
- else if (optimize_insn_for_size_p ())
+ else if (optimize_bb_for_size_p (bb))
igain = -COSTS_N_BYTES (2);
}
else if (XEXP (src, 1) == const1_rtx)
/* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */
- igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6)
- : -COSTS_N_INSNS (1);
+ igain = optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (6)
+ : -COSTS_N_INSNS (1);
break;
default:
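
For reference, the repeated COSTS_N_INSNS (...) / 2 pattern introduced above converts values from the move-cost tables, which are normalized so that a reg-reg move costs 2, into the COSTS_N_INSNS scale used for the accumulated gain.  A minimal C sketch of that unit conversion follows; the macro values and table entries are assumptions chosen for illustration, not a restatement of GCC's definitions.

    #include <stdio.h>

    /* Assumed to mirror the usual definitions; illustrative only.  */
    #define COSTS_N_INSNS(N) ((N) * 4)   /* speed metric: one insn == 4 units */
    #define COSTS_N_BYTES(N) ((N) * 2)   /* size metric: one byte == 2 units  */

    int main (void)
    {
      int xmm_move = 2;   /* hypothetical table entry; reg-reg move == 2 */
      int m = 2;          /* DImode handled as two SImode moves on -m32  */

      /* Speed gain of replacing m scalar reg-reg moves (cost 2 each) with
         one SSE move, converted from table units to COSTS_N_INSNS units.  */
      int speed_gain = COSTS_N_INSNS (2 * m - xmm_move) / 2;

      /* Size gain: each reg-reg move is 2 bytes, an SSE move is 3.  */
      int size_gain = COSTS_N_BYTES (2 * m - 3);

      printf ("speed gain %d, size gain %d\n", speed_gain, size_gain);
      return 0;
    }
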
diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
index 24b0c4e..7f7c0f7 100644
--- a/gcc/config/i386/i386-features.h
+++ b/gcc/config/i386/i386-features.h
@@ -188,7 +188,7 @@ class general_scalar_chain : public scalar_chain
private:
void convert_insn (rtx_insn *insn) final override;
- int vector_const_cost (rtx exp);
+ int vector_const_cost (rtx exp, basic_block bb);
rtx convert_rotate (enum rtx_code, rtx op0, rtx op1, rtx_insn *insn);
};
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fd36ea8..9c24a92 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -7942,6 +7942,15 @@ ix86_update_stack_boundary (void)
if (ix86_tls_descriptor_calls_expanded_in_cfun
&& crtl->preferred_stack_boundary < 128)
crtl->preferred_stack_boundary = 128;
+
+  /* For the 32-bit MS ABI, both the incoming and preferred stack boundaries
+     are 32 bits, but if force_align_arg_pointer is specified, prefer
+     128 bits for backward compatibility, which is also what the
+     documentation suggests.  */
+ if (lookup_attribute ("force_align_arg_pointer",
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
+ && crtl->preferred_stack_boundary < 128)
+ crtl->preferred_stack_boundary = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
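
As context for the new check, force_align_arg_pointer is the function attribute that requests stack realignment on entry; with this change a 32-bit MS-ABI function carrying it will also prefer a 128-bit stack boundary.  A minimal usage sketch (illustrative only; the function and variable names are made up):

    /* A callback that may be invoked from code providing only 4-byte
       stack alignment, e.g. legacy 32-bit Windows code.  */
    __attribute__((force_align_arg_pointer))
    void callback (float *out)
    {
      /* 16-byte-aligned local; safe because the prologue realigns the
         stack regardless of the caller's alignment.  */
      float buf[4] __attribute__((aligned (16)));
      for (int i = 0; i < 4; i++)
        buf[i] = (float) i;
      for (int i = 0; i < 4; i++)
        out[i] = buf[i];
    }

    int main (void)
    {
      float out[4];
      callback (out);
      return (int) out[3];
    }
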
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 6a38de3..18fa97a 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -179,6 +179,7 @@ struct processor_costs {
const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */
zmm_move;
const int sse_to_integer; /* cost of moving SSE register to integer. */
+ const int integer_to_sse; /* cost of moving integer register to SSE. */
const int gather_static, gather_per_elt; /* Cost of gather load is computed
as static + per_item * nelts. */
const int scatter_static, scatter_per_elt; /* Cost of gather store is
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 6cce70a..e509129 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -107,6 +107,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
in 128bit, 256bit and 512bit */
4, 4, 6, /* cost of moving XMM,YMM,ZMM register */
4, /* cost of moving SSE register to integer. */
+ 4, /* cost of moving integer register to SSE. */
COSTS_N_BYTES (5), 0, /* Gather load static, per_elt. */
COSTS_N_BYTES (5), 0, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -227,6 +228,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -345,6 +347,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
4, /* size of l1 cache. 486 has 8kB cache
@@ -465,6 +468,7 @@ struct processor_costs pentium_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -576,6 +580,7 @@ struct processor_costs lakemont_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -702,6 +707,7 @@ struct processor_costs pentiumpro_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -819,6 +825,7 @@ struct processor_costs geode_cost = {
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -936,6 +943,7 @@ struct processor_costs k6_cost = {
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -1059,6 +1067,7 @@ struct processor_costs athlon_cost = {
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
5, /* cost of moving SSE register to integer. */
+ 5, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1184,6 +1193,7 @@ struct processor_costs k8_cost = {
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
5, /* cost of moving SSE register to integer. */
+ 5, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1322,6 +1332,7 @@ struct processor_costs amdfam10_cost = {
{4, 4, 5, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1452,6 +1463,7 @@ const struct processor_costs bdver_cost = {
{10, 10, 10, 40, 60}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
16, /* cost of moving SSE register to integer. */
+ 16, /* cost of moving integer register to SSE. */
12, 12, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
16, /* size of l1 cache. */
@@ -1603,6 +1615,7 @@ struct processor_costs znver1_cost = {
{8, 8, 8, 16, 32}, /* cost of unaligned stores. */
2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1770,6 +1783,7 @@ struct processor_costs znver2_cost = {
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1912,6 +1926,7 @@ struct processor_costs znver3_cost = {
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
throughput 9. Approx 7 uops do not depend on vector size and every load
is 4 uops. */
@@ -2056,6 +2071,7 @@ struct processor_costs znver4_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops,
throughput 5. Approx 7 uops do not depend on vector size and every load
is 5 uops. */
@@ -2204,6 +2220,7 @@ struct processor_costs znver5_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* TODO: gather and scatter instructions are currently disabled in
x86-tune.def. In some cases they are however a win, see PR116582
@@ -2372,6 +2389,7 @@ struct processor_costs skylake_cost = {
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -2508,6 +2526,7 @@ struct processor_costs icelake_cost = {
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -2638,6 +2657,7 @@ struct processor_costs alderlake_cost = {
{8, 8, 8, 10, 15}, /* cost of unaligned stores.  */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2761,6 +2781,7 @@ const struct processor_costs btver1_cost = {
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
14, /* cost of moving SSE register to integer. */
+ 14, /* cost of moving integer register to SSE. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2881,6 +2902,7 @@ const struct processor_costs btver2_cost = {
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
14, /* cost of moving SSE register to integer. */
+ 14, /* cost of moving integer register to SSE. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3000,6 +3022,7 @@ struct processor_costs pentium4_cost = {
{32, 32, 32, 64, 128}, /* cost of unaligned stores. */
12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
20, /* cost of moving SSE register to integer. */
+ 20, /* cost of moving integer register to SSE. */
16, 16, /* Gather load static, per_elt. */
16, 16, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -3122,6 +3145,7 @@ struct processor_costs nocona_cost = {
{24, 24, 24, 48, 96}, /* cost of unaligned stores. */
6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
20, /* cost of moving SSE register to integer. */
+ 20, /* cost of moving integer register to SSE. */
12, 12, /* Gather load static, per_elt. */
12, 12, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -3242,6 +3266,7 @@ struct processor_costs atom_cost = {
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3362,6 +3387,7 @@ struct processor_costs slm_cost = {
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3494,6 +3520,7 @@ struct processor_costs tremont_cost = {
{6, 6, 6, 10, 15}, /* cost of unaligned stores.  */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3616,6 +3643,7 @@ struct processor_costs intel_cost = {
{10, 10, 10, 10, 10}, /* cost of unaligned loads. */
2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
4, /* cost of moving SSE register to integer. */
+ 4, /* cost of moving integer register to SSE. */
6, 6, /* Gather load static, per_elt. */
6, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3731,15 +3759,16 @@ struct processor_costs lujiazui_cost = {
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers. */
+ {6, 6, 6}, /* cost of storing integer registers. */
{6, 6, 6, 10, 15}, /* cost of loading SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
{6, 6, 6, 10, 15}, /* cost of storing SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
{6, 6, 6, 10, 15}, /* cost of unaligned loads. */
{6, 6, 6, 10, 15}, /* cost of unaligned stores.  */
- 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
- 6, /* cost of moving SSE register to integer. */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
+ 6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3864,6 +3893,7 @@ struct processor_costs yongfeng_cost = {
{8, 8, 8, 12, 15}, /* cost of unaligned stores.  */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3987,6 +4017,7 @@ struct processor_costs shijidadao_cost = {
{8, 8, 8, 12, 15}, /* cost of unaligned stores.  */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -4116,6 +4147,7 @@ struct processor_costs generic_cost = {
{6, 6, 6, 10, 15}, /* cost of unaligned stores.  */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -4249,6 +4281,7 @@ struct processor_costs core_cost = {
{6, 6, 6, 6, 12}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2, /* cost of moving SSE register to integer. */
+ 2, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
rec. throughput 6.
So 5 uops statically and one uops per load. */
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 95df533..c226c39 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1222,7 +1222,7 @@
we can't keep it in 64 bit variable.)
then use clmul instruction to implement the CRC,
otherwise (TARGET_ZBKB) generate table based using brev. */
- if ((TARGET_ZBKC || TARGET_ZBC) && <ANYI:MODE>mode < word_mode)
+ if ((TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) && <ANYI:MODE>mode < word_mode)
expand_reversed_crc_using_clmul (<ANYI:MODE>mode, <ANYI1:MODE>mode,
operands);
else if (TARGET_ZBKB)
@@ -1254,7 +1254,8 @@
(match_operand:SUBX 3)]
UNSPEC_CRC))]
/* We don't support the case when data's size is bigger than CRC's size. */
- "(TARGET_ZBKC || TARGET_ZBC) && <SUBX:MODE>mode >= <SUBX1:MODE>mode"
+ "(TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC)
+ && <SUBX:MODE>mode >= <SUBX1:MODE>mode"
{
/* If we have the ZBC or ZBKC extension (ie, clmul) and
it is possible to store the quotient within a single variable
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 214c20b..584b345 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -262,6 +262,9 @@
(define_code_attr fix_uns [(fix "fix") (unsigned_fix "fixuns")])
+(define_code_attr OPTAB [(ior "IOR")
+ (xor "XOR")])
+
;; -------------------------------------------------------------------
;; Code Attributes
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index c9a638c..23690792 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -380,14 +380,6 @@
(and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (UINTVAL (op))")))
-;; Register, small constant or single bit constant for use in
-;; bseti/binvi.
-(define_predicate "arith_or_zbs_operand"
- (ior (match_operand 0 "const_arith_operand")
- (match_operand 0 "register_operand")
- (and (match_test "TARGET_ZBS")
- (match_operand 0 "single_bit_mask_operand"))))
-
(define_predicate "not_single_bit_mask_operand"
(and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))")))
@@ -689,3 +681,7 @@
(define_predicate "bitpos_mask_operand"
(and (match_code "const_int")
(match_test "TARGET_64BIT ? INTVAL (op) == 63 : INTVAL (op) == 31")))
+
+(define_predicate "reg_or_const_int_operand"
+ (ior (match_operand 0 "const_int_operand")
+ (match_operand 0 "register_operand")))
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index b0d5bbb..271a9a3 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -140,6 +140,7 @@ extern void riscv_expand_sssub (rtx, rtx, rtx);
extern void riscv_expand_ustrunc (rtx, rtx);
extern void riscv_expand_sstrunc (rtx, rtx);
extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
+extern bool synthesize_ior_xor (rtx_code, rtx [3]);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3ee88db..8b77a35 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -14035,17 +14035,53 @@ expand_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode,
rtx data = gen_rtx_ZERO_EXTEND (word_mode, operands[2]);
riscv_expand_op (XOR, word_mode, a0, crc, data);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t0));
- else
- emit_insn (gen_riscv_clmul_si (a0, a0, t0));
+ if (TARGET_ZBKC || TARGET_ZBC)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t0));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t0));
- riscv_expand_op (LSHIFTRT, word_mode, a0, a0,
- gen_int_mode (crc_size, word_mode));
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t1));
+ riscv_expand_op (LSHIFTRT, word_mode, a0, a0,
+ gen_int_mode (crc_size, word_mode));
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t1));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t1));
+ }
else
- emit_insn (gen_riscv_clmul_si (a0, a0, t1));
+ {
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode))
+ gcc_unreachable ();
+
+ rtx vec = gen_reg_rtx (vmode);
+
+ insn_code icode1 = code_for_pred_broadcast (vmode);
+ rtx ops1[] = {vec, a0};
+ emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode));
+
+ rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0);
+ insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL,
+ E_RVVM1DImode);
+ rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0};
+ emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX
+ (Pmode));
+
+ rtx shift_amount = gen_int_mode (data_size, Pmode);
+ insn_code icode3 = code_for_pred_scalar (LSHIFTRT, vmode);
+ rtx ops3[] = {vec, vec, shift_amount};
+ emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode));
+
+ insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH,
+ E_RVVM1DImode);
+ rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1};
+ emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX
+ (Pmode));
+
+ rtx vec_low_lane = gen_lowpart (DImode, vec);
+ riscv_emit_move (a0, vec_low_lane);
+ }
if (crc_size > data_size)
{
@@ -14094,19 +14130,53 @@ expand_reversed_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode,
rtx a0 = gen_reg_rtx (word_mode);
riscv_expand_op (XOR, word_mode, a0, crc, data);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t0));
- else
- emit_insn (gen_riscv_clmul_si (a0, a0, t0));
+ if (TARGET_ZBKC || TARGET_ZBC)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t0));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t0));
- rtx num_shift = gen_int_mode (GET_MODE_BITSIZE (word_mode) - data_size,
- word_mode);
- riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift);
+ rtx num_shift = gen_int_mode (BITS_PER_WORD - data_size, word_mode);
+ riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmulh_di (a0, a0, t1));
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmulh_di (a0, a0, t1));
+ else
+ emit_insn (gen_riscv_clmulh_si (a0, a0, t1));
+ }
else
- emit_insn (gen_riscv_clmulh_si (a0, a0, t1));
+ {
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode))
+ gcc_unreachable ();
+
+ rtx vec = gen_reg_rtx (vmode);
+ insn_code icode1 = code_for_pred_broadcast (vmode);
+ rtx ops1[] = {vec, a0};
+ emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode));
+
+ rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0);
+ insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL,
+ E_RVVM1DImode);
+ rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0};
+ emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX
+ (Pmode));
+
+ rtx shift_amount = gen_int_mode (BITS_PER_WORD - data_size, Pmode);
+ insn_code icode3 = code_for_pred_scalar (ASHIFT, vmode);
+ rtx ops3[] = {vec, vec, shift_amount};
+ emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode));
+
+ insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH,
+ E_RVVM1DImode);
+ rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1};
+ emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX
+ (Pmode));
+
+ rtx vec_low_lane = gen_lowpart (DImode, vec);
+ riscv_emit_move (a0, vec_low_lane);
+ }
if (crc_size > data_size)
{
@@ -14140,6 +14210,205 @@ bool need_shadow_stack_push_pop_p ()
return is_zicfiss_p () && riscv_save_return_addr_reg_p ();
}
+/* Synthesize OPERANDS[0] = OPERANDS[1] CODE OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ CODE is IOR or XOR.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_ior_xor (rtx_code code, rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2]))
+ || ((TARGET_ZBS || TARGET_XTHEADBS || TARGET_ZBKB)
+ && single_bit_mask_operand (operands[2], word_mode)))
+ return false;
+
+  /* The number of instructions needed to synthesize the constant is a good
+     estimate of the budget.  That does not account for out-of-order
+     execution and fusion in the constant synthesis, which would naturally
+     decrease the budget.  It also does not account for the IOR/XOR at
+     the end of the sequence, which would increase the budget.  */
+ int budget = (TARGET_ZBS ? riscv_const_insns (operands[2], true) : -1);
+ int original_budget = budget;
+
+ /* Bits we need to set in operands[0]. As we synthesize the operation,
+ we clear bits in IVAL. Once IVAL is zero, then synthesis of the
+ operation is complete. */
+ unsigned HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+ /* Check if we want to use [x]ori. Then get the remaining bits
+ and decrease the budget by one. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+ {
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ budget--;
+ }
+
+ /* Check for bseti cases. For each remaining bit in ival,
+ decrease the budget by one. */
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ ival &= ~tmpval;
+ budget--;
+ }
+
+ /* If we're flipping all but a small number of bits we can pre-flip
+ the outliers, then flip all the bits, which would restore those
+ bits that were pre-flipped. */
+ if ((TARGET_ZBS || TARGET_XTHEADBS || TARGET_ZBKB)
+ && budget < 0
+ && code == XOR
+ && popcount_hwi (~INTVAL (operands[2])) < original_budget)
+ {
+ /* Pre-flipping bits we want to preserve. */
+ rtx input = operands[1];
+ ival = ~INTVAL (operands[2]);
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ rtx x = GEN_INT (tmpval);
+ x = gen_rtx_XOR (word_mode, input, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ input = operands[0];
+ ival &= ~tmpval;
+ }
+
+ /* Now flip all the bits, which restores the bits we were
+ preserving. */
+ rtx x = gen_rtx_NOT (word_mode, input);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* One more approach we can try. If our budget is 3+ instructions,
+ then we can try to rotate the source so that the bits we want to
+ set are in the low 11 bits. We then use [x]ori to set those low
+ bits, then rotate things back into their proper place. */
+ if ((TARGET_ZBB || TARGET_XTHEADBB || TARGET_ZBKB)
+ && budget < 0
+ && popcount_hwi (INTVAL (operands[2])) <= 11
+ && riscv_const_insns (operands[2], true) >= 3)
+ {
+ ival = INTVAL (operands[2]);
+ /* First see if the constant trivially fits into 11 bits in the LSB. */
+ int lsb = ctz_hwi (ival);
+ int msb = BITS_PER_WORD - 1 - clz_hwi (ival);
+ if (msb - lsb + 1 <= 11)
+ {
+ /* Rotate the source right by LSB bits. */
+ rtx x = GEN_INT (lsb);
+ x = gen_rtx_ROTATERT (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+
+ /* Shift the constant right by LSB bits. */
+ x = GEN_INT (ival >> lsb);
+
+ /* Perform the IOR/XOR operation. */
+ x = gen_rtx_fmt_ee (code, word_mode, operands[0], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+
+	  /* And rotate left to put everything back in place; we don't
+	     have rotate left by a constant, so use rotate right by
+	     an adjusted constant.  */
+	  x = GEN_INT (BITS_PER_WORD - lsb);
+	  x = gen_rtx_ROTATERT (word_mode, operands[0], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* Maybe the bits are split between the high and low parts
+ of the constant. A bit more complex, but still manageable.
+
+ Conceptually we want to rotate left the constant by the number
+ of leading zeros after masking off all but the low 11 bits. */
+ int rotcount = clz_hwi (ival & 0x7ff) - (BITS_PER_WORD - 11);
+
+ /* Rotate the constant left by MSB bits. */
+ ival = (ival << rotcount) | (ival >> (BITS_PER_WORD - rotcount));
+
+ /* Now we can do the same tests as before. */
+ lsb = ctz_hwi (ival);
+ msb = BITS_PER_WORD - clz_hwi (ival);
+ if ((INTVAL (operands[2]) & HOST_WIDE_INT_UC (0x7ff)) != 0
+ && msb - lsb + 1 <= 11)
+ {
+	  /* Rotate the source left by ROTCOUNT bits; we don't have
+	     rotate left by a constant, so use rotate right by an
+	     adjusted constant.  */
+ rtx x = GEN_INT (BITS_PER_WORD - rotcount);
+ x = gen_rtx_ROTATERT (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+
+ /* We've already rotated the constant. So perform the IOR/XOR
+ operation. */
+ x = GEN_INT (ival);
+ x = gen_rtx_fmt_ee (code, word_mode, operands[0], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+
+ /* And rotate right to put everything into its proper place. */
+ x = GEN_INT (rotcount);
+ x = gen_rtx_ROTATERT (word_mode, operands[0], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+ }
+
+  /* If after accounting for bseti the remaining budget has gone
+     negative, force the value into a register and perform the
+     IOR/XOR operation, then return TRUE so the caller knows code
+     generation is complete.  */
+ if (budget < 0)
+ {
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_fmt_ee (code, word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* Synthesis is better than loading the constant. */
+ ival = INTVAL (operands[2]);
+ rtx input = operands[1];
+
+ /* Emit the [x]ori insn that sets the low 11 bits into
+ the proper state. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+ {
+ rtx x = GEN_INT (ival & HOST_WIDE_INT_UC (0x7ff));
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ input = operands[0];
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ }
+
+ /* We figure out a single bit as a constant and
+ generate a CONST_INT node for that. Then we
+ construct the IOR node, then the SET node and
+ emit it. An IOR with a suitable constant that is
+ a single bit will be implemented with a bseti. */
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ rtx x = GEN_INT (tmpval);
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ input = operands[0];
+ ival &= ~tmpval;
+ }
+ return true;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 259997f..154b49d 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1767,8 +1767,15 @@
(define_expand "<optab><mode>3"
[(set (match_operand:X 0 "register_operand")
(any_or:X (match_operand:X 1 "register_operand" "")
- (match_operand:X 2 "arith_or_zbs_operand" "")))]
- "")
+ (match_operand:X 2 "reg_or_const_int_operand" "")))]
+ ""
+
+{
+ /* If synthesis of the logical op is successful, then no further code
+ generation is necessary. Else just generate code normally. */
+ if (CONST_INT_P (operands[2]) && synthesize_ior_xor (<OPTAB>, operands))
+ DONE;
+})
(define_insn "*<optab><mode>3"
[(set (match_operand:X 0 "register_operand" "=r,r")
diff --git a/gcc/fortran/dependency.cc b/gcc/fortran/dependency.cc
index 57c0c49..aa8a57a 100644
--- a/gcc/fortran/dependency.cc
+++ b/gcc/fortran/dependency.cc
@@ -944,8 +944,12 @@ gfc_ref_needs_temporary_p (gfc_ref *ref)
types), not in characters. */
return subarray_p;
- case REF_COMPONENT:
case REF_INQUIRY:
+ /* Within an array reference, inquiry references of complex
+ variables generally need a temporary. */
+ return subarray_p;
+
+ case REF_COMPONENT:
break;
}
diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc
index 3374778..f898075 100644
--- a/gcc/fortran/trans-types.cc
+++ b/gcc/fortran/trans-types.cc
@@ -1140,11 +1140,6 @@ gfc_init_types (void)
}
gfc_character1_type_node = gfc_character_types[0];
- /* The middle end only recognizes a single unsigned type. For
- compatibility of existing test cases, let's just use the
- character type. The reader of tree dumps is expected to be able
- to deal with this. */
-
if (flag_unsigned)
{
for (index = 0; gfc_unsigned_kinds[index].kind != 0;++index)
@@ -1159,18 +1154,26 @@ gfc_init_types (void)
break;
}
}
- if (index_char > 0)
+ if (index_char > -1)
{
- gfc_unsigned_types[index] = gfc_character_types[index_char];
+ type = gfc_character_types[index_char];
+ if (TYPE_STRING_FLAG (type))
+ {
+ type = build_distinct_type_copy (type);
+ TYPE_CANONICAL (type)
+ = TYPE_CANONICAL (gfc_character_types[index_char]);
+ }
+ else
+ type = build_variant_type_copy (type);
+ TYPE_NAME (type) = NULL_TREE;
+ TYPE_STRING_FLAG (type) = 0;
}
else
- {
- type = gfc_build_unsigned_type (&gfc_unsigned_kinds[index]);
- gfc_unsigned_types[index] = type;
- snprintf (name_buf, sizeof(name_buf), "unsigned(kind=%d)",
- gfc_integer_kinds[index].kind);
- PUSH_TYPE (name_buf, type);
- }
+ type = gfc_build_unsigned_type (&gfc_unsigned_kinds[index]);
+ gfc_unsigned_types[index] = type;
+ snprintf (name_buf, sizeof(name_buf), "unsigned(kind=%d)",
+ gfc_integer_kinds[index].kind);
+ PUSH_TYPE (name_buf, type);
}
}
diff --git a/gcc/testsuite/gcc.dg/pr87600.h b/gcc/testsuite/gcc.dg/pr87600.h
index af91f63..c89071eb 100644
--- a/gcc/testsuite/gcc.dg/pr87600.h
+++ b/gcc/testsuite/gcc.dg/pr87600.h
@@ -7,7 +7,7 @@
#elif defined (__i386__)
# define REG1 "%eax"
# define REG2 "%edx"
-#elif defined (__powerpc__) || defined (__POWERPC__)
+#elif defined (__powerpc__) || defined (__POWERPC__) || defined (__PPC__)
# define REG1 "r3"
# define REG2 "r4"
#elif defined (__s390__)
diff --git a/gcc/testsuite/gcc.dg/pr89313.c b/gcc/testsuite/gcc.dg/pr89313.c
index 76cb091..7de64da 100644
--- a/gcc/testsuite/gcc.dg/pr89313.c
+++ b/gcc/testsuite/gcc.dg/pr89313.c
@@ -8,7 +8,7 @@
# define REG "r0"
#elif defined (__i386__)
# define REG "%eax"
-#elif defined (__powerpc__) || defined (__POWERPC__)
+#elif defined (__powerpc__) || defined (__POWERPC__) || defined (__PPC__)
# define REG "r3"
#elif defined (__s390__)
# define REG "0"
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr120080.c b/gcc/testsuite/gcc.dg/tree-ssa/pr120080.c
new file mode 100644
index 0000000..d71ef5e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr120080.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-fgimple -O2" } */
+
+void __GIMPLE (ssa,startwith("switchlower1"))
+foo (int b)
+{
+ __BB(2):
+ switch (b) {default: L9; case 0: L5; case 5: L5; case 101: L5; }
+
+ __BB(3):
+L9:
+ switch (b) {default: L7; case 5: L6; case 101: L6; }
+
+ __BB(4):
+L6:
+ __builtin_unreachable ();
+
+ __BB(5):
+L7:
+ __builtin_trap ();
+
+ __BB(6):
+L5:
+ return;
+
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c
index 7cca496..c09ce67 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr99988.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target lp64 } } */
-/* { dg-options "-O2 -mbranch-protection=standard" } */
+/* { dg-options "-O2 -mbranch-protection=standard -fno-bit-tests" } */
/* { dg-final { scan-assembler-times {bti j} 13 } } */
int a;
int c();
diff --git a/gcc/testsuite/gcc.target/i386/minmax-6.c b/gcc/testsuite/gcc.target/i386/minmax-6.c
index 615f919..23f61c5 100644
--- a/gcc/testsuite/gcc.target/i386/minmax-6.c
+++ b/gcc/testsuite/gcc.target/i386/minmax-6.c
@@ -15,4 +15,4 @@ UMVLine16Y_11 (short unsigned int * Pic, int y, int width)
/* We do not want the RA to spill %esi for its dual-use but using
pmaxsd is OK. */
/* { dg-final { scan-assembler-not "rsp" { target { ! { ia32 } } } } } */
-/* { dg-final { scan-assembler "pmaxsd" } } */
+/* { dg-final { scan-assembler "pmaxsd" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/minmax-7.c b/gcc/testsuite/gcc.target/i386/minmax-7.c
index 619a939..b2cb1c2 100644
--- a/gcc/testsuite/gcc.target/i386/minmax-7.c
+++ b/gcc/testsuite/gcc.target/i386/minmax-7.c
@@ -17,4 +17,4 @@ void bar (int aleft, int axcenter)
/* We do not want the RA to spill %esi for its dual-use but using
pminsd is OK. */
/* { dg-final { scan-assembler-not "rsp" { target { ! { ia32 } } } } } */
-/* { dg-final { scan-assembler "pminsd" } } */
+/* { dg-final { scan-assembler "pminsd" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/ior-synthesis-1.c b/gcc/testsuite/gcc.target/riscv/ior-synthesis-1.c
new file mode 100644
index 0000000..04644cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/ior-synthesis-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gb -mabi=lp64d" } */
+
+unsigned long foo(unsigned long src) { return src | 0x8c00000000000001; }
+
+/* { dg-final { scan-assembler-times "\\srori\t" 2 } } */
+/* { dg-final { scan-assembler-times "\\sori\t" 1 } } */
+
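
For reference, the constant in this test (0x8c00000000000001) sets bits 63, 59, 58 and 0; those bits do not fit in an 11-bit window directly, but after rotating left by 10 they collapse to 0x630, so the synthesis emits rotate, ori, rotate back, matching the scans above.  A minimal C sketch of the equivalence; the rotr64 helper and the concrete rotation count are illustrative, not generated code:

    #include <stdint.h>
    #include <assert.h>

    /* Hypothetical helper: rotate a 64-bit value right by n, 0 < n < 64.  */
    static uint64_t rotr64 (uint64_t x, unsigned n)
    {
      return (x >> n) | (x << (64 - n));
    }

    /* src | 0x8c00000000000001 via rotate, ori, rotate back.  */
    static uint64_t ior_via_rotate (uint64_t src)
    {
      uint64_t t = rotr64 (src, 64 - 10);   /* rotate left by 10 (rori 54) */
      t |= 0x630;                           /* constant rotated left by 10 */
      return rotr64 (t, 10);                /* rotate back into place      */
    }

    int main (void)
    {
      uint64_t src = 0x123456789abcdef0ull;
      assert (ior_via_rotate (src) == (src | 0x8c00000000000001ull));
      return 0;
    }
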
diff --git a/gcc/testsuite/gcc.target/riscv/ior-synthesis-2.c b/gcc/testsuite/gcc.target/riscv/ior-synthesis-2.c
new file mode 100644
index 0000000..f28fe5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/ior-synthesis-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gb -mabi=lp64d" } */
+
+unsigned long foo(unsigned long src) { return src | 0x8800000000000007; }
+
+/* { dg-final { scan-assembler-times "\\sbseti\t" 2 } } */
+/* { dg-final { scan-assembler-times "\\sori\t" 1 } } */
+
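
For reference, the constant here (0x8800000000000007) splits into a low chunk (0x7) handled by one ori and two remaining single bits (59 and 63) each set with a bseti, matching the scans above.  A minimal C sketch of the same decomposition (illustrative only):

    #include <stdint.h>
    #include <assert.h>

    /* src | 0x8800000000000007 as one ori plus two single-bit ORs (bseti).  */
    static uint64_t ior_via_bseti (uint64_t src)
    {
      uint64_t t = src | 0x7;        /* ori: the low 11-bit chunk  */
      t |= (uint64_t) 1 << 59;       /* bseti t, t, 59             */
      t |= (uint64_t) 1 << 63;       /* bseti t, t, 63             */
      return t;
    }

    int main (void)
    {
      uint64_t src = 0x0123456789abcdefull;
      assert (ior_via_bseti (src) == (src | 0x8800000000000007ull));
      return 0;
    }
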
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/crc-builtin-zvbc.c b/gcc/testsuite/gcc.target/riscv/rvv/base/crc-builtin-zvbc.c
new file mode 100644
index 0000000..2d5fa88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/crc-builtin-zvbc.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbc -mabi=lp64d" } */
+
+#include <stdint-gcc.h>
+
+int8_t crc8_data8 ()
+{
+ return __builtin_crc8_data8 (0x34, 'a', 0x12);
+}
+
+int16_t crc16_data8 ()
+{
+ return __builtin_crc16_data8 (0x1234, 'a', 0x1021);
+}
+
+int16_t crc16_data16 ()
+{
+ return __builtin_crc16_data16 (0x1234, 0x3214, 0x1021);
+}
+
+int32_t crc32_data8 ()
+{
+ return __builtin_crc32_data8 (0xffffffff, 0x32, 0x4002123);
+}
+
+int32_t crc32_data16 ()
+{
+ return __builtin_crc32_data16 (0xffffffff, 0x3232, 0x4002123);
+}
+
+int32_t crc32_data32 ()
+{
+ return __builtin_crc32_data32 (0xffffffff, 0x123546ff, 0x4002123);
+}
+
+int8_t rev_crc8_data8 ()
+{
+ return __builtin_rev_crc8_data8 (0x34, 'a', 0x12);
+}
+
+int16_t rev_crc16_data8 ()
+{
+ return __builtin_rev_crc16_data8 (0x1234, 'a', 0x1021);
+}
+
+int16_t rev_crc16_data16 ()
+{
+ return __builtin_rev_crc16_data16 (0x1234, 0x3214, 0x1021);
+}
+
+int32_t rev_crc32_data8 ()
+{
+ return __builtin_rev_crc32_data8 (0xffffffff, 0x32, 0x4002123);
+}
+
+int32_t rev_crc32_data16 ()
+{
+ return __builtin_rev_crc32_data16 (0xffffffff, 0x3232, 0x4002123);
+}
+
+int32_t rev_crc32_data32 ()
+{
+ return __builtin_rev_crc32_data32 (0xffffffff, 0x123546ff, 0x4002123);
+}
+/* { dg-final { scan-assembler-times "vclmul.vx" 12 } } */
+/* { dg-final { scan-assembler-times "vclmulh.vx" 12 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/xor-synthesis-1.c b/gcc/testsuite/gcc.target/riscv/xor-synthesis-1.c
new file mode 100644
index 0000000..c630a79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/xor-synthesis-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gb -mabi=lp64d" } */
+
+unsigned long foo(unsigned long src) { return src ^ 0xffffffffefffffffUL; }
+
+/* { dg-final { scan-assembler-times "\\sbinvi\t" 1 } } */
+/* { dg-final { scan-assembler-times "\\snot\t" 1 } } */
+
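
For reference, the XOR constant here (0xffffffffefffffffUL) is all-ones except bit 28, so the synthesis pre-flips that single outlier and then inverts everything, using the identity src ^ ~m == ~(src ^ m); that is the binvi plus not counted by the scans above.  A minimal C sketch of the identity (illustrative only):

    #include <stdint.h>
    #include <assert.h>

    /* src ^ 0xffffffffefffffff as a flip of bit 28 (binvi) followed by not.  */
    static uint64_t xor_via_preflip (uint64_t src)
    {
      uint64_t t = src ^ ((uint64_t) 1 << 28);   /* binvi t, src, 28 */
      return ~t;                                 /* not t, t         */
    }

    int main (void)
    {
      uint64_t src = 0x0123456789abcdefull;
      assert (xor_via_preflip (src) == (src ^ 0xffffffffefffffffull));
      return 0;
    }
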
diff --git a/gcc/testsuite/gcc.target/riscv/xor-synthesis-2.c b/gcc/testsuite/gcc.target/riscv/xor-synthesis-2.c
new file mode 100644
index 0000000..25457d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/xor-synthesis-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gb -mabi=lp64d" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+
+unsigned long foo(unsigned long src) { return src ^ 0x8800000000000007; }
+
+/* xfailed until we remove mvconst_internal. */
+/* { dg-final { scan-assembler-times "\\sbinvi\t" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "\\sxori\t" 1 { xfail *-*-* } } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/xor-synthesis-3.c b/gcc/testsuite/gcc.target/riscv/xor-synthesis-3.c
new file mode 100644
index 0000000..765904b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/xor-synthesis-3.c
@@ -0,0 +1,8 @@
+/* { dg-do compile { target { rv64 } } } */
+/* { dg-options "-march=rv64gb -mabi=lp64d" } */
+
+unsigned long foo(unsigned long src) { return src ^ 0x8c00000000000001; }
+
+/* { dg-final { scan-assembler-times "\\srori\t" 2 } } */
+/* { dg-final { scan-assembler-times "\\sxori\t" 1 } } */
+
diff --git a/gcc/testsuite/gfortran.dg/guality/pr120193.f90 b/gcc/testsuite/gfortran.dg/guality/pr120193.f90
new file mode 100644
index 0000000..e65febf
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/guality/pr120193.f90
@@ -0,0 +1,26 @@
+! PR fortran/120193
+! { dg-do run }
+! { dg-options "-g -funsigned" }
+! { dg-skip-if "" { *-*-* } { "*" } { "-O0" } }
+
+program foo
+ unsigned(kind=1) :: a(2), e
+ unsigned(kind=2) :: b(2), f
+ unsigned(kind=4) :: c(2), g
+ unsigned(kind=8) :: d(2), h
+ character(kind=1, len=1) :: i(2), j
+ character(kind=4, len=1) :: k(2), l
+ a = 97u_1 ! { dg-final { gdb-test 24 "a" "d" } }
+ b = 97u_2 ! { dg-final { gdb-test 24 "b" "c" } }
+ c = 97u_4 ! { dg-final { gdb-test 24 "c" "b" } }
+ d = 97u_8 ! { dg-final { gdb-test 24 "d" "a" } }
+ e = 97u_1 ! { dg-final { gdb-test 24 "e" "97" } }
+ f = 97u_2 ! { dg-final { gdb-test 24 "f" "97" } }
+ g = 97u_4 ! { dg-final { gdb-test 24 "g" "97" } }
+ h = 97u_8 ! { dg-final { gdb-test 24 "h" "97" } }
+ i = 'a' ! { dg-final { gdb-test 24 "i" "('a', 'a')" } }
+ j = 'b' ! { dg-final { gdb-test 24 "j" "'b'" } }
+ k = 'c'
+ l = 'd'
+ print *, a
+end program
diff --git a/gcc/testsuite/gfortran.dg/transfer_array_subref.f90 b/gcc/testsuite/gfortran.dg/transfer_array_subref.f90
new file mode 100644
index 0000000..b480dff
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/transfer_array_subref.f90
@@ -0,0 +1,48 @@
+! { dg-do run }
+! { dg-additional-options "-O2 -fdump-tree-optimized" }
+!
+! PR fortran/102891 - passing of inquiry ref of complex array to TRANSFER
+
+program main
+ implicit none
+ integer, parameter :: dp = 8
+
+ type complex_wrap1
+ complex(dp) :: z(2)
+ end type complex_wrap1
+
+ type complex_wrap2
+ complex(dp), dimension(:), allocatable :: z
+ end type complex_wrap2
+
+ type(complex_wrap1) :: x = complex_wrap1([ (1, 2), (3, 4) ])
+ type(complex_wrap2) :: w
+
+ w%z = x%z
+
+ ! The following statements should get optimized away...
+ if (size (transfer ( x%z%re ,[1.0_dp])) /= 2) error stop 1
+ if (size (transfer ((x%z%re),[1.0_dp])) /= 2) error stop 2
+ if (size (transfer ([x%z%re],[1.0_dp])) /= 2) error stop 3
+ if (size (transfer ( x%z%im ,[1.0_dp])) /= 2) error stop 4
+ if (size (transfer ((x%z%im),[1.0_dp])) /= 2) error stop 5
+ if (size (transfer ([x%z%im],[1.0_dp])) /= 2) error stop 6
+
+ ! ... while the following may not:
+ if (any (transfer ( x%z%re ,[1.0_dp]) /= x%z%re)) stop 7
+ if (any (transfer ( x%z%im ,[1.0_dp]) /= x%z%im)) stop 8
+
+ if (size (transfer ( w%z%re ,[1.0_dp])) /= 2) stop 11
+ if (size (transfer ((w%z%re),[1.0_dp])) /= 2) stop 12
+ if (size (transfer ([w%z%re],[1.0_dp])) /= 2) stop 13
+ if (size (transfer ( w%z%im ,[1.0_dp])) /= 2) stop 14
+ if (size (transfer ((w%z%im),[1.0_dp])) /= 2) stop 15
+ if (size (transfer ([w%z%im],[1.0_dp])) /= 2) stop 16
+
+ if (any (transfer ( w%z%re ,[1.0_dp]) /= x%z%re)) stop 17
+ if (any (transfer ( w%z%im ,[1.0_dp]) /= x%z%im)) stop 18
+
+ deallocate (w%z)
+end program main
+
+! { dg-final { scan-tree-dump-not "_gfortran_error_stop_numeric" "optimized" } }
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index dea217a..bd4de96 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -1793,12 +1793,14 @@ bit_test_cluster::find_bit_tests (vec<cluster *> &clusters, int max_c)
end up with as few clusters as possible. */
unsigned l = clusters.length ();
- auto_vec<min_cluster_item> min;
- min.reserve (l + 1);
- gcc_checking_assert (l > 0);
+ if (l == 0)
+ return clusters.copy ();
gcc_checking_assert (l <= INT_MAX);
+ auto_vec<min_cluster_item> min;
+ min.reserve (l + 1);
+
int bits_in_word = GET_MODE_BITSIZE (word_mode);
/* First phase: Compute the minimum number of clusters for each prefix of the