aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog16
-rw-r--r--gcc/config/i386/i386.c59
-rw-r--r--gcc/config/i386/i386.h59
-rw-r--r--gcc/config/i386/x86-tune-costs.h1238
4 files changed, 835 insertions, 537 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index eba58f6..aa295f0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2019-08-15 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/90878
+ * config/i386/i386.c (inline_memory_move_cost): Use hard_register
+ for costs of hard register moves.
+ (ix86_register_move_cost): Likewise.
+ * config/i386/i386.h (processor_costs): Move costs of hard
+ register moves to hard_register. Add int_load, int_store,
+ xmm_move, ymm_move, zmm_move, sse_to_integer, sse_load,
+ sse_store, sse_unaligned_load and sse_unaligned_store for
+ costs of RTL expressions.
+ * config/i386/x86-tune-costs.h: Move costs of hard register
+ moves to hard_register. Duplicate int_load, int_store,
+ xmm_move, ymm_move, zmm_move, sse_to_integer, sse_load and
+ sse_store for costs of RTL expressions.
+
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
* target.def (setup_incoming_vararg_bounds): Remove.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5eb625c..647bcbe 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -18464,8 +18464,10 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
return 100;
}
if (in == 2)
- return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
- return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
+ return MAX (ix86_cost->hard_register.fp_load [index],
+ ix86_cost->hard_register.fp_store [index]);
+ return in ? ix86_cost->hard_register.fp_load [index]
+ : ix86_cost->hard_register.fp_store [index];
}
if (SSE_CLASS_P (regclass))
{
@@ -18473,8 +18475,10 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
if (index == -1)
return 100;
if (in == 2)
- return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
- return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
+ return MAX (ix86_cost->hard_register.sse_load [index],
+ ix86_cost->hard_register.sse_store [index]);
+ return in ? ix86_cost->hard_register.sse_load [index]
+ : ix86_cost->hard_register.sse_store [index];
}
if (MMX_CLASS_P (regclass))
{
@@ -18491,8 +18495,10 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
return 100;
}
if (in == 2)
- return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
- return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
+ return MAX (ix86_cost->hard_register.mmx_load [index],
+ ix86_cost->hard_register.mmx_store [index]);
+ return in ? ix86_cost->hard_register.mmx_load [index]
+ : ix86_cost->hard_register.mmx_store [index];
}
switch (GET_MODE_SIZE (mode))
{
@@ -18500,37 +18506,41 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
if (Q_CLASS_P (regclass) || TARGET_64BIT)
{
if (!in)
- return ix86_cost->int_store[0];
+ return ix86_cost->hard_register.int_store[0];
if (TARGET_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun))
- cost = ix86_cost->movzbl_load;
+ cost = ix86_cost->hard_register.movzbl_load;
else
- cost = ix86_cost->int_load[0];
+ cost = ix86_cost->hard_register.int_load[0];
if (in == 2)
- return MAX (cost, ix86_cost->int_store[0]);
+ return MAX (cost, ix86_cost->hard_register.int_store[0]);
return cost;
}
else
{
if (in == 2)
- return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
+ return MAX (ix86_cost->hard_register.movzbl_load,
+ ix86_cost->hard_register.int_store[0] + 4);
if (in)
- return ix86_cost->movzbl_load;
+ return ix86_cost->hard_register.movzbl_load;
else
- return ix86_cost->int_store[0] + 4;
+ return ix86_cost->hard_register.int_store[0] + 4;
}
break;
case 2:
if (in == 2)
- return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
- return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
+ return MAX (ix86_cost->hard_register.int_load[1],
+ ix86_cost->hard_register.int_store[1]);
+ return in ? ix86_cost->hard_register.int_load[1]
+ : ix86_cost->hard_register.int_store[1];
default:
if (in == 2)
- cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
+ cost = MAX (ix86_cost->hard_register.int_load[2],
+ ix86_cost->hard_register.int_store[2]);
else if (in)
- cost = ix86_cost->int_load[2];
+ cost = ix86_cost->hard_register.int_load[2];
else
- cost = ix86_cost->int_store[2];
+ cost = ix86_cost->hard_register.int_store[2];
/* Multiply with the number of GPR moves needed. */
return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
}
@@ -18600,20 +18610,21 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
because of missing QImode and HImode moves to, from or between
MMX/SSE registers. */
return MAX (8, SSE_CLASS_P (class1)
- ? ix86_cost->sse_to_integer : ix86_cost->integer_to_sse);
+ ? ix86_cost->hard_register.sse_to_integer
+ : ix86_cost->hard_register.integer_to_sse);
if (MAYBE_FLOAT_CLASS_P (class1))
- return ix86_cost->fp_move;
+ return ix86_cost->hard_register.fp_move;
if (MAYBE_SSE_CLASS_P (class1))
{
if (GET_MODE_BITSIZE (mode) <= 128)
- return ix86_cost->xmm_move;
+ return ix86_cost->hard_register.xmm_move;
if (GET_MODE_BITSIZE (mode) <= 256)
- return ix86_cost->ymm_move;
- return ix86_cost->zmm_move;
+ return ix86_cost->hard_register.ymm_move;
+ return ix86_cost->hard_register.zmm_move;
}
if (MAYBE_MMX_CLASS_P (class1))
- return ix86_cost->mmx_move;
+ return ix86_cost->hard_register.mmx_move;
return 2;
}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 800d7c4..e0a77e1 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -237,9 +237,46 @@ struct stringop_algs
} size [MAX_STRINGOP_ALGS];
};
-/* Define the specific costs for a given cpu */
+/* Define the specific costs for a given cpu. NB: hard_register is used
+ by TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute
+ hard register move costs for the register allocator. Relative costs of
+ pseudo register load and store versus pseudo register moves in RTL
+ expressions for TARGET_RTX_COSTS can be different from relative
+ costs of hard registers to get the most efficient operations with
+ pseudo registers. */
struct processor_costs {
+ /* Costs used by register allocator. integer->integer register move
+ cost is 2. */
+ struct
+ {
+ const int movzbl_load; /* cost of loading using movzbl */
+ const int int_load[3]; /* cost of loading integer registers
+ in QImode, HImode and SImode relative
+ to reg-reg move (2). */
+ const int int_store[3]; /* cost of storing integer register
+ in QImode, HImode and SImode */
+ const int fp_move; /* cost of reg,reg fld/fst */
+ const int fp_load[3]; /* cost of loading FP register
+ in SFmode, DFmode and XFmode */
+ const int fp_store[3]; /* cost of storing FP register
+ in SFmode, DFmode and XFmode */
+ const int mmx_move; /* cost of moving MMX register. */
+ const int mmx_load[2]; /* cost of loading MMX register
+ in SImode and DImode */
+ const int mmx_store[2]; /* cost of storing MMX register
+ in SImode and DImode */
+ const int xmm_move; /* cost of moving XMM register. */
+ const int ymm_move; /* cost of moving YMM register. */
+ const int zmm_move; /* cost of moving ZMM register. */
+ const int sse_load[5]; /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ const int sse_store[5]; /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ const int sse_to_integer; /* cost of moving SSE register to integer. */
+ const int integer_to_sse; /* cost of moving integer register to SSE. */
+ } hard_register;
+
const int add; /* cost of an add instruction */
const int lea; /* cost of a lea instruction */
const int shift_var; /* variable shift costs */
@@ -254,32 +291,20 @@ struct processor_costs {
const int large_insn; /* insns larger than this cost more */
const int move_ratio; /* The threshold of number of scalar
memory-to-memory move insns. */
- const int movzbl_load; /* cost of loading using movzbl */
const int int_load[3]; /* cost of loading integer registers
in QImode, HImode and SImode relative
to reg-reg move (2). */
const int int_store[3]; /* cost of storing integer register
in QImode, HImode and SImode */
- const int fp_move; /* cost of reg,reg fld/fst */
- const int fp_load[3]; /* cost of loading FP register
- in SFmode, DFmode and XFmode */
- const int fp_store[3]; /* cost of storing FP register
- in SFmode, DFmode and XFmode */
- const int mmx_move; /* cost of moving MMX register. */
- const int mmx_load[2]; /* cost of loading MMX register
- in SImode and DImode */
- const int mmx_store[2]; /* cost of storing MMX register
- in SImode and DImode */
- const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */
- zmm_move;
const int sse_load[5]; /* cost of loading SSE register
in 32bit, 64bit, 128bit, 256bit and 512bit */
- const int sse_unaligned_load[5];/* cost of unaligned load. */
const int sse_store[5]; /* cost of storing SSE register
- in SImode, DImode and TImode. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ const int sse_unaligned_load[5];/* cost of unaligned load. */
const int sse_unaligned_store[5];/* cost of unaligned store. */
+ const int xmm_move, ymm_move, /* cost of moving XMM, YMM and ZMM register. */
+ zmm_move;
const int sse_to_integer; /* cost of moving SSE register to integer. */
- const int integer_to_sse; /* cost of moving integer register to SSE. */
const int gather_static, gather_per_elt; /* Cost of gather load is computed
as static + per_item * nelts. */
const int scatter_static, scatter_per_elt; /* Cost of gather store is
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 8b963c0..ad9ea4b 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -36,6 +36,30 @@ static stringop_algs ix86_size_memset[2] = {
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {2, 2, 2}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 3, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {3, 3}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
+ {3, 3, 3, 3, 3}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {3, 3, 3, 3, 3}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_BYTES (2), /* cost of an add instruction */
COSTS_N_BYTES (3), /* cost of a lea instruction */
COSTS_N_BYTES (2), /* variable shift costs */
@@ -55,33 +79,20 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (3), /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2. */
- 2, /* cost for loading QImode using movzbl */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 2}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {2, 2, 2}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 3, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {3, 3}, /* cost of storing MMX registers
- in SImode and DImode */
- 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
- {3, 3, 3, 3, 3}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {3, 3, 3, 3, 3}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {3, 3, 3, 3, 3}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{3, 3, 3, 3, 3}, /* cost of unaligned SSE load
in 128bit, 256bit and 512bit */
- {3, 3, 3, 3, 3}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
- {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
+ {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
in 128bit, 256bit and 512bit */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
5, 0, /* Gather load static, per_elt. */
5, 0, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -127,6 +138,30 @@ static stringop_algs i386_memset[2] = {
static const
struct processor_costs i386_cost = { /* 386 specific costs */
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (3), /* variable shift costs */
@@ -146,32 +181,18 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {8, 8, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -216,6 +237,30 @@ static stringop_algs i486_memset[2] = {
static const
struct processor_costs i486_cost = { /* 486 specific costs */
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (3), /* variable shift costs */
@@ -235,32 +280,18 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {8, 8, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
4, /* size of l1 cache. 486 has 8kB cache
@@ -307,6 +338,30 @@ static stringop_algs pentium_memset[2] = {
static const
struct processor_costs pentium_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (4), /* variable shift costs */
@@ -326,32 +381,18 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 8, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -389,6 +430,30 @@ struct processor_costs pentium_cost = {
static const
struct processor_costs lakemont_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -408,32 +473,18 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 8, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -486,6 +537,30 @@ static stringop_algs pentiumpro_memset[2] = {
DUMMY_STRINGOP_ALGS};
static const
struct processor_costs pentiumpro_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 3, 3, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -505,32 +580,18 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 2, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 8, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 8, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 8, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 8, 16, 32, 64}, /* cost of unaligned loads. */
- {4, 8, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
- 3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -574,6 +635,30 @@ static stringop_algs geode_memset[2] = {
DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 6, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (2), /* variable shift costs */
@@ -593,33 +678,18 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 2, /* cost for loading QImode using movzbl */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 2}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 6, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
-
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {2, 2, 8, 16, 32}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{2, 2, 8, 16, 32}, /* cost of unaligned loads. */
- {2, 2, 8, 16, 32}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
- 6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 6, /* cost of moving SSE register to integer. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -663,6 +733,30 @@ static stringop_algs k6_memset[2] = {
DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 3, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -682,32 +776,18 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 3, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {6, 6, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {2, 2, 8, 16, 32}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {2, 2, 8, 16, 32}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {2, 2, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{2, 2, 8, 16, 32}, /* cost of unaligned loads. */
- {2, 2, 8, 16, 32}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
- 6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 6, /* cost of moving SSE register to integer. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -757,6 +837,30 @@ static stringop_algs athlon_memset[2] = {
DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 4, 12, 12, 24}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 5, 5, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -776,32 +880,18 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 4, 12, 12, 24}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 4, 12, 12, 24}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 4, 12, 12, 24}, /* cost of unaligned loads. */
- {4, 4, 10, 10, 20}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
- 5, 5, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 5, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -853,6 +943,30 @@ static stringop_algs k8_memset[2] = {
{8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs k8_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {4, 3, 12, 12, 24}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 5, 5, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -872,32 +986,18 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {4, 3, 12, 12, 24}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {4, 3, 12, 12, 24}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 4, 10, 10, 20}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{4, 3, 12, 12, 24}, /* cost of unaligned loads. */
- {4, 4, 10, 10, 20}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
- 5, 5, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 5, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -953,28 +1053,7 @@ static stringop_algs amdfam10_memset[2] = {
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
+ /* Start of register allocator costs. integer->integer move cost is 2. */
4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
@@ -993,11 +1072,10 @@ struct processor_costs amdfam10_cost = {
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
{4, 4, 3, 6, 12}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit */
- {4, 4, 3, 7, 12}, /* cost of unaligned loads. */
{4, 4, 5, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
- {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
3, 3, /* SSE->integer and integer->SSE moves */
+
/* On K8:
MOVD reg64, xmmreg Double FSTORE 4
MOVD reg32, xmmreg Double FSTORE 4
@@ -1006,6 +1084,39 @@ struct processor_costs amdfam10_cost = {
1/1 1/1
MOVD reg32, xmmreg Double FADD 3
1/1 1/1 */
+ /* End of register allocator costs. */
+
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ {4, 4, 3, 6, 12}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 4, 5, 10, 20}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {4, 4, 3, 7, 12}, /* cost of unaligned loads. */
+ {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 3, /* cost of moving SSE register to integer. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1062,6 +1173,30 @@ static stringop_algs bdver_memset[2] = {
{-1, libcall, false}}}};
const struct processor_costs bdver_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {10, 10, 18}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 4, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {10, 10}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 10, 40, 60}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 10, 40, 60}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 16, 20, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -1081,32 +1216,18 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 8, /* cost for loading QImode using movzbl */
{8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{8, 8, 8}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 28}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {10, 10, 18}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 4, /* cost of moving MMX register */
- {12, 12}, /* cost of loading MMX registers
- in SImode and DImode */
- {10, 10}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {12, 12, 10, 40, 60}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {12, 12, 10, 40, 60}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {10, 10, 10, 40, 60}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{12, 12, 10, 40, 60}, /* cost of unaligned loads. */
- {10, 10, 10, 40, 60}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{10, 10, 10, 40, 60}, /* cost of unaligned stores. */
- 16, 20, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 16, /* cost of moving SSE register to integer. */
12, 12, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
16, /* size of l1 cache. */
@@ -1164,6 +1285,37 @@ static stringop_algs znver1_memset[2] = {
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
struct processor_costs znver1_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+
+ /* reg-reg moves are done by renaming and thus they are even cheaper than
+ 1 cycle. Because reg-reg move cost is 2 and the following tables correspond
+ to doubles of latencies, we do not model this correctly. It does not
+ seem to make practical difference to bump prices up even more. */
+ 6, /* cost for loading QImode using
+ movzbl. */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ 2, /* cost of reg,reg fld/fst. */
+ {6, 6, 16}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode. */
+ {8, 8, 16}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode. */
+ 2, /* cost of moving MMX register. */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode. */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode. */
+ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
+ {6, 6, 6, 12, 24}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit. */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit. */
+ 6, 6, /* SSE->integer and integer->SSE moves. */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction. */
COSTS_N_INSNS (1), /* cost of a lea instruction. */
COSTS_N_INSNS (1), /* variable shift costs. */
@@ -1186,39 +1338,19 @@ struct processor_costs znver1_cost = {
COSTS_N_INSNS (1), /* cost of movzx. */
8, /* "large" insn. */
9, /* MOVE_RATIO. */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
-
- /* reg-reg moves are done by renaming and thus they are even cheaper than
- 1 cycle. Becuase reg-reg move cost is 2 and the following tables correspond
- to doubles of latencies, we do not model this correctly. It does not
- seem to make practical difference to bump prices up even more. */
- 6, /* cost for loading QImode using
- movzbl. */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{8, 8, 8}, /* cost of storing integer
registers. */
- 2, /* cost of reg,reg fld/fst. */
- {6, 6, 16}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode. */
- {8, 8, 16}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode. */
- 2, /* cost of moving MMX register. */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode. */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode. */
- 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
- {6, 6, 6, 12, 24}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit. */
+ {6, 6, 6, 12, 24}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{6, 6, 6, 12, 24}, /* cost of unaligned loads. */
- {8, 8, 8, 16, 32}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit. */
{8, 8, 8, 16, 32}, /* cost of unaligned stores. */
- 6, 6, /* SSE->integer and integer->SSE moves. */
+ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
+ 6, /* cost of moving SSE register to integer. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1288,31 +1420,7 @@ static stringop_algs znver2_memset[2] = {
{-1, libcall, false}}}};
struct processor_costs znver2_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction. */
- COSTS_N_INSNS (1), /* cost of a lea instruction. */
- COSTS_N_INSNS (1), /* variable shift costs. */
- COSTS_N_INSNS (1), /* constant shift costs. */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
- COSTS_N_INSNS (3), /* HI. */
- COSTS_N_INSNS (3), /* SI. */
- COSTS_N_INSNS (3), /* DI. */
- COSTS_N_INSNS (3)}, /* other. */
- 0, /* cost of multiply per each bit
- set. */
- /* Depending on parameters, idiv can get faster on ryzen. This is upper
- bound. */
- {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
- COSTS_N_INSNS (22), /* HI. */
- COSTS_N_INSNS (30), /* SI. */
- COSTS_N_INSNS (45), /* DI. */
- COSTS_N_INSNS (45)}, /* other. */
- COSTS_N_INSNS (1), /* cost of movsx. */
- COSTS_N_INSNS (1), /* cost of movzx. */
- 8, /* "large" insn. */
- 9, /* MOVE_RATIO. */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
+ /* Start of register allocator costs. integer->integer move cost is 2. */
/* reg-reg moves are done by renaming and thus they are even cheaper than
1 cycle. Because reg-reg move cost is 2 and following tables correspond
@@ -1339,12 +1447,48 @@ struct processor_costs znver2_cost = {
register. */
{6, 6, 6, 6, 12}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit. */
- {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
{8, 8, 8, 8, 16}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit. */
- {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
6, 6, /* SSE->integer and integer->SSE
moves. */
+ /* End of register allocator costs. */
+
+ COSTS_N_INSNS (1), /* cost of an add instruction. */
+ COSTS_N_INSNS (1), /* cost of a lea instruction. */
+ COSTS_N_INSNS (1), /* variable shift costs. */
+ COSTS_N_INSNS (1), /* constant shift costs. */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
+ COSTS_N_INSNS (3), /* HI. */
+ COSTS_N_INSNS (3), /* SI. */
+ COSTS_N_INSNS (3), /* DI. */
+ COSTS_N_INSNS (3)}, /* other. */
+ 0, /* cost of multiply per each bit
+ set. */
+ /* Depending on parameters, idiv can get faster on ryzen. This is upper
+ bound. */
+ {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
+ COSTS_N_INSNS (22), /* HI. */
+ COSTS_N_INSNS (30), /* SI. */
+ COSTS_N_INSNS (45), /* DI. */
+ COSTS_N_INSNS (45)}, /* other. */
+ COSTS_N_INSNS (1), /* cost of movsx. */
+ COSTS_N_INSNS (1), /* cost of movzx. */
+ 8, /* "large" insn. */
+ 9, /* MOVE_RATIO. */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 8}, /* cost of storing integer
+ registers. */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 8, 16}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
+ {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
+ 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ register. */
+ 6, /* cost of moving SSE register to integer. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1416,6 +1560,30 @@ static stringop_algs skylake_memset[2] = {
static const
struct processor_costs skylake_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 3}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 10}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 10, 20}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 12, 24}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 2, 2, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1)+1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -1437,30 +1605,18 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (0), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- 6, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 3}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 10, 20}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 10, 20}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 12, 24}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{6, 6, 6, 10, 20}, /* cost of unaligned loads. */
- {8, 8, 8, 12, 24}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
- 2, 2, /* SSE->integer and integer->SSE moves */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ 2, /* cost of moving SSE register to integer. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1509,6 +1665,30 @@ static stringop_algs btver1_memset[2] = {
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl */
+ {6, 8, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 8, 6}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {12, 12, 38}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 4, /* cost of moving MMX register */
+ {10, 10}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {10, 10, 12, 48, 96}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 14, 14, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -1528,32 +1708,18 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 8, /* cost for loading QImode using movzbl */
{6, 8, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 8, 6}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 28}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {12, 12, 38}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 4, /* cost of moving MMX register */
- {10, 10}, /* cost of loading MMX registers
- in SImode and DImode */
- {12, 12}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {10, 10, 12, 48, 96}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 48, 96}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{10, 10, 12, 48, 96}, /* cost of unaligned loads. */
- {10, 10, 12, 48, 96}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
- 14, 14, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 14, /* cost of moving SSE register to integer. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -1600,6 +1766,30 @@ static stringop_algs btver2_memset[2] = {
{libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
{-1, libcall, false}}}};
const struct processor_costs btver2_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {8, 8, 6}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 28}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {12, 12, 38}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 4, /* cost of moving MMX register */
+ {10, 10}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {10, 10, 12, 48, 96}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 14, 14, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -1619,32 +1809,18 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 8, /* cost for loading QImode using movzbl */
{8, 8, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{8, 8, 6}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 28}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {12, 12, 38}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 4, /* cost of moving MMX register */
- {10, 10}, /* cost of loading MMX registers
- in SImode and DImode */
- {12, 12}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {10, 10, 12, 48, 96}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {10, 10, 12, 48, 96}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {10, 10, 12, 48, 96}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{10, 10, 12, 48, 96}, /* cost of unaligned loads. */
- {10, 10, 12, 48, 96}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
- 14, 14, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 14, /* cost of moving SSE register to integer. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -1690,6 +1866,30 @@ static stringop_algs pentium4_memset[2] = {
static const
struct processor_costs pentium4_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 5, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 12, /* cost of reg,reg fld/fst */
+ {14, 14, 14}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {14, 14, 14}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 12, /* cost of moving MMX register */
+ {16, 16}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {16, 16}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
+ {16, 16, 16, 32, 64}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {16, 16, 16, 32, 64}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 20, 12, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (3), /* cost of a lea instruction */
COSTS_N_INSNS (4), /* variable shift costs */
@@ -1709,32 +1909,18 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
6, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 5, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
- 12, /* cost of reg,reg fld/fst */
- {14, 14, 14}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {14, 14, 14}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 12, /* cost of moving MMX register */
- {16, 16}, /* cost of loading MMX registers
- in SImode and DImode */
- {16, 16}, /* cost of storing MMX registers
- in SImode and DImode */
- 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
- {16, 16, 16, 32, 64}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {16, 16, 16, 32, 64}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {16, 16, 16, 32, 64}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{32, 32, 32, 64, 128}, /* cost of unaligned loads. */
- {16, 16, 16, 32, 64}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{32, 32, 32, 64, 128}, /* cost of unaligned stores. */
- 20, 12, /* SSE->integer and integer->SSE moves */
+ 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
+ 20, /* cost of moving SSE register to integer. */
16, 16, /* Gather load static, per_elt. */
16, 16, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -1783,6 +1969,30 @@ static stringop_algs nocona_memset[2] = {
static const
struct processor_costs nocona_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 12, /* cost of reg,reg fld/fst */
+ {14, 14, 14}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {14, 14, 14}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 14, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
+ {12, 12, 12, 24, 48}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {12, 12, 12, 24, 48}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 20, 12, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -1802,32 +2012,18 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{4, 4, 4}, /* cost of storing integer registers */
- 12, /* cost of reg,reg fld/fst */
- {14, 14, 14}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {14, 14, 14}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 14, /* cost of moving MMX register */
- {12, 12}, /* cost of loading MMX registers
- in SImode and DImode */
- {12, 12}, /* cost of storing MMX registers
- in SImode and DImode */
- 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
- {12, 12, 12, 24, 48}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {12, 12, 12, 24, 48}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {12, 12, 12, 24, 48}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{24, 24, 24, 48, 96}, /* cost of unaligned loads. */
- {12, 12, 12, 24, 48}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{24, 24, 24, 48, 96}, /* cost of unaligned stores. */
- 20, 12, /* SSE->integer and integer->SSE moves */
+ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
+ 20, /* cost of moving SSE register to integer. */
12, 12, /* Gather load static, per_elt. */
12, 12, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -1874,6 +2070,30 @@ static stringop_algs atom_memset[2] = {
{8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs atom_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 18}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {14, 14, 24}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {10, 10}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 8, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -1893,32 +2113,18 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {6, 6, 18}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {14, 14, 24}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {10, 10}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {8, 8, 8, 16, 32}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{16, 16, 16, 32, 64}, /* cost of unaligned loads. */
- {8, 8, 8, 16, 32}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
- 8, 6, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 8, /* cost of moving SSE register to integer. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -1965,6 +2171,30 @@ static stringop_algs slm_memset[2] = {
{8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs slm_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 8, /* cost for loading QImode using movzbl */
+ {8, 8, 8}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 18}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 18}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 8, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -1984,32 +2214,18 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 8, /* cost for loading QImode using movzbl */
{8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 18}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 18}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
- {8, 8, 8, 16, 32}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {8, 8, 8, 16, 32}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {8, 8, 8, 16, 32}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{16, 16, 16, 32, 64}, /* cost of unaligned loads. */
- {8, 8, 8, 16, 32}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
- 8, 6, /* SSE->integer and integer->SSE moves */
+ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
+ 8, /* cost of moving SSE register to integer. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2056,6 +2272,30 @@ static stringop_algs intel_memset[2] = {
{8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs intel_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 10}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 6, 6}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 6}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 4, 4, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
@@ -2075,32 +2315,18 @@ struct processor_costs intel_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 6, 6}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 6}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 6}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- {6, 6, 6, 6, 6}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- 4, 4, /* SSE->integer and integer->SSE moves */
+ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
+ 4, /* cost of moving SSE register to integer. */
6, 6, /* Gather load static, per_elt. */
6, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2151,6 +2377,30 @@ static stringop_algs generic_memset[2] = {
{-1, libcall, false}}}};
static const
struct processor_costs generic_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 12}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 10, 15}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 10, 15}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 6, 6, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
/* Setting cost to 2 makes our current implementation of synth_mult result in
use of unnecessary temporary registers causing regression on several
@@ -2173,32 +2423,18 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {6, 6, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 12}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 10, 15}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 10, 15}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 10, 15}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{6, 6, 6, 10, 15}, /* cost of unaligned loads. */
- {6, 6, 6, 10, 15}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{6, 6, 6, 10, 15}, /* cost of unaligned storess. */
- 6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
+ 6, /* cost of moving SSE register to integer. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2251,6 +2487,30 @@ static stringop_algs core_memset[2] = {
static const
struct processor_costs core_cost = {
+ /* Start of register allocator costs. integer->integer move cost is 2. */
+ 6, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {6, 6, 6}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 10}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {6, 6}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 12}, /* cost of storing SSE registers
+ in 32,64,128,256 and 512-bit */
+ 2, 2, /* SSE->integer and integer->SSE moves */
+ /* End of register allocator costs. */
+
COSTS_N_INSNS (1), /* cost of an add instruction */
/* On all chips taken into consideration lea is 2 cycles and more. With
this cost however our current implementation of synth_mult results in
@@ -2277,32 +2537,18 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
-
- /* All move costs are relative to integer->integer move times 2 and thus
- they are latency*2. */
- 6, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 6, 12}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
+ {6, 6, 6, 6, 12}, /* cost of loading SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
+ {6, 6, 6, 6, 12}, /* cost of storing SSE register
+ in 32bit, 64bit, 128bit, 256bit and 512bit */
{6, 6, 6, 6, 12}, /* cost of unaligned loads. */
- {6, 6, 6, 6, 12}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
{6, 6, 6, 6, 12}, /* cost of unaligned stores. */
- 2, 2, /* SSE->integer and integer->SSE moves */
+ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
+ 2, /* cost of moving SSE register to integer. */
/* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
rec. throughput 6.
So 5 uops statically and one uops per load. */