about summary refs log tree commit diff
path: root/gcc/config
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/x86-tune-costs.h 61
1 files changed, 31 insertions, 30 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index f01b8ee..18ad3cc 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1867,9 +1867,9 @@ struct processor_costs znver4_cost = {
{8, 8, 8}, /* cost of storing integer
registers. */
2, /* cost of reg,reg fld/fst. */
- {6, 6, 16}, /* cost of loading fp registers
+ {14, 14, 17}, /* cost of loading fp registers
in SFmode, DFmode and XFmode. */
- {8, 8, 16}, /* cost of storing fp registers
+ {12, 12, 16}, /* cost of storing fp registers
in SFmode, DFmode and XFmode. */
2, /* cost of moving MMX register. */
{6, 6}, /* cost of loading MMX registers
@@ -1878,13 +1878,13 @@ struct processor_costs znver4_cost = {
in SImode and DImode. */
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
- {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ {6, 6, 10, 10, 12}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit. */
- {8, 8, 8, 8, 16}, /* cost of storing SSE registers
+ {8, 8, 8, 12, 12}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit. */
- 6, 6, /* SSE->integer and integer->SSE
+ 6, 8, /* SSE->integer and integer->SSE
moves. */
- 8, 8, /* mask->integer and integer->mask moves */
+ 8, 8, /* mask->integer and integer->mask moves */
{6, 6, 6}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 8}, /* cost of storing mask register
@@ -1894,6 +1894,7 @@ struct processor_costs znver4_cost = {
},
COSTS_N_INSNS (1), /* cost of an add instruction. */
+ /* TODO: Lea with 3 components has cost 2. */
COSTS_N_INSNS (1), /* cost of a lea instruction. */
COSTS_N_INSNS (1), /* variable shift costs. */
COSTS_N_INSNS (1), /* constant shift costs. */
@@ -1904,11 +1905,11 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (3)}, /* other. */
0, /* cost of multiply per each bit
set. */
- {COSTS_N_INSNS (9), /* cost of a divide/mod for QI. */
- COSTS_N_INSNS (10), /* HI. */
- COSTS_N_INSNS (12), /* SI. */
- COSTS_N_INSNS (17), /* DI. */
- COSTS_N_INSNS (17)}, /* other. */
+ {COSTS_N_INSNS (12), /* cost of a divide/mod for QI. */
+ COSTS_N_INSNS (13), /* HI. */
+ COSTS_N_INSNS (13), /* SI. */
+ COSTS_N_INSNS (18), /* DI. */
+ COSTS_N_INSNS (18)}, /* other. */
COSTS_N_INSNS (1), /* cost of movsx. */
COSTS_N_INSNS (1), /* cost of movzx. */
8, /* "large" insn. */
@@ -1919,22 +1920,22 @@ struct processor_costs znver4_cost = {
Relative to reg-reg move (2). */
{8, 8, 8}, /* cost of storing integer
registers. */
- {6, 6, 6, 6, 12}, /* cost of loading SSE registers
+ {6, 6, 10, 10, 12}, /* cost of loading SSE registers
in 32bit, 64bit, 128bit, 256bit and 512bit */
- {8, 8, 8, 8, 16}, /* cost of storing SSE register
+ {8, 8, 8, 12, 12}, /* cost of storing SSE register
in 32bit, 64bit, 128bit, 256bit and 512bit */
- {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
- {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
- 2, 2, 3, /* cost of moving XMM,YMM,ZMM
+ {6, 6, 6, 6, 6}, /* cost of unaligned loads. */
+ {8, 8, 8, 8, 8}, /* cost of unaligned stores. */
+ 2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
- /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
- throughput 9. Approx 7 uops do not depend on vector size and every load
- is 4 uops. */
- 14, 8, /* Gather load static, per_elt. */
- 14, 10, /* Gather store static, per_elt. */
+ /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops,
+ throughput 5. Approx 7 uops do not depend on vector size and every load
+ is 5 uops. */
+ 14, 10, /* Gather load static, per_elt. */
+ 14, 20, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
- 512, /* size of l2 cache. */
+ 1024, /* size of l2 cache. */
64, /* size of prefetch block. */
/* New AMD processors never drop prefetches; if they cannot be performed
immediately, they are queued. We set number of simultaneous prefetches
@@ -1943,26 +1944,26 @@ struct processor_costs znver4_cost = {
time). */
100, /* number of parallel prefetches. */
3, /* Branch cost. */
- COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (7), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (7), /* cost of FMUL instruction. */
/* Latency of fdiv is 8-15. */
COSTS_N_INSNS (15), /* cost of FDIV instruction. */
COSTS_N_INSNS (1), /* cost of FABS instruction. */
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
/* Latency of fsqrt is 4-10. */
- COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
+ COSTS_N_INSNS (25), /* cost of FSQRT instruction. */
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
- COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
- COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
- COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
+ COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
+ COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
+ COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
/* 9-13. */
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
- COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
- COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
+ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.