aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/x86-tune-costs.h
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/x86-tune-costs.h')
-rw-r--r-- gcc/config/i386/x86-tune-costs.h 162
1 files changed, 29 insertions, 133 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index e509129..c8603b9 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2252,7 +2252,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
/* ADDSS has throughput 2 and latency 2
(in some cases when source is another addition). */
- COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
/* MULSS has throughput 2 and latency 3. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
@@ -3568,127 +3568,6 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (2), /* Branch mispredict scale. */
};
-static stringop_algs intel_memcpy[2] = {
- {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
- {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static stringop_algs intel_memset[2] = {
- {libcall, {{8, loop, false}, {15, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{24, loop, false}, {32, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static const
-struct processor_costs intel_cost = {
- {
- /* Start of register allocator costs. integer->integer move cost is 2. */
- 6, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 6, 6}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
- {6, 6, 6, 6, 6}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
- 4, 4, /* SSE->integer and integer->SSE moves */
- 4, 4, /* mask->integer and integer->mask moves */
- {4, 4, 4}, /* cost of loading mask register
- in QImode, HImode, SImode. */
- {6, 6, 6}, /* cost if storing mask register
- in QImode, HImode, SImode. */
- 2, /* cost of moving mask register. */
- /* End of register allocator costs. */
- },
-
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 6, /* CLEAR_RATIO */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers */
- {6, 6, 6, 6, 6}, /* cost of loading SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit */
- {6, 6, 6, 6, 6}, /* cost of storing SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit */
- {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
- 4, /* cost of moving SSE register to integer. */
- 4, /* cost of moving integer register to SSE. */
- 6, 6, /* Gather load static, per_elt. */
- 6, 6, /* Gather store static, per_elt. */
- 32, /* size of l1 cache. */
- 256, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
-
- COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
- COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_INSNS (8), /* cost of MULSS instruction. */
- COSTS_N_INSNS (8), /* cost of MULSD instruction. */
- COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
- COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
- COSTS_N_INSNS (20), /* cost of DIVSS instruction. */
- COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
- COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
- COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
- COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
- COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
- COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
- COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
- COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
- COSTS_N_INSNS (8), /* cost of CVTPI2PS instruction. */
- COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
- 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- intel_memcpy,
- intel_memset,
- COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
- COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
- "16", /* Loop alignment. */
- "16:8:8", /* Jump alignment. */
- "0:0:8", /* Label alignment. */
- "16", /* Func alignment. */
- 4, /* Small unroll limit. */
- 2, /* Small unroll factor. */
- COSTS_N_INSNS (2), /* Branch mispredict scale. */
-};
-
/* lujiazui_cost should produce code tuned for ZHAOXIN lujiazui CPU. */
static stringop_algs lujiazui_memcpy[2] = {
{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
@@ -4065,19 +3944,36 @@ struct processor_costs shijidadao_cost = {
-/* Generic should produce code tuned for Core-i7 (and newer chips)
- and btver1 (and newer chips). */
+/* Generic should produce code tuned for Haswell (and newer chips)
+ and znver1 (and newer chips):
+ 1. Don't align memory.
+ 2. For known sizes, prefer a vector loop or an unrolled loop with 4 moves
+ or stores per iteration, without aligning the loop, up to 256 bytes.
+ 3. For unknown sizes, use memcpy/memset.
+ 4. Since each loop iteration has 4 stores, and zeroing with the
+ unrolled loop may need 8 stores, change CLEAR_RATIO to 10 so that
+ zeroing of up to 72 bytes is fully unrolled with 9 stores without
+ SSE.
+ */
static stringop_algs generic_memcpy[2] = {
- {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}},
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}}};
static stringop_algs generic_memset[2] = {
- {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}},
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}}};
static const
struct processor_costs generic_cost = {
{
@@ -4134,7 +4030,7 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 6, /* CLEAR_RATIO */
+ 10, /* CLEAR_RATIO */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */