diff options
Diffstat (limited to 'gcc/config/i386/x86-tune-costs.h')
-rw-r--r-- | gcc/config/i386/x86-tune-costs.h | 162 |
1 files changed, 29 insertions, 133 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index e509129..c8603b9 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2252,7 +2252,7 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ /* ADDSS has throughput 2 and latency 2 (in some cases when source is another addition). */ - COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ /* MULSS has throughput 2 and latency 3. */ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ @@ -3568,127 +3568,6 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (2), /* Branch mispredict scale. */ }; -static stringop_algs intel_memcpy[2] = { - {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; -static stringop_algs intel_memset[2] = { - {libcall, {{8, loop, false}, {15, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {32, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; -static const -struct processor_costs intel_cost = { - { - /* Start of register allocator costs. integer->integer move cost is 2. */ - 6, /* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). 
*/ - {6, 6, 6}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {6, 6, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 10}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6}, /* cost of storing MMX registers - in SImode and DImode */ - 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ - {6, 6, 6, 6, 6}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {6, 6, 6, 6, 6}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - 4, 4, /* SSE->integer and integer->SSE moves */ - 4, 4, /* mask->integer and integer->mask moves */ - {4, 4, 4}, /* cost of loading mask register - in QImode, HImode, SImode. */ - {6, 6, 6}, /* cost if storing mask register - in QImode, HImode, SImode. */ - 2, /* cost of moving mask register. */ - /* End of register allocator costs. */ - }, - - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (3), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (2)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (26), /* HI */ - COSTS_N_INSNS (42), /* SI */ - COSTS_N_INSNS (74), /* DI */ - COSTS_N_INSNS (74)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 17, /* MOVE_RATIO */ - 6, /* CLEAR_RATIO */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). 
*/ - {6, 6, 6}, /* cost of storing integer registers */ - {6, 6, 6, 6, 6}, /* cost of loading SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit */ - {6, 6, 6, 6, 6}, /* cost of storing SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit */ - {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ - {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ - 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ - 4, /* cost of moving SSE register to integer. */ - 4, /* cost of moving integer register to SSE. */ - 6, 6, /* Gather load static, per_elt. */ - 6, 6, /* Gather store static, per_elt. */ - 32, /* size of l1 cache. */ - 256, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - 3, /* Branch cost */ - COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (8), /* cost of FMUL instruction. */ - COSTS_N_INSNS (20), /* cost of FDIV instruction. */ - COSTS_N_INSNS (8), /* cost of FABS instruction. */ - COSTS_N_INSNS (8), /* cost of FCHS instruction. */ - COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - - COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ - COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_INSNS (8), /* cost of MULSS instruction. */ - COSTS_N_INSNS (8), /* cost of MULSD instruction. */ - COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ - COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ - COSTS_N_INSNS (20), /* cost of DIVSS instruction. */ - COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ - COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ - COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ - COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ - COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ - COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ - COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */ - COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ - COSTS_N_INSNS (8), /* cost of CVTPI2PS instruction. 
*/ - COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ - 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ - intel_memcpy, - intel_memset, - COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ - COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ - "16", /* Loop alignment. */ - "16:8:8", /* Jump alignment. */ - "0:0:8", /* Label alignment. */ - "16", /* Func alignment. */ - 4, /* Small unroll limit. */ - 2, /* Small unroll factor. */ - COSTS_N_INSNS (2), /* Branch mispredict scale. */ -}; - /* lujiazui_cost should produce code tuned for ZHAOXIN lujiazui CPU. */ static stringop_algs lujiazui_memcpy[2] = { {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, @@ -4065,19 +3944,36 @@ struct processor_costs shijidadao_cost = { -/* Generic should produce code tuned for Core-i7 (and newer chips) - and btver1 (and newer chips). */ +/* Generic should produce code tuned for Haswell (and newer chips) + and znver1 (and newer chips): + 1. Don't align memory. + 2. For known sizes, prefer vector loop, then unrolled loop with 4 moves or + stores per iteration without aligning the loop, up to 256 bytes. + 3. For unknown sizes, use memcpy/memset. + 4. Since each loop iteration has 4 stores and 8 stores for zeroing + with an unrolled loop may be needed, change CLEAR_RATIO to 10 so that + zeroing of up to 72 bytes is fully unrolled with 9 stores without + SSE. 
+ */ static stringop_algs generic_memcpy[2] = { - {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}, + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}}; static stringop_algs generic_memset[2] = { - {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}, + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}}; static const struct processor_costs generic_cost = { { @@ -4134,7 +4030,7 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - 6, /* CLEAR_RATIO */ + 10, /* CLEAR_RATIO */ {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ |