diff options
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386-features.cc | 39 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 23 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune-costs.h | 133 |
4 files changed, 195 insertions, 4 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 31f3ee2..1ba5ac4 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3309,8 +3309,16 @@ ix86_get_vector_load_mode (unsigned int size) mode = V64QImode; else if (size == 32) mode = V32QImode; - else + else if (size == 16) mode = V16QImode; + else if (size == 8) + mode = V8QImode; + else if (size == 4) + mode = V4QImode; + else if (size == 2) + mode = V2QImode; + else + gcc_unreachable (); return mode; } @@ -3338,13 +3346,36 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const, if (SUBREG_P (dest) || mode == vector_mode) replace = vector_const; else - replace = gen_rtx_SUBREG (mode, vector_const, 0); + { + unsigned int size = GET_MODE_SIZE (mode); + if (size < ix86_regmode_natural_size (mode)) + { + /* If the mode size is smaller than its natural size, + first insert an extra move with a QI vector SUBREG + of the same size to avoid validate_subreg failure. */ + machine_mode vmode = ix86_get_vector_load_mode (size); + rtx vreg; + if (mode == vmode) + vreg = vector_const; + else + { + vreg = gen_reg_rtx (vmode); + rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0); + rtx pat = gen_rtx_SET (vreg, vsubreg); + rtx_insn *vinsn = emit_insn_before (pat, insn); + df_insn_rescan (vinsn); + } + replace = gen_rtx_SUBREG (mode, vreg, 0); + } + else + replace = gen_rtx_SUBREG (mode, vector_const, 0); + } - /* NB: Don't run recog_memoized here since vector SUBREG may not - be valid. Let LRA handle vector SUBREG. */ SET_SRC (set) = replace; /* Drop possible dead definitions. */ PATTERN (insn) = set; + INSN_CODE (insn) = -1; + recog_memoized (insn); df_insn_rescan (insn); } } diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index f28c92a..bef95ea 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -12320,6 +12320,7 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg) static GTY(()) rtx ix86_tls_index_symbol; +#if TARGET_WIN32_TLS static rtx ix86_tls_index (void) { @@ -12331,6 +12332,7 @@ ix86_tls_index (void) else return ix86_tls_index_symbol; } +#endif /* Construct the SYMBOL_REF for the tls_get_addr function. */ @@ -22792,6 +22794,27 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; + case FLOAT: + case UNSIGNED_FLOAT: + if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* TODO: We do not have cost tables for x87. */ + *total = cost->fadd; + else if (VECTOR_MODE_P (mode)) + *total = ix86_vec_cost (mode, cost->cvtpi2ps); + else + *total = cost->cvtsi2ss; + return false; + + case FIX: + case UNSIGNED_FIX: + if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* TODO: We do not have cost tables for x87. */ + *total = cost->fadd; + else if (VECTOR_MODE_P (mode)) + *total = ix86_vec_cost (mode, cost->cvtps2pi); + else + *total = cost->cvtss2si; + return false; case ABS: /* SSE requires memory load for the constant operand. It may make diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 02bf357..6a38de3 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -213,6 +213,10 @@ struct processor_costs { such as VCVTPD2PS with larger reg in ymm. */ const int vcvtps2pd512; /* cost 512bit packed FP conversions, such as VCVTPD2PS with larger reg in zmm. */ + const int cvtsi2ss; /* cost of CVTSI2SS instruction. */ + const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */ + const int cvtpi2ps; /* cost of CVTPI2PS instruction. */ + const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index cddcf61..6cce70a 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -134,6 +134,11 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */ + 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ ix86_size_memcpy, ix86_size_memset, @@ -249,6 +254,10 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -365,6 +374,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -479,6 +492,10 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -586,6 +603,10 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -708,6 +729,10 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -821,6 +846,10 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -937,6 +966,10 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -1054,6 +1087,10 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -1180,6 +1217,10 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1314,6 +1355,10 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1441,6 +1486,10 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1593,6 +1642,10 @@ struct processor_costs znver1_cost = { /* Real latency is 4, but for split regs multiply cost of half op by 2. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1755,6 +1808,10 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1893,6 +1950,10 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2034,6 +2095,10 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ /* Real latency is 6, but for split regs multiply cost of half op by 2. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2188,6 +2253,10 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2330,6 +2399,10 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2462,6 +2535,10 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ icelake_memcpy, icelake_memset, @@ -2588,6 +2665,10 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ alderlake_memcpy, alderlake_memset, @@ -2707,6 +2788,10 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2823,6 +2908,10 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2938,6 +3027,10 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium4_memcpy, pentium4_memset, @@ -3056,6 +3149,10 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3172,6 +3269,10 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3288,6 +3389,10 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3418,6 +3523,10 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3534,6 +3643,10 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (8), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ intel_memcpy, intel_memset, @@ -3655,6 +3768,10 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3774,6 +3891,10 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3893,6 +4014,10 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ shijidadao_memcpy, shijidadao_memset, @@ -4020,6 +4145,10 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4152,6 +4281,10 @@ struct processor_costs core_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ core_memcpy, core_memset, |