aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/i386-features.cc39
-rw-r--r--gcc/config/i386/i386.cc23
-rw-r--r--gcc/config/i386/i386.h4
-rw-r--r--gcc/config/i386/x86-tune-costs.h133
4 files changed, 195 insertions, 4 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 31f3ee2..1ba5ac4 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3309,8 +3309,16 @@ ix86_get_vector_load_mode (unsigned int size)
mode = V64QImode;
else if (size == 32)
mode = V32QImode;
- else
+ else if (size == 16)
mode = V16QImode;
+ else if (size == 8)
+ mode = V8QImode;
+ else if (size == 4)
+ mode = V4QImode;
+ else if (size == 2)
+ mode = V2QImode;
+ else
+ gcc_unreachable ();
return mode;
}
@@ -3338,13 +3346,36 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const,
if (SUBREG_P (dest) || mode == vector_mode)
replace = vector_const;
else
- replace = gen_rtx_SUBREG (mode, vector_const, 0);
+ {
+ unsigned int size = GET_MODE_SIZE (mode);
+ if (size < ix86_regmode_natural_size (mode))
+ {
+ /* If the mode size is smaller than its natural size,
+ first insert an extra move with a QI vector SUBREG
+ of the same size to avoid validate_subreg failure. */
+ machine_mode vmode = ix86_get_vector_load_mode (size);
+ rtx vreg;
+ if (mode == vmode)
+ vreg = vector_const;
+ else
+ {
+ vreg = gen_reg_rtx (vmode);
+ rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
+ rtx pat = gen_rtx_SET (vreg, vsubreg);
+ rtx_insn *vinsn = emit_insn_before (pat, insn);
+ df_insn_rescan (vinsn);
+ }
+ replace = gen_rtx_SUBREG (mode, vreg, 0);
+ }
+ else
+ replace = gen_rtx_SUBREG (mode, vector_const, 0);
+ }
- /* NB: Don't run recog_memoized here since vector SUBREG may not
- be valid. Let LRA handle vector SUBREG. */
SET_SRC (set) = replace;
/* Drop possible dead definitions. */
PATTERN (insn) = set;
+ INSN_CODE (insn) = -1;
+ recog_memoized (insn);
df_insn_rescan (insn);
}
}
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f28c92a..bef95ea 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -12320,6 +12320,7 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg)
static GTY(()) rtx ix86_tls_index_symbol;
+#if TARGET_WIN32_TLS
static rtx
ix86_tls_index (void)
{
@@ -12331,6 +12332,7 @@ ix86_tls_index (void)
else
return ix86_tls_index_symbol;
}
+#endif
/* Construct the SYMBOL_REF for the tls_get_addr function. */
@@ -22792,6 +22794,27 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
else
*total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtpi2ps);
+ else
+ *total = cost->cvtsi2ss;
+ return false;
+
+ case FIX:
+ case UNSIGNED_FIX:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtps2pi);
+ else
+ *total = cost->cvtss2si;
+ return false;
case ABS:
/* SSE requires memory load for the constant operand. It may make
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 02bf357..6a38de3 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -213,6 +213,10 @@ struct processor_costs {
such as VCVTPD2PS with larger reg in ymm. */
const int vcvtps2pd512; /* cost 512bit packed FP conversions,
such as VCVTPD2PS with larger reg in zmm. */
+ const int cvtsi2ss; /* cost of CVTSI2SS instruction. */
+ const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */
+ const int cvtpi2ps; /* cost of CVTPI2PS instruction. */
+ const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index cddcf61..6cce70a 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -134,6 +134,11 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */
COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */
+
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy,
ix86_size_memset,
@@ -249,6 +254,10 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy,
i386_memset,
@@ -365,6 +374,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy,
i486_memset,
@@ -479,6 +492,10 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -586,6 +603,10 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -708,6 +729,10 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy,
pentiumpro_memset,
@@ -821,6 +846,10 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy,
geode_memset,
@@ -937,6 +966,10 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy,
k6_memset,
@@ -1054,6 +1087,10 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy,
athlon_memset,
@@ -1180,6 +1217,10 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy,
k8_memset,
@@ -1314,6 +1355,10 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy,
amdfam10_memset,
@@ -1441,6 +1486,10 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver_memcpy,
bdver_memset,
@@ -1593,6 +1642,10 @@ struct processor_costs znver1_cost = {
/* Real latency is 4, but for split regs multiply cost of half op by 2. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
and it can execute 2 integer additions and 2 multiplications thus
reassociation may make sense up to a width of 6. SPEC2k6 benchmarks suggest
@@ -1755,6 +1808,10 @@ struct processor_costs znver2_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -1893,6 +1950,10 @@ struct processor_costs znver3_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -2034,6 +2095,10 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
/* Real latency is 6, but for split regs multiply cost of half op by 2. */
COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -2188,6 +2253,10 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen5 can execute:
- integer ops: 6 per cycle, at most 3 multiplications.
latency 1 for additions, 3 for multiplications (pipelined)
@@ -2330,6 +2399,10 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
skylake_memcpy,
skylake_memset,
@@ -2462,6 +2535,10 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
icelake_memcpy,
icelake_memset,
@@ -2588,6 +2665,10 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
alderlake_memcpy,
alderlake_memset,
@@ -2707,6 +2788,10 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy,
btver1_memset,
@@ -2823,6 +2908,10 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy,
btver2_memset,
@@ -2938,6 +3027,10 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy,
pentium4_memset,
@@ -3056,6 +3149,10 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy,
nocona_memset,
@@ -3172,6 +3269,10 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy,
atom_memset,
@@ -3288,6 +3389,10 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy,
slm_memset,
@@ -3418,6 +3523,10 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
tremont_memcpy,
tremont_memset,
@@ -3534,6 +3643,10 @@ struct processor_costs intel_cost = {
COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
intel_memcpy,
intel_memset,
@@ -3655,6 +3768,10 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
lujiazui_memcpy,
lujiazui_memset,
@@ -3774,6 +3891,10 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
yongfeng_memcpy,
yongfeng_memset,
@@ -3893,6 +4014,10 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
shijidadao_memcpy,
shijidadao_memset,
@@ -4020,6 +4145,10 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy,
generic_memset,
@@ -4152,6 +4281,10 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy,
core_memset,