diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2016-01-21 15:53:25 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2016-01-21 15:53:25 +0000 |
commit | 83446463dd7b7e3d7e69bcaa079d6a5d8cd3cd76 (patch) | |
tree | 9989294bc7ad481c06d57b38dc103d810140ef29 | |
parent | 1a4f446f81c63151efc30f3ce60a749e8a4cf680 (diff) | |
parent | f74eefe0b98cd7e13825de8e8d9f32e22aed102c (diff) | |
download | qemu-83446463dd7b7e3d7e69bcaa079d6a5d8cd3cd76.zip qemu-83446463dd7b7e3d7e69bcaa079d6a5d8cd3cd76.tar.gz qemu-83446463dd7b7e3d7e69bcaa079d6a5d8cd3cd76.tar.bz2 |
Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
X86 queue, 2016-01-21
# gpg: Signature made Thu 21 Jan 2016 15:08:40 GMT using RSA key ID 984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
* remotes/ehabkost/tags/x86-pull-request:
target-i386: Add PKU and and OSPKE support
target-i386: Add support to migrate vcpu's TSC rate
target-i386: Reorganize TSC rate setting code
target-i386: Fallback vcpu's TSC rate to value returned by KVM
target-i386: Add suffixes to MMReg struct fields
target-i386: Define MMREG_UNION macro
target-i386: Define MMXReg._d field
target-i386: Rename XMM_[BWLSDQ] helpers to ZMM_*
target-i386: Rename struct XMMReg to ZMMReg
target-i386: Use a _q array on MMXReg too
target-i386/ops_sse.h: Use MMX_Q macro
target-i386: Rename optimize_flags_init()
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | hw/i386/pc.c | 1 | ||||
-rw-r--r-- | hw/i386/pc_piix.c | 2 | ||||
-rw-r--r-- | hw/i386/pc_q35.c | 2 | ||||
-rw-r--r-- | include/hw/i386/pc.h | 3 | ||||
m--------- | roms/seabios | 0 | ||||
-rw-r--r-- | target-i386/cpu.c | 27 | ||||
-rw-r--r-- | target-i386/cpu.h | 86 | ||||
-rw-r--r-- | target-i386/fpu_helper.c | 8 | ||||
-rw-r--r-- | target-i386/gdbstub.c | 8 | ||||
-rw-r--r-- | target-i386/helper.c | 8 | ||||
-rw-r--r-- | target-i386/kvm.c | 104 | ||||
-rw-r--r-- | target-i386/machine.c | 96 | ||||
-rw-r--r-- | target-i386/ops_sse.h | 374 | ||||
-rw-r--r-- | target-i386/ops_sse_header.h | 80 | ||||
-rw-r--r-- | target-i386/translate.c | 190 |
15 files changed, 564 insertions, 425 deletions
diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 9e37186..293e22a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1963,6 +1963,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported * to be used at the moment, 32K should be enough for a while. */ pcmc->acpi_data_size = 0x20000 + 0x8000; + pcmc->save_tsc_khz = true; mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; mc->default_boot_order = "cad"; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index df2b824..9721912 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -432,9 +432,11 @@ DEFINE_I440FX_MACHINE(v2_6, "pc-i440fx-2.6", NULL, static void pc_i440fx_2_5_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_6_machine_options(m); m->alias = NULL; m->is_default = 0; + pcmc->save_tsc_khz = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_5); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 412b3cd..8b76d5c 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -360,8 +360,10 @@ DEFINE_Q35_MACHINE(v2_6, "pc-q35-2.6", NULL, static void pc_q35_2_5_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_6_machine_options(m); m->alias = NULL; + pcmc->save_tsc_khz = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_5); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 588a33c..5bac03d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -120,6 +120,9 @@ struct PCMachineClass { bool has_reserved_memory; bool enforce_aligned_dimm; bool broken_reserved_end; + + /* TSC rate migration: */ + bool save_tsc_khz; }; #define TYPE_PC_MACHINE "generic-pc-machine" diff --git a/roms/seabios b/roms/seabios -Subproject 01a84bea2d28a19d2405c1ecac4bdef17683cc0 +Subproject 33fbe13a3e2a01e0ba1087a8feed801a0451db2 diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 4430494..b255644 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -263,6 +263,17 @@ static const char *cpuid_7_0_ebx_feature_name[] = { "clwb", NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL, }; +static const char *cpuid_7_0_ecx_feature_name[] = { + NULL, NULL, NULL, "pku", + "ospke", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +}; + static const char *cpuid_apm_edx_feature_name[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -352,6 +363,7 @@ static const char *cpuid_6_feature_name[] = { CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ +#define TCG_7_0_ECX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT @@ -409,6 +421,13 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .cpuid_reg = R_EBX, .tcg_features = TCG_7_0_EBX_FEATURES, }, + [FEAT_7_0_ECX] = { + .feat_names = cpuid_7_0_ecx_feature_name, + .cpuid_eax = 7, + .cpuid_needs_ecx = true, .cpuid_ecx = 0, + .cpuid_reg = R_ECX, + .tcg_features = TCG_7_0_ECX_FEATURES, + }, [FEAT_8000_0007_EDX] = { .feat_names = cpuid_apm_edx_feature_name, .cpuid_eax = 0x80000007, @@ -469,6 +488,8 @@ static const ExtSaveArea ext_save_areas[] = { .offset = 0x480, .size = 0x200 }, [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, .offset = 0x680, .size = 0x400 }, + [9] = { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, + .offset = 0xA80, .size = 0x8 }, }; const char *get_register_name_32(unsigned int reg) @@ -1728,7 +1749,7 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, void *opaque, return; } - cpu->env.tsc_khz = value / 1000; + cpu->env.tsc_khz = cpu->env.user_tsc_khz = value / 1000; } static void x86_cpuid_get_apic_id(Object *obj, Visitor *v, void *opaque, @@ -2390,7 +2411,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (count == 0) { *eax = 0; /* Maximum ECX value for sub-leaves */ *ebx = env->features[FEAT_7_0_EBX]; /* Feature flags */ - *ecx = 0; /* Reserved */ + *ecx = env->features[FEAT_7_0_ECX]; /* Feature flags */ *edx = 0; /* Reserved */ } else { *eax = 0; @@ -3091,7 +3112,7 @@ static void x86_cpu_initfn(Object *obj) /* init various static tables used in TCG mode */ if (tcg_enabled() && !inited) { inited = 1; - optimize_flags_init(); + tcg_x86_init(); } } diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 595891e..a990ea7 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -407,6 +407,7 @@ #define XSTATE_OPMASK (1ULL << 5) #define XSTATE_ZMM_Hi256 (1ULL << 6) #define XSTATE_Hi16_ZMM (1ULL << 7) +#define XSTATE_PKRU (1ULL << 9) /* CPUID feature words */ @@ -414,6 +415,7 @@ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ FEAT_1_ECX, /* CPUID[1].ECX */ FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */ + FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */ FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ @@ -585,6 +587,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ +#define CPUID_7_0_ECX_PKU (1U << 3) +#define CPUID_7_0_ECX_OSPKE (1U << 4) + #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) @@ -725,22 +730,18 @@ typedef struct SegmentCache { uint32_t flags; } SegmentCache; -typedef union { - uint8_t _b[64]; - uint16_t _w[32]; - uint32_t _l[16]; - uint64_t _q[8]; - float32 _s[16]; - float64 _d[8]; -} XMMReg; /* really zmm */ +#define MMREG_UNION(n, bits) \ + union n { \ + uint8_t _b_##n[(bits)/8]; \ + uint16_t _w_##n[(bits)/16]; \ + uint32_t _l_##n[(bits)/32]; \ + uint64_t _q_##n[(bits)/64]; \ + float32 _s_##n[(bits)/32]; \ + float64 _d_##n[(bits)/64]; \ + } -typedef union { - uint8_t _b[8]; - uint16_t _w[4]; - uint32_t _l[2]; - float32 _s[2]; - uint64_t q; -} MMXReg; +typedef MMREG_UNION(ZMMReg, 512) ZMMReg; +typedef MMREG_UNION(MMXReg, 64) MMXReg; typedef struct BNDReg { uint64_t lb; @@ -753,31 +754,31 @@ typedef struct BNDCSReg { } BNDCSReg; #ifdef HOST_WORDS_BIGENDIAN -#define XMM_B(n) _b[63 - (n)] -#define XMM_W(n) _w[31 - (n)] -#define XMM_L(n) _l[15 - (n)] -#define XMM_S(n) _s[15 - (n)] -#define XMM_Q(n) _q[7 - (n)] -#define XMM_D(n) _d[7 - (n)] - -#define MMX_B(n) _b[7 - (n)] -#define MMX_W(n) _w[3 - (n)] -#define MMX_L(n) _l[1 - (n)] -#define MMX_S(n) _s[1 - (n)] +#define ZMM_B(n) _b_ZMMReg[63 - (n)] +#define ZMM_W(n) _w_ZMMReg[31 - (n)] +#define ZMM_L(n) _l_ZMMReg[15 - (n)] +#define ZMM_S(n) _s_ZMMReg[15 - (n)] +#define ZMM_Q(n) _q_ZMMReg[7 - (n)] +#define ZMM_D(n) _d_ZMMReg[7 - (n)] + +#define MMX_B(n) _b_MMXReg[7 - (n)] +#define MMX_W(n) _w_MMXReg[3 - (n)] +#define MMX_L(n) _l_MMXReg[1 - (n)] +#define MMX_S(n) _s_MMXReg[1 - (n)] #else -#define XMM_B(n) _b[n] -#define XMM_W(n) _w[n] -#define XMM_L(n) _l[n] -#define XMM_S(n) _s[n] -#define XMM_Q(n) _q[n] -#define XMM_D(n) _d[n] - -#define MMX_B(n) _b[n] -#define MMX_W(n) _w[n] -#define MMX_L(n) _l[n] -#define MMX_S(n) _s[n] +#define ZMM_B(n) _b_ZMMReg[n] +#define ZMM_W(n) _w_ZMMReg[n] +#define ZMM_L(n) _l_ZMMReg[n] +#define ZMM_S(n) _s_ZMMReg[n] +#define ZMM_Q(n) _q_ZMMReg[n] +#define ZMM_D(n) _d_ZMMReg[n] + +#define MMX_B(n) _b_MMXReg[n] +#define MMX_W(n) _w_MMXReg[n] +#define MMX_L(n) _l_MMXReg[n] +#define MMX_S(n) _s_MMXReg[n] #endif -#define MMX_Q(n) q +#define MMX_Q(n) _q_MMXReg[n] typedef union { floatx80 d __attribute__((aligned(16))); @@ -865,8 +866,8 @@ typedef struct CPUX86State { float_status mmx_status; /* for 3DNow! float ops */ float_status sse_status; uint32_t mxcsr; - XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32]; - XMMReg xmm_t0; + ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32]; + ZMMReg xmm_t0; MMXReg mmx_t0; uint64_t opmask_regs[NB_OPMASK_REGS]; @@ -982,6 +983,7 @@ typedef struct CPUX86State { uint32_t sipi_vector; bool tsc_valid; int64_t tsc_khz; + int64_t user_tsc_khz; /* for sanity check only */ void *kvm_xsave_buf; uint64_t mcg_cap; @@ -999,6 +1001,8 @@ typedef struct CPUX86State { uint64_t xcr0; uint64_t xss; + uint32_t pkru; + TPRAccess tpr_access_type; } CPUX86State; @@ -1224,7 +1228,7 @@ static inline target_long lshift(target_long x, int n) #define ST1 ST(1) /* translate.c */ -void optimize_flags_init(void); +void tcg_x86_init(void); #include "exec/cpu-all.h" #include "svm.h" diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c index d421a47..31afa44 100644 --- a/target-i386/fpu_helper.c +++ b/target-i386/fpu_helper.c @@ -1169,8 +1169,8 @@ static void do_fxsave(CPUX86State *env, target_ulong ptr, int data64, || (env->hflags & HF_CPL_MASK) || !(env->hflags & HF_LMA_MASK)) { for (i = 0; i < nb_xmm_regs; i++) { - cpu_stq_data_ra(env, addr, env->xmm_regs[i].XMM_Q(0), retaddr); - cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].XMM_Q(1), retaddr); + cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), retaddr); + cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), retaddr); addr += 16; } } @@ -1226,8 +1226,8 @@ static void do_fxrstor(CPUX86State *env, target_ulong ptr, int data64, || (env->hflags & HF_CPL_MASK) || !(env->hflags & HF_LMA_MASK)) { for (i = 0; i < nb_xmm_regs; i++) { - env->xmm_regs[i].XMM_Q(0) = cpu_ldq_data_ra(env, addr, retaddr); - env->xmm_regs[i].XMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, retaddr); + env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, retaddr); + env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, retaddr); addr += 16; } } diff --git a/target-i386/gdbstub.c b/target-i386/gdbstub.c index ff99cfb..6a9bf3c 100644 --- a/target-i386/gdbstub.c +++ b/target-i386/gdbstub.c @@ -61,8 +61,8 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) n -= IDX_XMM_REGS; if (n < CPU_NB_REGS32 || (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK)) { - stq_p(mem_buf, env->xmm_regs[n].XMM_Q(0)); - stq_p(mem_buf + 8, env->xmm_regs[n].XMM_Q(1)); + stq_p(mem_buf, env->xmm_regs[n].ZMM_Q(0)); + stq_p(mem_buf + 8, env->xmm_regs[n].ZMM_Q(1)); return 16; } } else { @@ -170,8 +170,8 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) n -= IDX_XMM_REGS; if (n < CPU_NB_REGS32 || (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK)) { - env->xmm_regs[n].XMM_Q(0) = ldq_p(mem_buf); - env->xmm_regs[n].XMM_Q(1) = ldq_p(mem_buf + 8); + env->xmm_regs[n].ZMM_Q(0) = ldq_p(mem_buf); + env->xmm_regs[n].ZMM_Q(1) = ldq_p(mem_buf + 8); return 16; } } else { diff --git a/target-i386/helper.c b/target-i386/helper.c index 6b10019..24f5811 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -535,10 +535,10 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, for(i=0;i<nb;i++) { cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x", i, - env->xmm_regs[i].XMM_L(3), - env->xmm_regs[i].XMM_L(2), - env->xmm_regs[i].XMM_L(1), - env->xmm_regs[i].XMM_L(0)); + env->xmm_regs[i].ZMM_L(3), + env->xmm_regs[i].ZMM_L(2), + env->xmm_regs[i].ZMM_L(1), + env->xmm_regs[i].ZMM_L(0)); if ((i & 1) == 1) cpu_fprintf(f, "\n"); else diff --git a/target-i386/kvm.c b/target-i386/kvm.c index ab65a6e..d6f5355 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -532,6 +532,36 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_stimer); } +static int kvm_arch_set_tsc_khz(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + int r; + + if (!env->tsc_khz) { + return 0; + } + + r = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL) ? + kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz) : + -ENOTSUP; + if (r < 0) { + /* When KVM_SET_TSC_KHZ fails, it's an error only if the current + * TSC frequency doesn't match the one we want. + */ + int cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? + kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : + -ENOTSUP; + if (cur_freq <= 0 || cur_freq != env->tsc_khz) { + error_report("warning: TSC frequency mismatch between " + "VM and host, and TSC scaling unavailable"); + return r; + } + } + + return 0; +} + static Error *invtsc_mig_blocker; #define KVM_MAX_CPUID_ENTRIES 100 @@ -859,12 +889,22 @@ int kvm_arch_init_vcpu(CPUState *cs) return r; } - r = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL); - if (r && env->tsc_khz) { - r = kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz); - if (r < 0) { - fprintf(stderr, "KVM_SET_TSC_KHZ failed\n"); - return r; + r = kvm_arch_set_tsc_khz(cs); + if (r < 0) { + return r; + } + + /* vcpu's TSC frequency is either specified by user, or following + * the value used by KVM if the former is not present. In the + * latter case, we query it from KVM and record in env->tsc_khz, + * so that vcpu's TSC frequency can be migrated later via this field. + */ + if (!env->tsc_khz) { + r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? + kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : + -ENOTSUP; + if (r > 0) { + env->tsc_khz = r; } } @@ -1237,8 +1277,8 @@ static int kvm_put_fpu(X86CPU *cpu) } memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); for (i = 0; i < CPU_NB_REGS; i++) { - stq_p(&fpu.xmm[i][0], env->xmm_regs[i].XMM_Q(0)); - stq_p(&fpu.xmm[i][8], env->xmm_regs[i].XMM_Q(1)); + stq_p(&fpu.xmm[i][0], env->xmm_regs[i].ZMM_Q(0)); + stq_p(&fpu.xmm[i][8], env->xmm_regs[i].ZMM_Q(1)); } fpu.mxcsr = env->mxcsr; @@ -1259,6 +1299,7 @@ static int kvm_put_fpu(X86CPU *cpu) #define XSAVE_OPMASK 272 #define XSAVE_ZMM_Hi256 288 #define XSAVE_Hi16_ZMM 416 +#define XSAVE_PKRU 672 static int kvm_put_xsave(X86CPU *cpu) { @@ -1299,19 +1340,20 @@ static int kvm_put_xsave(X86CPU *cpu) ymmh = (uint8_t *)&xsave->region[XSAVE_YMMH_SPACE]; zmmh = (uint8_t *)&xsave->region[XSAVE_ZMM_Hi256]; for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) { - stq_p(xmm, env->xmm_regs[i].XMM_Q(0)); - stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1)); - stq_p(ymmh, env->xmm_regs[i].XMM_Q(2)); - stq_p(ymmh+8, env->xmm_regs[i].XMM_Q(3)); - stq_p(zmmh, env->xmm_regs[i].XMM_Q(4)); - stq_p(zmmh+8, env->xmm_regs[i].XMM_Q(5)); - stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6)); - stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7)); + stq_p(xmm, env->xmm_regs[i].ZMM_Q(0)); + stq_p(xmm+8, env->xmm_regs[i].ZMM_Q(1)); + stq_p(ymmh, env->xmm_regs[i].ZMM_Q(2)); + stq_p(ymmh+8, env->xmm_regs[i].ZMM_Q(3)); + stq_p(zmmh, env->xmm_regs[i].ZMM_Q(4)); + stq_p(zmmh+8, env->xmm_regs[i].ZMM_Q(5)); + stq_p(zmmh+16, env->xmm_regs[i].ZMM_Q(6)); + stq_p(zmmh+24, env->xmm_regs[i].ZMM_Q(7)); } #ifdef TARGET_X86_64 memcpy(&xsave->region[XSAVE_Hi16_ZMM], &env->xmm_regs[16], 16 * sizeof env->xmm_regs[16]); + memcpy(&xsave->region[XSAVE_PKRU], &env->pkru, sizeof env->pkru); #endif r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); return r; @@ -1665,8 +1707,8 @@ static int kvm_get_fpu(X86CPU *cpu) } memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); for (i = 0; i < CPU_NB_REGS; i++) { - env->xmm_regs[i].XMM_Q(0) = ldq_p(&fpu.xmm[i][0]); - env->xmm_regs[i].XMM_Q(1) = ldq_p(&fpu.xmm[i][8]); + env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.xmm[i][0]); + env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.xmm[i][8]); } env->mxcsr = fpu.mxcsr; @@ -1717,19 +1759,20 @@ static int kvm_get_xsave(X86CPU *cpu) ymmh = (const uint8_t *)&xsave->region[XSAVE_YMMH_SPACE]; zmmh = (const uint8_t *)&xsave->region[XSAVE_ZMM_Hi256]; for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) { - env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm); - env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8); - env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh); - env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8); - env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh); - env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8); - env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16); - env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24); + env->xmm_regs[i].ZMM_Q(0) = ldq_p(xmm); + env->xmm_regs[i].ZMM_Q(1) = ldq_p(xmm+8); + env->xmm_regs[i].ZMM_Q(2) = ldq_p(ymmh); + env->xmm_regs[i].ZMM_Q(3) = ldq_p(ymmh+8); + env->xmm_regs[i].ZMM_Q(4) = ldq_p(zmmh); + env->xmm_regs[i].ZMM_Q(5) = ldq_p(zmmh+8); + env->xmm_regs[i].ZMM_Q(6) = ldq_p(zmmh+16); + env->xmm_regs[i].ZMM_Q(7) = ldq_p(zmmh+24); } #ifdef TARGET_X86_64 memcpy(&env->xmm_regs[16], &xsave->region[XSAVE_Hi16_ZMM], 16 * sizeof env->xmm_regs[16]); + memcpy(&env->pkru, &xsave->region[XSAVE_PKRU], sizeof env->pkru); #endif return 0; } @@ -2467,6 +2510,15 @@ int kvm_arch_put_registers(CPUState *cpu, int level) } } + if (level == KVM_PUT_FULL_STATE) { + /* We don't check for kvm_arch_set_tsc_khz() errors here, + * because TSC frequency mismatch shouldn't abort migration, + * unless the user explicitly asked for a more strict TSC + * setting (e.g. using an explicit "tsc-freq" option). + */ + kvm_arch_set_tsc_khz(cpu); + } + ret = kvm_getput_regs(x86_cpu, 1); if (ret < 0) { return ret; diff --git a/target-i386/machine.c b/target-i386/machine.c index 6126d96..6be7341 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -6,6 +6,8 @@ #include "cpu.h" #include "sysemu/kvm.h" +#include "qemu/error-report.h" + static const VMStateDescription vmstate_segment = { .name = "segment", .version_id = 1, @@ -36,15 +38,15 @@ static const VMStateDescription vmstate_xmm_reg = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT64(XMM_Q(0), XMMReg), - VMSTATE_UINT64(XMM_Q(1), XMMReg), + VMSTATE_UINT64(ZMM_Q(0), ZMMReg), + VMSTATE_UINT64(ZMM_Q(1), ZMMReg), VMSTATE_END_OF_LIST() } }; #define VMSTATE_XMM_REGS(_field, _state, _start) \ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \ - vmstate_xmm_reg, XMMReg) + vmstate_xmm_reg, ZMMReg) /* YMMH format is the same as XMM, but for bits 128-255 */ static const VMStateDescription vmstate_ymmh_reg = { @@ -52,32 +54,32 @@ static const VMStateDescription vmstate_ymmh_reg = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT64(XMM_Q(2), XMMReg), - VMSTATE_UINT64(XMM_Q(3), XMMReg), + VMSTATE_UINT64(ZMM_Q(2), ZMMReg), + VMSTATE_UINT64(ZMM_Q(3), ZMMReg), VMSTATE_END_OF_LIST() } }; #define VMSTATE_YMMH_REGS_VARS(_field, _state, _start, _v) \ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, _v, \ - vmstate_ymmh_reg, XMMReg) + vmstate_ymmh_reg, ZMMReg) static const VMStateDescription vmstate_zmmh_reg = { .name = "zmmh_reg", .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT64(XMM_Q(4), XMMReg), - VMSTATE_UINT64(XMM_Q(5), XMMReg), - VMSTATE_UINT64(XMM_Q(6), XMMReg), - VMSTATE_UINT64(XMM_Q(7), XMMReg), + VMSTATE_UINT64(ZMM_Q(4), ZMMReg), + VMSTATE_UINT64(ZMM_Q(5), ZMMReg), + VMSTATE_UINT64(ZMM_Q(6), ZMMReg), + VMSTATE_UINT64(ZMM_Q(7), ZMMReg), VMSTATE_END_OF_LIST() } }; #define VMSTATE_ZMMH_REGS_VARS(_field, _state, _start) \ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \ - vmstate_zmmh_reg, XMMReg) + vmstate_zmmh_reg, ZMMReg) #ifdef TARGET_X86_64 static const VMStateDescription vmstate_hi16_zmm_reg = { @@ -85,21 +87,21 @@ static const VMStateDescription vmstate_hi16_zmm_reg = { .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT64(XMM_Q(0), XMMReg), - VMSTATE_UINT64(XMM_Q(1), XMMReg), - VMSTATE_UINT64(XMM_Q(2), XMMReg), - VMSTATE_UINT64(XMM_Q(3), XMMReg), - VMSTATE_UINT64(XMM_Q(4), XMMReg), - VMSTATE_UINT64(XMM_Q(5), XMMReg), - VMSTATE_UINT64(XMM_Q(6), XMMReg), - VMSTATE_UINT64(XMM_Q(7), XMMReg), + VMSTATE_UINT64(ZMM_Q(0), ZMMReg), + VMSTATE_UINT64(ZMM_Q(1), ZMMReg), + VMSTATE_UINT64(ZMM_Q(2), ZMMReg), + VMSTATE_UINT64(ZMM_Q(3), ZMMReg), + VMSTATE_UINT64(ZMM_Q(4), ZMMReg), + VMSTATE_UINT64(ZMM_Q(5), ZMMReg), + VMSTATE_UINT64(ZMM_Q(6), ZMMReg), + VMSTATE_UINT64(ZMM_Q(7), ZMMReg), VMSTATE_END_OF_LIST() } }; #define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _start) \ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \ - vmstate_hi16_zmm_reg, XMMReg) + vmstate_hi16_zmm_reg, ZMMReg) #endif static const VMStateDescription vmstate_bnd_regs = { @@ -331,6 +333,13 @@ static int cpu_post_load(void *opaque, int version_id) CPUX86State *env = &cpu->env; int i; + if (env->tsc_khz && env->user_tsc_khz && + env->tsc_khz != env->user_tsc_khz) { + error_report("Mismatch between user-specified TSC frequency and " + "migrated TSC frequency"); + return -EINVAL; + } + /* * Real mode guest segments register DPL should be zero. * Older KVM version were setting it wrongly. @@ -787,7 +796,7 @@ static bool avx512_needed(void *opaque) } for (i = 0; i < CPU_NB_REGS; i++) { -#define ENV_XMM(reg, field) (env->xmm_regs[reg].XMM_Q(field)) +#define ENV_XMM(reg, field) (env->xmm_regs[reg].ZMM_Q(field)) if (ENV_XMM(i, 4) || ENV_XMM(i, 6) || ENV_XMM(i, 5) || ENV_XMM(i, 7)) { return true; @@ -839,6 +848,47 @@ static const VMStateDescription vmstate_xss = { } }; +#ifdef TARGET_X86_64 +static bool pkru_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->pkru != 0; +} + +static const VMStateDescription vmstate_pkru = { + .name = "cpu/pkru", + .version_id = 1, + .minimum_version_id = 1, + .needed = pkru_needed, + .fields = (VMStateField[]){ + VMSTATE_UINT32(env.pkru, X86CPU), + VMSTATE_END_OF_LIST() + } +}; +#endif + +static bool tsc_khz_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); + return env->tsc_khz && pcmc->save_tsc_khz; +} + +static const VMStateDescription vmstate_tsc_khz = { + .name = "cpu/tsc_khz", + .version_id = 1, + .minimum_version_id = 1, + .needed = tsc_khz_needed, + .fields = (VMStateField[]) { + VMSTATE_INT64(env.tsc_khz, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, @@ -961,6 +1011,10 @@ VMStateDescription vmstate_x86_cpu = { &vmstate_msr_hyperv_stimer, &vmstate_avx512, &vmstate_xss, + &vmstate_tsc_khz, +#ifdef TARGET_X86_64 + &vmstate_pkru, +#endif NULL } }; diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 1780d1d..7a98f53 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -26,15 +26,15 @@ #define B(n) MMX_B(n) #define W(n) MMX_W(n) #define L(n) MMX_L(n) -#define Q(n) q +#define Q(n) MMX_Q(n) #define SUFFIX _mmx #else -#define Reg XMMReg +#define Reg ZMMReg #define XMM_ONLY(...) __VA_ARGS__ -#define B(n) XMM_B(n) -#define W(n) XMM_W(n) -#define L(n) XMM_L(n) -#define Q(n) XMM_Q(n) +#define B(n) ZMM_B(n) +#define W(n) ZMM_W(n) +#define L(n) ZMM_L(n) +#define Q(n) ZMM_Q(n) #define SUFFIX _xmm #endif @@ -582,26 +582,26 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order) #define SSE_HELPER_S(name, F) \ void helper_ ## name ## ps(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0)); \ - d->XMM_S(1) = F(32, d->XMM_S(1), s->XMM_S(1)); \ - d->XMM_S(2) = F(32, d->XMM_S(2), s->XMM_S(2)); \ - d->XMM_S(3) = F(32, d->XMM_S(3), s->XMM_S(3)); \ + d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ + d->ZMM_S(1) = F(32, d->ZMM_S(1), s->ZMM_S(1)); \ + d->ZMM_S(2) = F(32, d->ZMM_S(2), s->ZMM_S(2)); \ + d->ZMM_S(3) = F(32, d->ZMM_S(3), s->ZMM_S(3)); \ } \ \ void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->XMM_S(0) = F(32, d->XMM_S(0), s->XMM_S(0)); \ + d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ } \ \ void helper_ ## name ## pd(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0)); \ - d->XMM_D(1) = F(64, d->XMM_D(1), s->XMM_D(1)); \ + d->ZMM_D(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ + d->ZMM_D(1) = F(64, d->ZMM_D(1), s->ZMM_D(1)); \ } \ \ void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->XMM_D(0) = F(64, d->XMM_D(0), s->XMM_D(0)); \ + d->ZMM_D(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ } #define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status) @@ -633,216 +633,216 @@ void helper_cvtps2pd(CPUX86State *env, Reg *d, Reg *s) { float32 s0, s1; - s0 = s->XMM_S(0); - s1 = s->XMM_S(1); - d->XMM_D(0) = float32_to_float64(s0, &env->sse_status); - d->XMM_D(1) = float32_to_float64(s1, &env->sse_status); + s0 = s->ZMM_S(0); + s1 = s->ZMM_S(1); + d->ZMM_D(0) = float32_to_float64(s0, &env->sse_status); + d->ZMM_D(1) = float32_to_float64(s1, &env->sse_status); } void helper_cvtpd2ps(CPUX86State *env, Reg *d, Reg *s) { - d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); - d->XMM_S(1) = float64_to_float32(s->XMM_D(1), &env->sse_status); + d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), &env->sse_status); + d->ZMM_S(1) = float64_to_float32(s->ZMM_D(1), &env->sse_status); d->Q(1) = 0; } void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *s) { - d->XMM_D(0) = float32_to_float64(s->XMM_S(0), &env->sse_status); + d->ZMM_D(0) = float32_to_float64(s->ZMM_S(0), &env->sse_status); } void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *s) { - d->XMM_S(0) = float64_to_float32(s->XMM_D(0), &env->sse_status); + d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), &env->sse_status); } /* integer to float */ void helper_cvtdq2ps(CPUX86State *env, Reg *d, Reg *s) { - d->XMM_S(0) = int32_to_float32(s->XMM_L(0), &env->sse_status); - d->XMM_S(1) = int32_to_float32(s->XMM_L(1), &env->sse_status); - d->XMM_S(2) = int32_to_float32(s->XMM_L(2), &env->sse_status); - d->XMM_S(3) = int32_to_float32(s->XMM_L(3), &env->sse_status); + d->ZMM_S(0) = int32_to_float32(s->ZMM_L(0), &env->sse_status); + d->ZMM_S(1) = int32_to_float32(s->ZMM_L(1), &env->sse_status); + d->ZMM_S(2) = int32_to_float32(s->ZMM_L(2), &env->sse_status); + d->ZMM_S(3) = int32_to_float32(s->ZMM_L(3), &env->sse_status); } void helper_cvtdq2pd(CPUX86State *env, Reg *d, Reg *s) { int32_t l0, l1; - l0 = (int32_t)s->XMM_L(0); - l1 = (int32_t)s->XMM_L(1); - d->XMM_D(0) = int32_to_float64(l0, &env->sse_status); - d->XMM_D(1) = int32_to_float64(l1, &env->sse_status); + l0 = (int32_t)s->ZMM_L(0); + l1 = (int32_t)s->ZMM_L(1); + d->ZMM_D(0) = int32_to_float64(l0, &env->sse_status); + d->ZMM_D(1) = int32_to_float64(l1, &env->sse_status); } -void helper_cvtpi2ps(CPUX86State *env, XMMReg *d, MMXReg *s) +void helper_cvtpi2ps(CPUX86State *env, ZMMReg *d, MMXReg *s) { - d->XMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); - d->XMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status); + d->ZMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); + d->ZMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status); } -void helper_cvtpi2pd(CPUX86State *env, XMMReg *d, MMXReg *s) +void helper_cvtpi2pd(CPUX86State *env, ZMMReg *d, MMXReg *s) { - d->XMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status); - d->XMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status); + d->ZMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status); + d->ZMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status); } -void helper_cvtsi2ss(CPUX86State *env, XMMReg *d, uint32_t val) +void helper_cvtsi2ss(CPUX86State *env, ZMMReg *d, uint32_t val) { - d->XMM_S(0) = int32_to_float32(val, &env->sse_status); + d->ZMM_S(0) = int32_to_float32(val, &env->sse_status); } -void helper_cvtsi2sd(CPUX86State *env, XMMReg *d, uint32_t val) +void helper_cvtsi2sd(CPUX86State *env, ZMMReg *d, uint32_t val) { - d->XMM_D(0) = int32_to_float64(val, &env->sse_status); + d->ZMM_D(0) = int32_to_float64(val, &env->sse_status); } #ifdef TARGET_X86_64 -void helper_cvtsq2ss(CPUX86State *env, XMMReg *d, uint64_t val) +void helper_cvtsq2ss(CPUX86State *env, ZMMReg *d, uint64_t val) { - d->XMM_S(0) = int64_to_float32(val, &env->sse_status); + d->ZMM_S(0) = int64_to_float32(val, &env->sse_status); } -void helper_cvtsq2sd(CPUX86State *env, XMMReg *d, uint64_t val) +void helper_cvtsq2sd(CPUX86State *env, ZMMReg *d, uint64_t val) { - d->XMM_D(0) = int64_to_float64(val, &env->sse_status); + d->ZMM_D(0) = int64_to_float64(val, &env->sse_status); } #endif /* float to integer */ -void helper_cvtps2dq(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_cvtps2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); - d->XMM_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); - d->XMM_L(2) = float32_to_int32(s->XMM_S(2), &env->sse_status); - d->XMM_L(3) = float32_to_int32(s->XMM_S(3), &env->sse_status); + d->ZMM_L(0) = float32_to_int32(s->ZMM_S(0), &env->sse_status); + d->ZMM_L(1) = float32_to_int32(s->ZMM_S(1), &env->sse_status); + d->ZMM_L(2) = float32_to_int32(s->ZMM_S(2), &env->sse_status); + d->ZMM_L(3) = float32_to_int32(s->ZMM_S(3), &env->sse_status); } -void helper_cvtpd2dq(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_cvtpd2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); - d->XMM_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); - d->XMM_Q(1) = 0; + d->ZMM_L(0) = float64_to_int32(s->ZMM_D(0), &env->sse_status); + d->ZMM_L(1) = float64_to_int32(s->ZMM_D(1), &env->sse_status); + d->ZMM_Q(1) = 0; } -void helper_cvtps2pi(CPUX86State *env, MMXReg *d, XMMReg *s) +void helper_cvtps2pi(CPUX86State *env, MMXReg *d, ZMMReg *s) { - d->MMX_L(0) = float32_to_int32(s->XMM_S(0), &env->sse_status); - d->MMX_L(1) = float32_to_int32(s->XMM_S(1), &env->sse_status); + d->MMX_L(0) = float32_to_int32(s->ZMM_S(0), &env->sse_status); + d->MMX_L(1) = float32_to_int32(s->ZMM_S(1), &env->sse_status); } -void helper_cvtpd2pi(CPUX86State *env, MMXReg *d, XMMReg *s) +void helper_cvtpd2pi(CPUX86State *env, MMXReg *d, ZMMReg *s) { - d->MMX_L(0) = float64_to_int32(s->XMM_D(0), &env->sse_status); - d->MMX_L(1) = float64_to_int32(s->XMM_D(1), &env->sse_status); + d->MMX_L(0) = float64_to_int32(s->ZMM_D(0), &env->sse_status); + d->MMX_L(1) = float64_to_int32(s->ZMM_D(1), &env->sse_status); } -int32_t helper_cvtss2si(CPUX86State *env, XMMReg *s) +int32_t helper_cvtss2si(CPUX86State *env, ZMMReg *s) { - return float32_to_int32(s->XMM_S(0), &env->sse_status); + return float32_to_int32(s->ZMM_S(0), &env->sse_status); } -int32_t helper_cvtsd2si(CPUX86State *env, XMMReg *s) +int32_t helper_cvtsd2si(CPUX86State *env, ZMMReg *s) { - return float64_to_int32(s->XMM_D(0), &env->sse_status); + return float64_to_int32(s->ZMM_D(0), &env->sse_status); } #ifdef TARGET_X86_64 -int64_t helper_cvtss2sq(CPUX86State *env, XMMReg *s) +int64_t helper_cvtss2sq(CPUX86State *env, ZMMReg *s) { - return float32_to_int64(s->XMM_S(0), &env->sse_status); + return float32_to_int64(s->ZMM_S(0), &env->sse_status); } -int64_t helper_cvtsd2sq(CPUX86State *env, XMMReg *s) +int64_t helper_cvtsd2sq(CPUX86State *env, ZMMReg *s) { - return float64_to_int64(s->XMM_D(0), &env->sse_status); + return float64_to_int64(s->ZMM_D(0), &env->sse_status); } #endif /* float to integer truncated */ -void helper_cvttps2dq(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_cvttps2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); - d->XMM_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); - d->XMM_L(2) = float32_to_int32_round_to_zero(s->XMM_S(2), &env->sse_status); - d->XMM_L(3) = float32_to_int32_round_to_zero(s->XMM_S(3), &env->sse_status); + d->ZMM_L(0) = float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status); + d->ZMM_L(1) = float32_to_int32_round_to_zero(s->ZMM_S(1), &env->sse_status); + d->ZMM_L(2) = float32_to_int32_round_to_zero(s->ZMM_S(2), &env->sse_status); + d->ZMM_L(3) = float32_to_int32_round_to_zero(s->ZMM_S(3), &env->sse_status); } -void helper_cvttpd2dq(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_cvttpd2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); - d->XMM_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); - d->XMM_Q(1) = 0; + d->ZMM_L(0) = float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status); + d->ZMM_L(1) = float64_to_int32_round_to_zero(s->ZMM_D(1), &env->sse_status); + d->ZMM_Q(1) = 0; } -void helper_cvttps2pi(CPUX86State *env, MMXReg *d, XMMReg *s) +void helper_cvttps2pi(CPUX86State *env, MMXReg *d, ZMMReg *s) { - d->MMX_L(0) = float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); - d->MMX_L(1) = float32_to_int32_round_to_zero(s->XMM_S(1), &env->sse_status); + d->MMX_L(0) = float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status); + d->MMX_L(1) = float32_to_int32_round_to_zero(s->ZMM_S(1), &env->sse_status); } -void helper_cvttpd2pi(CPUX86State *env, MMXReg *d, XMMReg *s) +void helper_cvttpd2pi(CPUX86State *env, MMXReg *d, ZMMReg *s) { - d->MMX_L(0) = float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); - d->MMX_L(1) = float64_to_int32_round_to_zero(s->XMM_D(1), &env->sse_status); + d->MMX_L(0) = float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status); + d->MMX_L(1) = float64_to_int32_round_to_zero(s->ZMM_D(1), &env->sse_status); } -int32_t helper_cvttss2si(CPUX86State *env, XMMReg *s) +int32_t helper_cvttss2si(CPUX86State *env, ZMMReg *s) { - return float32_to_int32_round_to_zero(s->XMM_S(0), &env->sse_status); + return float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status); } -int32_t helper_cvttsd2si(CPUX86State *env, XMMReg *s) +int32_t helper_cvttsd2si(CPUX86State *env, ZMMReg *s) { - return float64_to_int32_round_to_zero(s->XMM_D(0), &env->sse_status); + return float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status); } #ifdef TARGET_X86_64 -int64_t helper_cvttss2sq(CPUX86State *env, XMMReg *s) +int64_t helper_cvttss2sq(CPUX86State *env, ZMMReg *s) { - return float32_to_int64_round_to_zero(s->XMM_S(0), &env->sse_status); + return float32_to_int64_round_to_zero(s->ZMM_S(0), &env->sse_status); } -int64_t helper_cvttsd2sq(CPUX86State *env, XMMReg *s) +int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) { - return float64_to_int64_round_to_zero(s->XMM_D(0), &env->sse_status); + return float64_to_int64_round_to_zero(s->ZMM_D(0), &env->sse_status); } #endif -void helper_rsqrtps(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_S(0) = float32_div(float32_one, - float32_sqrt(s->XMM_S(0), &env->sse_status), + d->ZMM_S(0) = float32_div(float32_one, + float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); - d->XMM_S(1) = float32_div(float32_one, - float32_sqrt(s->XMM_S(1), &env->sse_status), + d->ZMM_S(1) = float32_div(float32_one, + float32_sqrt(s->ZMM_S(1), &env->sse_status), &env->sse_status); - d->XMM_S(2) = float32_div(float32_one, - float32_sqrt(s->XMM_S(2), &env->sse_status), + d->ZMM_S(2) = float32_div(float32_one, + float32_sqrt(s->ZMM_S(2), &env->sse_status), &env->sse_status); - d->XMM_S(3) = float32_div(float32_one, - float32_sqrt(s->XMM_S(3), &env->sse_status), + d->ZMM_S(3) = float32_div(float32_one, + float32_sqrt(s->ZMM_S(3), &env->sse_status), &env->sse_status); } -void helper_rsqrtss(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_S(0) = float32_div(float32_one, - float32_sqrt(s->XMM_S(0), &env->sse_status), + d->ZMM_S(0) = float32_div(float32_one, + float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); } -void helper_rcpps(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_rcpps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_S(0) = float32_div(float32_one, s->XMM_S(0), &env->sse_status); - d->XMM_S(1) = float32_div(float32_one, s->XMM_S(1), &env->sse_status); - d->XMM_S(2) = float32_div(float32_one, s->XMM_S(2), &env->sse_status); - d->XMM_S(3) = float32_div(float32_one, s->XMM_S(3), &env->sse_status); + d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); + d->ZMM_S(1) = float32_div(float32_one, s->ZMM_S(1), &env->sse_status); + d->ZMM_S(2) = float32_div(float32_one, s->ZMM_S(2), &env->sse_status); + d->ZMM_S(3) = float32_div(float32_one, s->ZMM_S(3), &env->sse_status); } -void helper_rcpss(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_S(0) = float32_div(float32_one, s->XMM_S(0), &env->sse_status); + d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); } static inline uint64_t helper_extrq(uint64_t src, int shift, int len) @@ -857,14 +857,14 @@ static inline uint64_t helper_extrq(uint64_t src, int shift, int len) return (src >> shift) & mask; } -void helper_extrq_r(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_extrq_r(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), s->XMM_B(1), s->XMM_B(0)); + d->ZMM_Q(0) = helper_extrq(d->ZMM_Q(0), s->ZMM_B(1), s->ZMM_B(0)); } -void helper_extrq_i(CPUX86State *env, XMMReg *d, int index, int length) +void helper_extrq_i(CPUX86State *env, ZMMReg *d, int index, int length) { - d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), index, length); + d->ZMM_Q(0) = helper_extrq(d->ZMM_Q(0), index, length); } static inline uint64_t helper_insertq(uint64_t src, int shift, int len) @@ -879,94 +879,94 @@ static inline uint64_t helper_insertq(uint64_t src, int shift, int len) return (src & ~(mask << shift)) | ((src & mask) << shift); } -void helper_insertq_r(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_insertq_r(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_Q(0) = helper_insertq(s->XMM_Q(0), s->XMM_B(9), s->XMM_B(8)); + d->ZMM_Q(0) = helper_insertq(s->ZMM_Q(0), s->ZMM_B(9), s->ZMM_B(8)); } -void helper_insertq_i(CPUX86State *env, XMMReg *d, int index, int length) +void helper_insertq_i(CPUX86State *env, ZMMReg *d, int index, int length) { - d->XMM_Q(0) = helper_insertq(d->XMM_Q(0), index, length); + d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length); } -void helper_haddps(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_haddps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - XMMReg r; + ZMMReg r; - r.XMM_S(0) = float32_add(d->XMM_S(0), d->XMM_S(1), &env->sse_status); - r.XMM_S(1) = float32_add(d->XMM_S(2), d->XMM_S(3), &env->sse_status); - r.XMM_S(2) = float32_add(s->XMM_S(0), s->XMM_S(1), &env->sse_status); - r.XMM_S(3) = float32_add(s->XMM_S(2), s->XMM_S(3), &env->sse_status); + r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status); + r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status); + r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status); + r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status); *d = r; } -void helper_haddpd(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - XMMReg r; + ZMMReg r; - r.XMM_D(0) = float64_add(d->XMM_D(0), d->XMM_D(1), &env->sse_status); - r.XMM_D(1) = float64_add(s->XMM_D(0), s->XMM_D(1), &env->sse_status); + r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status); + r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status); *d = r; } -void helper_hsubps(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - XMMReg r; + ZMMReg r; - r.XMM_S(0) = float32_sub(d->XMM_S(0), d->XMM_S(1), &env->sse_status); - r.XMM_S(1) = float32_sub(d->XMM_S(2), d->XMM_S(3), &env->sse_status); - r.XMM_S(2) = float32_sub(s->XMM_S(0), s->XMM_S(1), &env->sse_status); - r.XMM_S(3) = float32_sub(s->XMM_S(2), s->XMM_S(3), &env->sse_status); + r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status); + r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status); + r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status); + r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status); *d = r; } -void helper_hsubpd(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - XMMReg r; + ZMMReg r; - r.XMM_D(0) = float64_sub(d->XMM_D(0), d->XMM_D(1), &env->sse_status); - r.XMM_D(1) = float64_sub(s->XMM_D(0), s->XMM_D(1), &env->sse_status); + r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status); + r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status); *d = r; } -void helper_addsubps(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_addsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_S(0) = float32_sub(d->XMM_S(0), s->XMM_S(0), &env->sse_status); - d->XMM_S(1) = float32_add(d->XMM_S(1), s->XMM_S(1), &env->sse_status); - d->XMM_S(2) = float32_sub(d->XMM_S(2), s->XMM_S(2), &env->sse_status); - d->XMM_S(3) = float32_add(d->XMM_S(3), s->XMM_S(3), &env->sse_status); + d->ZMM_S(0) = float32_sub(d->ZMM_S(0), s->ZMM_S(0), &env->sse_status); + d->ZMM_S(1) = float32_add(d->ZMM_S(1), s->ZMM_S(1), &env->sse_status); + d->ZMM_S(2) = float32_sub(d->ZMM_S(2), s->ZMM_S(2), &env->sse_status); + d->ZMM_S(3) = float32_add(d->ZMM_S(3), s->ZMM_S(3), &env->sse_status); } -void helper_addsubpd(CPUX86State *env, XMMReg *d, XMMReg *s) +void helper_addsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->XMM_D(0) = float64_sub(d->XMM_D(0), s->XMM_D(0), &env->sse_status); - d->XMM_D(1) = float64_add(d->XMM_D(1), s->XMM_D(1), &env->sse_status); + d->ZMM_D(0) = float64_sub(d->ZMM_D(0), s->ZMM_D(0), &env->sse_status); + d->ZMM_D(1) = float64_add(d->ZMM_D(1), s->ZMM_D(1), &env->sse_status); } /* XXX: unordered */ #define SSE_HELPER_CMP(name, F) \ void helper_ ## name ## ps(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0)); \ - d->XMM_L(1) = F(32, d->XMM_S(1), s->XMM_S(1)); \ - d->XMM_L(2) = F(32, d->XMM_S(2), s->XMM_S(2)); \ - d->XMM_L(3) = F(32, d->XMM_S(3), s->XMM_S(3)); \ + d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ + d->ZMM_L(1) = F(32, d->ZMM_S(1), s->ZMM_S(1)); \ + d->ZMM_L(2) = F(32, d->ZMM_S(2), s->ZMM_S(2)); \ + d->ZMM_L(3) = F(32, d->ZMM_S(3), s->ZMM_S(3)); \ } \ \ void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->XMM_L(0) = F(32, d->XMM_S(0), s->XMM_S(0)); \ + d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ } \ \ void helper_ ## name ## pd(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0)); \ - d->XMM_Q(1) = F(64, d->XMM_D(1), s->XMM_D(1)); \ + d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ + d->ZMM_Q(1) = F(64, d->ZMM_D(1), s->ZMM_D(1)); \ } \ \ void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->XMM_Q(0) = F(64, d->XMM_D(0), s->XMM_D(0)); \ + d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ } #define FPU_CMPEQ(size, a, b) \ @@ -1002,8 +1002,8 @@ void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) int ret; float32 s0, s1; - s0 = d->XMM_S(0); - s1 = s->XMM_S(0); + s0 = d->ZMM_S(0); + s1 = s->ZMM_S(0); ret = float32_compare_quiet(s0, s1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; } @@ -1013,8 +1013,8 @@ void helper_comiss(CPUX86State *env, Reg *d, Reg *s) int ret; float32 s0, s1; - s0 = d->XMM_S(0); - s1 = s->XMM_S(0); + s0 = d->ZMM_S(0); + s1 = s->ZMM_S(0); ret = float32_compare(s0, s1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; } @@ -1024,8 +1024,8 @@ void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) int ret; float64 d0, d1; - d0 = d->XMM_D(0); - d1 = s->XMM_D(0); + d0 = d->ZMM_D(0); + d1 = s->ZMM_D(0); ret = float64_compare_quiet(d0, d1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; } @@ -1035,8 +1035,8 @@ void helper_comisd(CPUX86State *env, Reg *d, Reg *s) int ret; float64 d0, d1; - d0 = d->XMM_D(0); - d1 = s->XMM_D(0); + d0 = d->ZMM_D(0); + d1 = s->ZMM_D(0); ret = float64_compare(d0, d1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; } @@ -1045,10 +1045,10 @@ uint32_t helper_movmskps(CPUX86State *env, Reg *s) { int b0, b1, b2, b3; - b0 = s->XMM_L(0) >> 31; - b1 = s->XMM_L(1) >> 31; - b2 = s->XMM_L(2) >> 31; - b3 = s->XMM_L(3) >> 31; + b0 = s->ZMM_L(0) >> 31; + b1 = s->ZMM_L(1) >> 31; + b2 = s->ZMM_L(2) >> 31; + b3 = s->ZMM_L(3) >> 31; return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3); } @@ -1056,8 +1056,8 @@ uint32_t helper_movmskpd(CPUX86State *env, Reg *s) { int b0, b1; - b0 = s->XMM_L(1) >> 31; - b1 = s->XMM_L(3) >> 31; + b0 = s->ZMM_L(1) >> 31; + b1 = s->ZMM_L(3) >> 31; return b0 | (b1 << 1); } @@ -1736,10 +1736,10 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, } } - d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status); - d->XMM_S(1) = float32_round_to_int(s->XMM_S(1), &env->sse_status); - d->XMM_S(2) = float32_round_to_int(s->XMM_S(2), &env->sse_status); - d->XMM_S(3) = float32_round_to_int(s->XMM_S(3), &env->sse_status); + d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); + d->ZMM_S(1) = float32_round_to_int(s->ZMM_S(1), &env->sse_status); + d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status); + d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status); #if 0 /* TODO */ if (mode & (1 << 3)) { @@ -1774,8 +1774,8 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, } } - d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status); - d->XMM_D(1) = float64_round_to_int(s->XMM_D(1), &env->sse_status); + d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); + d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status); #if 0 /* TODO */ if (mode & (1 << 3)) { @@ -1810,7 +1810,7 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, } } - d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status); + d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); #if 0 /* TODO */ if (mode & (1 << 3)) { @@ -1845,7 +1845,7 @@ void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, } } - d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status); + d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); #if 0 /* TODO */ if (mode & (1 << 3)) { @@ -1868,32 +1868,32 @@ void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask) if (mask & (1 << 4)) { iresult = float32_add(iresult, - float32_mul(d->XMM_S(0), s->XMM_S(0), + float32_mul(d->ZMM_S(0), s->ZMM_S(0), &env->sse_status), &env->sse_status); } if (mask & (1 << 5)) { iresult = float32_add(iresult, - float32_mul(d->XMM_S(1), s->XMM_S(1), + float32_mul(d->ZMM_S(1), s->ZMM_S(1), &env->sse_status), &env->sse_status); } if (mask & (1 << 6)) { iresult = float32_add(iresult, - float32_mul(d->XMM_S(2), s->XMM_S(2), + float32_mul(d->ZMM_S(2), s->ZMM_S(2), &env->sse_status), &env->sse_status); } if (mask & (1 << 7)) { iresult = float32_add(iresult, - float32_mul(d->XMM_S(3), s->XMM_S(3), + float32_mul(d->ZMM_S(3), s->ZMM_S(3), &env->sse_status), &env->sse_status); } - d->XMM_S(0) = (mask & (1 << 0)) ? iresult : float32_zero; - d->XMM_S(1) = (mask & (1 << 1)) ? iresult : float32_zero; - d->XMM_S(2) = (mask & (1 << 2)) ? iresult : float32_zero; - d->XMM_S(3) = (mask & (1 << 3)) ? iresult : float32_zero; + d->ZMM_S(0) = (mask & (1 << 0)) ? iresult : float32_zero; + d->ZMM_S(1) = (mask & (1 << 1)) ? iresult : float32_zero; + d->ZMM_S(2) = (mask & (1 << 2)) ? iresult : float32_zero; + d->ZMM_S(3) = (mask & (1 << 3)) ? iresult : float32_zero; } void glue(helper_dppd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask) @@ -1902,18 +1902,18 @@ void glue(helper_dppd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask) if (mask & (1 << 4)) { iresult = float64_add(iresult, - float64_mul(d->XMM_D(0), s->XMM_D(0), + float64_mul(d->ZMM_D(0), s->ZMM_D(0), &env->sse_status), &env->sse_status); } if (mask & (1 << 5)) { iresult = float64_add(iresult, - float64_mul(d->XMM_D(1), s->XMM_D(1), + float64_mul(d->ZMM_D(1), s->ZMM_D(1), &env->sse_status), &env->sse_status); } - d->XMM_D(0) = (mask & (1 << 0)) ? iresult : float64_zero; - d->XMM_D(1) = (mask & (1 << 1)) ? iresult : float64_zero; + d->ZMM_D(0) = (mask & (1 << 0)) ? iresult : float64_zero; + d->ZMM_D(1) = (mask & (1 << 1)) ? iresult : float64_zero; } void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h index a68c7cc..64c5857 100644 --- a/target-i386/ops_sse_header.h +++ b/target-i386/ops_sse_header.h @@ -20,18 +20,18 @@ #define Reg MMXReg #define SUFFIX _mmx #else -#define Reg XMMReg +#define Reg ZMMReg #define SUFFIX _xmm #endif #define dh_alias_Reg ptr -#define dh_alias_XMMReg ptr +#define dh_alias_ZMMReg ptr #define dh_alias_MMXReg ptr #define dh_ctype_Reg Reg * -#define dh_ctype_XMMReg XMMReg * +#define dh_ctype_ZMMReg ZMMReg * #define dh_ctype_MMXReg MMXReg * #define dh_is_signed_Reg dh_is_signed_ptr -#define dh_is_signed_XMMReg dh_is_signed_ptr +#define dh_is_signed_ZMMReg dh_is_signed_ptr #define dh_is_signed_MMXReg dh_is_signed_ptr DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg) @@ -154,52 +154,52 @@ DEF_HELPER_3(cvtss2sd, void, env, Reg, Reg) DEF_HELPER_3(cvtsd2ss, void, env, Reg, Reg) DEF_HELPER_3(cvtdq2ps, void, env, Reg, Reg) DEF_HELPER_3(cvtdq2pd, void, env, Reg, Reg) -DEF_HELPER_3(cvtpi2ps, void, env, XMMReg, MMXReg) -DEF_HELPER_3(cvtpi2pd, void, env, XMMReg, MMXReg) -DEF_HELPER_3(cvtsi2ss, void, env, XMMReg, i32) -DEF_HELPER_3(cvtsi2sd, void, env, XMMReg, i32) +DEF_HELPER_3(cvtpi2ps, void, env, ZMMReg, MMXReg) +DEF_HELPER_3(cvtpi2pd, void, env, ZMMReg, MMXReg) +DEF_HELPER_3(cvtsi2ss, void, env, ZMMReg, i32) +DEF_HELPER_3(cvtsi2sd, void, env, ZMMReg, i32) #ifdef TARGET_X86_64 -DEF_HELPER_3(cvtsq2ss, void, env, XMMReg, i64) -DEF_HELPER_3(cvtsq2sd, void, env, XMMReg, i64) +DEF_HELPER_3(cvtsq2ss, void, env, ZMMReg, i64) +DEF_HELPER_3(cvtsq2sd, void, env, ZMMReg, i64) #endif -DEF_HELPER_3(cvtps2dq, void, env, XMMReg, XMMReg) -DEF_HELPER_3(cvtpd2dq, void, env, XMMReg, XMMReg) -DEF_HELPER_3(cvtps2pi, void, env, MMXReg, XMMReg) -DEF_HELPER_3(cvtpd2pi, void, env, MMXReg, XMMReg) -DEF_HELPER_2(cvtss2si, s32, env, XMMReg) -DEF_HELPER_2(cvtsd2si, s32, env, XMMReg) +DEF_HELPER_3(cvtps2dq, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(cvtpd2dq, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(cvtps2pi, void, env, MMXReg, ZMMReg) +DEF_HELPER_3(cvtpd2pi, void, env, MMXReg, ZMMReg) +DEF_HELPER_2(cvtss2si, s32, env, ZMMReg) +DEF_HELPER_2(cvtsd2si, s32, env, ZMMReg) #ifdef TARGET_X86_64 -DEF_HELPER_2(cvtss2sq, s64, env, XMMReg) -DEF_HELPER_2(cvtsd2sq, s64, env, XMMReg) +DEF_HELPER_2(cvtss2sq, s64, env, ZMMReg) +DEF_HELPER_2(cvtsd2sq, s64, env, ZMMReg) #endif -DEF_HELPER_3(cvttps2dq, void, env, XMMReg, XMMReg) -DEF_HELPER_3(cvttpd2dq, void, env, XMMReg, XMMReg) -DEF_HELPER_3(cvttps2pi, void, env, MMXReg, XMMReg) -DEF_HELPER_3(cvttpd2pi, void, env, MMXReg, XMMReg) -DEF_HELPER_2(cvttss2si, s32, env, XMMReg) -DEF_HELPER_2(cvttsd2si, s32, env, XMMReg) +DEF_HELPER_3(cvttps2dq, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(cvttpd2dq, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(cvttps2pi, void, env, MMXReg, ZMMReg) +DEF_HELPER_3(cvttpd2pi, void, env, MMXReg, ZMMReg) +DEF_HELPER_2(cvttss2si, s32, env, ZMMReg) +DEF_HELPER_2(cvttsd2si, s32, env, ZMMReg) #ifdef TARGET_X86_64 -DEF_HELPER_2(cvttss2sq, s64, env, XMMReg) -DEF_HELPER_2(cvttsd2sq, s64, env, XMMReg) +DEF_HELPER_2(cvttss2sq, s64, env, ZMMReg) +DEF_HELPER_2(cvttsd2sq, s64, env, ZMMReg) #endif -DEF_HELPER_3(rsqrtps, void, env, XMMReg, XMMReg) -DEF_HELPER_3(rsqrtss, void, env, XMMReg, XMMReg) -DEF_HELPER_3(rcpps, void, env, XMMReg, XMMReg) -DEF_HELPER_3(rcpss, void, env, XMMReg, XMMReg) -DEF_HELPER_3(extrq_r, void, env, XMMReg, XMMReg) -DEF_HELPER_4(extrq_i, void, env, XMMReg, int, int) -DEF_HELPER_3(insertq_r, void, env, XMMReg, XMMReg) -DEF_HELPER_4(insertq_i, void, env, XMMReg, int, int) -DEF_HELPER_3(haddps, void, env, XMMReg, XMMReg) -DEF_HELPER_3(haddpd, void, env, XMMReg, XMMReg) -DEF_HELPER_3(hsubps, void, env, XMMReg, XMMReg) -DEF_HELPER_3(hsubpd, void, env, XMMReg, XMMReg) -DEF_HELPER_3(addsubps, void, env, XMMReg, XMMReg) -DEF_HELPER_3(addsubpd, void, env, XMMReg, XMMReg) +DEF_HELPER_3(rsqrtps, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(rsqrtss, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(rcpps, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(rcpss, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(extrq_r, void, env, ZMMReg, ZMMReg) +DEF_HELPER_4(extrq_i, void, env, ZMMReg, int, int) +DEF_HELPER_3(insertq_r, void, env, ZMMReg, ZMMReg) +DEF_HELPER_4(insertq_i, void, env, ZMMReg, int, int) +DEF_HELPER_3(haddps, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(haddpd, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(hsubps, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(hsubpd, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(addsubps, void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(addsubpd, void, env, ZMMReg, ZMMReg) #define SSE_HELPER_CMP(name, F) \ DEF_HELPER_3(name ## ps, void, env, Reg, Reg) \ diff --git a/target-i386/translate.c b/target-i386/translate.c index a3dd167..8ce0fcc 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -2602,28 +2602,28 @@ static inline void gen_ldo_env_A0(DisasContext *s, int offset) { int mem_index = s->mem_index; tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0))); tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1))); } static inline void gen_sto_env_A0(DisasContext *s, int offset) { int mem_index = s->mem_index; - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0))); tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ); tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8); - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1))); tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ); } static inline void gen_op_movo(int d_offset, int s_offset) { - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0))); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(0))); - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1))); - tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0))); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0))); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1))); + tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1))); } static inline void gen_op_movq(int d_offset, int s_offset) @@ -3074,10 +3074,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_lea_modrm(env, s, modrm); if (b1 & 1) { gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(0))); + xmm_regs[reg].ZMM_Q(0))); } else { tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_L(0))); + xmm_regs[reg].ZMM_L(0))); gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0); } break; @@ -3144,29 +3144,29 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); tcg_gen_movi_tl(cpu_T[0], 0); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1))); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), - offsetof(CPUX86State,xmm_regs[rm].XMM_L(0))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0))); } break; case 0x310: /* movsd xmm, ea */ if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(0))); + xmm_regs[reg].ZMM_Q(0))); tcg_gen_movi_tl(cpu_T[0], 0); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); } break; case 0x012: /* movlps */ @@ -3174,12 +3174,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(0))); + xmm_regs[reg].ZMM_Q(0))); } else { /* movhlps */ rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1))); } break; case 0x212: /* movsldup */ @@ -3188,40 +3188,40 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), - offsetof(CPUX86State,xmm_regs[rm].XMM_L(0))); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)), - offsetof(CPUX86State,xmm_regs[rm].XMM_L(2))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2))); } - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)), - offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)), - offsetof(CPUX86State,xmm_regs[reg].XMM_L(2))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)), + offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)), + offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2))); break; case 0x312: /* movddup */ if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(0))); + xmm_regs[reg].ZMM_Q(0))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); } - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)), - offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)), + offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); break; case 0x016: /* movhps */ case 0x116: /* movhpd */ if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(1))); + xmm_regs[reg].ZMM_Q(1))); } else { /* movlhps */ rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)), - offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); } break; case 0x216: /* movshdup */ @@ -3230,15 +3230,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)), - offsetof(CPUX86State,xmm_regs[rm].XMM_L(1))); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)), - offsetof(CPUX86State,xmm_regs[rm].XMM_L(3))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3))); } - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)), - offsetof(CPUX86State,xmm_regs[reg].XMM_L(1))); - gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)), - offsetof(CPUX86State,xmm_regs[reg].XMM_L(3))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)), + offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)), + offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3))); break; case 0x178: case 0x378: @@ -3279,13 +3279,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, #ifdef TARGET_X86_64 if (s->dflag == MO_64) { tcg_gen_ld_i64(cpu_T[0], cpu_env, - offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1); } else #endif { tcg_gen_ld32u_tl(cpu_T[0], cpu_env, - offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1); } break; @@ -3293,13 +3293,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(0))); + xmm_regs[reg].ZMM_Q(0))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), + offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); } - gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1))); break; case 0x7f: /* movq ea, mm */ if (mod != 3) { @@ -3329,23 +3329,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x211: /* movss ea, xmm */ if (mod != 3) { gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)), - offsetof(CPUX86State,xmm_regs[reg].XMM_L(0))); + gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)), + offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); } break; case 0x311: /* movsd ea, xmm */ if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(0))); + xmm_regs[reg].ZMM_Q(0))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), - offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)), + offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); } break; case 0x013: /* movlps */ @@ -3353,7 +3353,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(0))); + xmm_regs[reg].ZMM_Q(0))); } else { goto illegal_op; } @@ -3363,7 +3363,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(1))); + xmm_regs[reg].ZMM_Q(1))); } else { goto illegal_op; } @@ -3380,9 +3380,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, val = cpu_ldub_code(env, s->pc++); if (is_xmm) { tcg_gen_movi_tl(cpu_T[0], val); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0))); tcg_gen_movi_tl(cpu_T[0], 0); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1))); op1_offset = offsetof(CPUX86State,xmm_t0); } else { tcg_gen_movi_tl(cpu_T[0], val); @@ -3503,10 +3503,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (mod != 3) { gen_lea_modrm(env, s, modrm); if ((b >> 8) & 1) { - gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0))); + gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0))); } else { gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0))); } op2_offset = offsetof(CPUX86State,xmm_t0); } else { @@ -3538,7 +3538,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (b1) { val &= 7; tcg_gen_st16_tl(cpu_T[0], cpu_env, - offsetof(CPUX86State,xmm_regs[reg].XMM_W(val))); + offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val))); } else { val &= 3; tcg_gen_st16_tl(cpu_T[0], cpu_env, @@ -3555,7 +3555,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, val &= 7; rm = (modrm & 7) | REX_B(s); tcg_gen_ld16u_tl(cpu_T[0], cpu_env, - offsetof(CPUX86State,xmm_regs[rm].XMM_W(val))); + offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val))); } else { val &= 3; rm = (modrm & 7); @@ -3569,26 +3569,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(0))); + xmm_regs[reg].ZMM_Q(0))); } else { rm = (modrm & 7) | REX_B(s); - gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)), - offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0))); - gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1))); + gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)), + offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1))); } break; case 0x2d6: /* movq2dq */ gen_helper_enter_mmx(cpu_env); rm = (modrm & 7); - gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)), + gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)), offsetof(CPUX86State,fpregs[rm].mmx)); - gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1))); + gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1))); break; case 0x3d6: /* movdq2q */ gen_helper_enter_mmx(cpu_env); rm = (modrm & 7) | REX_B(s); gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx), - offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0))); + offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); break; case 0xd7: /* pmovmskb */ case 0x1d7: @@ -3640,20 +3640,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */ case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */ gen_ldq_env_A0(s, op2_offset + - offsetof(XMMReg, XMM_Q(0))); + offsetof(ZMMReg, ZMM_Q(0))); break; case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */ case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */ tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL); tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset + - offsetof(XMMReg, XMM_L(0))); + offsetof(ZMMReg, ZMM_L(0))); break; case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */ tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0, s->mem_index, MO_LEUW); tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset + - offsetof(XMMReg, XMM_W(0))); + offsetof(ZMMReg, ZMM_W(0))); break; case 0x2a: /* movntqda */ gen_ldo_env_A0(s, op1_offset); @@ -4078,7 +4078,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, switch (b) { case 0x14: /* pextrb */ tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_B(val & 15))); + xmm_regs[reg].ZMM_B(val & 15))); if (mod == 3) { gen_op_mov_reg_v(ot, rm, cpu_T[0]); } else { @@ -4088,7 +4088,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; case 0x15: /* pextrw */ tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_W(val & 7))); + xmm_regs[reg].ZMM_W(val & 7))); if (mod == 3) { gen_op_mov_reg_v(ot, rm, cpu_T[0]); } else { @@ -4100,7 +4100,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (ot == MO_32) { /* pextrd */ tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_L(val & 3))); + xmm_regs[reg].ZMM_L(val & 3))); if (mod == 3) { tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32); } else { @@ -4111,7 +4111,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, #ifdef TARGET_X86_64 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(val & 1))); + xmm_regs[reg].ZMM_Q(val & 1))); if (mod == 3) { tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64); } else { @@ -4125,7 +4125,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; case 0x17: /* extractps */ tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_L(val & 3))); + xmm_regs[reg].ZMM_L(val & 3))); if (mod == 3) { gen_op_mov_reg_v(ot, rm, cpu_T[0]); } else { @@ -4141,36 +4141,36 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, s->mem_index, MO_UB); } tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_B(val & 15))); + xmm_regs[reg].ZMM_B(val & 15))); break; case 0x21: /* insertps */ if (mod == 3) { tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State,xmm_regs[rm] - .XMM_L((val >> 6) & 3))); + .ZMM_L((val >> 6) & 3))); } else { tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL); } tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State,xmm_regs[reg] - .XMM_L((val >> 4) & 3))); + .ZMM_L((val >> 4) & 3))); if ((val >> 0) & 1) tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_L(0))); + xmm_regs[reg].ZMM_L(0))); if ((val >> 1) & 1) tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_L(1))); + xmm_regs[reg].ZMM_L(1))); if ((val >> 2) & 1) tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_L(2))); + xmm_regs[reg].ZMM_L(2))); if ((val >> 3) & 1) tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_L(3))); + xmm_regs[reg].ZMM_L(3))); break; case 0x22: if (ot == MO_32) { /* pinsrd */ @@ -4182,7 +4182,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_L(val & 3))); + xmm_regs[reg].ZMM_L(val & 3))); } else { /* pinsrq */ #ifdef TARGET_X86_64 if (mod == 3) { @@ -4193,7 +4193,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].XMM_Q(val & 1))); + xmm_regs[reg].ZMM_Q(val & 1))); #else goto illegal_op; #endif @@ -4318,11 +4318,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, /* 32 bit access */ gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0); tcg_gen_st32_tl(cpu_T[0], cpu_env, - offsetof(CPUX86State,xmm_t0.XMM_L(0))); + offsetof(CPUX86State,xmm_t0.ZMM_L(0))); break; case 3: /* 64 bit access */ - gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_D(0))); + gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0))); break; default: /* 128 bit access */ @@ -7829,7 +7829,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, return s->pc; } -void optimize_flags_init(void) +void tcg_x86_init(void) { static const char reg_names[CPU_NB_REGS][4] = { #ifdef TARGET_X86_64 |