aboutsummaryrefslogtreecommitdiff
path: root/target-i386
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2014-10-24 09:44:38 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2015-01-14 10:38:57 +0100
commitbee818872cd9e8c07be529f75da3e48a68bf7a93 (patch)
tree4b922dc78530784a8a38fec27e11970f73060592 /target-i386
parent07958082fdf39284935d38a5b8aec1fe7d020637 (diff)
downloadqemu-bee818872cd9e8c07be529f75da3e48a68bf7a93.zip
qemu-bee818872cd9e8c07be529f75da3e48a68bf7a93.tar.gz
qemu-bee818872cd9e8c07be529f75da3e48a68bf7a93.tar.bz2
target-i386: do not memcpy in and out of xmm_regs
After the next patch, we will move the high parts of AVX and AVX512 registers in the same array as the SSE registers. This will make it impossible to memcpy an array of 128-bit values in and out of xmm_regs in one swoop. Use a for loop instead. Similarly, always use XMM_Q in translate.c. This avoids introducing bugs such as the one fixed in the previous patch. Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'target-i386')
-rw-r--r--target-i386/kvm.c30
-rw-r--r--target-i386/translate.c8
2 files changed, 28 insertions, 10 deletions
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index f92edfe..cf9f331 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1019,7 +1019,10 @@ static int kvm_put_fpu(X86CPU *cpu)
fpu.ftwx |= (!env->fptags[i]) << i;
}
memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
- memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
+ for (i = 0; i < CPU_NB_REGS; i++) {
+ stq_p(&fpu.xmm[i][0], env->xmm_regs[i].XMM_Q(0));
+ stq_p(&fpu.xmm[i][8], env->xmm_regs[i].XMM_Q(1));
+ }
fpu.mxcsr = env->mxcsr;
return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
@@ -1045,6 +1048,7 @@ static int kvm_put_xsave(X86CPU *cpu)
CPUX86State *env = &cpu->env;
struct kvm_xsave* xsave = env->kvm_xsave_buf;
uint16_t cwd, swd, twd;
+ uint8_t *xmm;
int i, r;
if (!kvm_has_xsave()) {
@@ -1065,8 +1069,6 @@ static int kvm_put_xsave(X86CPU *cpu)
memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
sizeof env->fpregs);
- memcpy(&xsave->region[XSAVE_XMM_SPACE], env->xmm_regs,
- sizeof env->xmm_regs);
xsave->region[XSAVE_MXCSR] = env->mxcsr;
*(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
@@ -1079,6 +1081,13 @@ static int kvm_put_xsave(X86CPU *cpu)
sizeof env->opmask_regs);
memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
sizeof env->zmmh_regs);
+
+ xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
+ for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+ stq_p(xmm, env->xmm_regs[i].XMM_Q(0));
+ stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1));
+ }
+
#ifdef TARGET_X86_64
memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
sizeof env->hi16_zmm_regs);
@@ -1384,7 +1393,10 @@ static int kvm_get_fpu(X86CPU *cpu)
env->fptags[i] = !((fpu.ftwx >> i) & 1);
}
memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
- memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
+ for (i = 0; i < CPU_NB_REGS; i++) {
+ env->xmm_regs[i].XMM_Q(0) = ldq_p(&fpu.xmm[i][0]);
+ env->xmm_regs[i].XMM_Q(1) = ldq_p(&fpu.xmm[i][8]);
+ }
env->mxcsr = fpu.mxcsr;
return 0;
@@ -1395,6 +1407,7 @@ static int kvm_get_xsave(X86CPU *cpu)
CPUX86State *env = &cpu->env;
struct kvm_xsave* xsave = env->kvm_xsave_buf;
int ret, i;
+ const uint8_t *xmm;
uint16_t cwd, swd, twd;
if (!kvm_has_xsave()) {
@@ -1421,8 +1434,6 @@ static int kvm_get_xsave(X86CPU *cpu)
env->mxcsr = xsave->region[XSAVE_MXCSR];
memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
sizeof env->fpregs);
- memcpy(env->xmm_regs, &xsave->region[XSAVE_XMM_SPACE],
- sizeof env->xmm_regs);
env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
sizeof env->ymmh_regs);
@@ -1434,6 +1445,13 @@ static int kvm_get_xsave(X86CPU *cpu)
sizeof env->opmask_regs);
memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
sizeof env->zmmh_regs);
+
+ xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
+ for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+ env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
+ env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
+ }
+
#ifdef TARGET_X86_64
memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
sizeof env->hi16_zmm_regs);
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5af4300..9ebdf4b 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2621,10 +2621,10 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
static inline void gen_op_movo(int d_offset, int s_offset)
{
- tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
- tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
- tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
- tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0)));
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(0)));
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1)));
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1)));
}
static inline void gen_op_movq(int d_offset, int s_offset)