| author | Paolo Bonzini <pbonzini@redhat.com> | 2014-10-24 09:50:21 +0200 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2015-01-26 12:22:44 +0100 |
| commit | b7711471f551aa4419f9d46a11121f48ced422da (patch) | |
| tree | 4bbf6b6e20262ca030aaa7f5c2918f532594e63c /target-i386/kvm.c | |
| parent | a03c3e90e11976fb147904d537457984bb938ce2 (diff) | |
target-i386: make xmm_regs 512-bit wide
Right now, the AVX512 registers are split in many different fields:
xmm_regs for the low 128 bits of the first 16 registers, ymmh_regs
for the next 128 bits of the same first 16 registers, zmmh_regs
for the next 256 bits of the same first 16 registers, and finally
hi16_zmm_regs for the full 512 bits of the second set of 16 registers.
This makes it simple to move data in and out of the xsave region,
but would be a nightmare for a hypothetical TCG implementation and
leads to a proliferation of [XYZ]MM_[BWLSQD] macros. Instead,
this patch marshals data manually from the xsave region to a single
32x512-bit array, simplifying the macro jungle and clarifying which
bits are in which vmstate subsection.
The migration format is unaffected.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
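For readers unfamiliar with the layout the message describes, here is a minimal sketch of the before/after register storage. The type and field names below are simplified assumptions for illustration, not QEMU's exact cpu.h definitions:

```c
/* Sketch only: a 512-bit vector register held as 8 quadwords, roughly
 * what QEMU's register union looks like after this patch (quadwords
 * are accessed through the XMM_Q(n) macro in the real code). */
#include <stdint.h>

typedef union {
    uint8_t  b[64];
    uint64_t q[8];              /* XMM_Q(0)..XMM_Q(7) */
} Reg512;                       /* hypothetical stand-in name */

/* Before: four parallel arrays split each register across fields
 * (xmm_regs, ymmh_regs, zmmh_regs, hi16_zmm_regs).
 * After: one array holds all 32 AVX512 registers end to end:
 *   xmm_regs[i].q[0..1]  = legacy XMM   (bits   0..127)
 *   xmm_regs[i].q[2..3]  = YMMH state   (bits 128..255)
 *   xmm_regs[i].q[4..7]  = ZMM_Hi256    (bits 256..511)
 *   xmm_regs[16..31]     = Hi16_ZMM     (whole registers)  */
struct VecStateSketch {
    Reg512 xmm_regs[32];        /* the 32x512-bit array from the text */
};
```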
Diffstat (limited to 'target-i386/kvm.c')
-rw-r--r-- | target-i386/kvm.c | 40 |
1 file changed, 24 insertions, 16 deletions
```diff
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 36b1519..40d6a14 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1048,7 +1048,7 @@ static int kvm_put_xsave(X86CPU *cpu)
     CPUX86State *env = &cpu->env;
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
     uint16_t cwd, swd, twd;
-    uint8_t *xmm;
+    uint8_t *xmm, *ymmh, *zmmh;
     int i, r;
 
     if (!kvm_has_xsave()) {
@@ -1071,26 +1071,30 @@
             sizeof env->fpregs);
     xsave->region[XSAVE_MXCSR] = env->mxcsr;
     *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
-    memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
-            sizeof env->ymmh_regs);
     memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs,
             sizeof env->bnd_regs);
     memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
             sizeof(env->bndcs_regs));
     memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
             sizeof env->opmask_regs);
-    memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
-            sizeof env->zmmh_regs);
     xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
-    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+    ymmh = (uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
+    zmmh = (uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
+    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
         stq_p(xmm, env->xmm_regs[i].XMM_Q(0));
         stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1));
+        stq_p(ymmh, env->xmm_regs[i].XMM_Q(2));
+        stq_p(ymmh+8, env->xmm_regs[i].XMM_Q(3));
+        stq_p(zmmh, env->xmm_regs[i].XMM_Q(4));
+        stq_p(zmmh+8, env->xmm_regs[i].XMM_Q(5));
+        stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6));
+        stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7));
     }
 
 #ifdef TARGET_X86_64
-    memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
-            sizeof env->hi16_zmm_regs);
+    memcpy(&xsave->region[XSAVE_Hi16_ZMM], &env->xmm_regs[16],
+            16 * sizeof env->xmm_regs[16]);
 #endif
     r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
     return r;
@@ -1407,7 +1411,7 @@ static int kvm_get_xsave(X86CPU *cpu)
     CPUX86State *env = &cpu->env;
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
     int ret, i;
-    const uint8_t *xmm;
+    const uint8_t *xmm, *ymmh, *zmmh;
     uint16_t cwd, swd, twd;
 
     if (!kvm_has_xsave()) {
@@ -1435,26 +1439,30 @@
     memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
             sizeof env->fpregs);
     env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
-    memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
-            sizeof env->ymmh_regs);
     memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS],
             sizeof env->bnd_regs);
     memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
             sizeof(env->bndcs_regs));
     memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
             sizeof env->opmask_regs);
-    memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
-            sizeof env->zmmh_regs);
     xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
-    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+    ymmh = (const uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
+    zmmh = (const uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
+    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
         env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
         env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
+        env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
+        env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8);
+        env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh);
+        env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8);
+        env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16);
+        env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24);
     }
 
 #ifdef TARGET_X86_64
-    memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
-            sizeof env->hi16_zmm_regs);
+    memcpy(&env->xmm_regs[16], &xsave->region[XSAVE_Hi16_ZMM],
+            16 * sizeof env->xmm_regs[16]);
 #endif
     return 0;
 }
```
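To make the data flow of the two loops above easier to follow, here is a self-contained sketch of the same marshalling pattern. NB_REGS, the region offsets, Reg512, and the stq/ldq helpers are illustrative stand-ins, not QEMU's actual XSAVE_* constants or its stq_p/ldq_p implementations:

```c
/* Round-trip sketch of the put/get loops in the patch above. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NB_REGS     16                        /* CPU_NB_REGS stand-in */
#define XMM_OFF     0                         /* 16 bytes/register    */
#define YMMH_OFF    (XMM_OFF + 16 * NB_REGS)  /* next 16 bytes        */
#define ZMMH_OFF    (YMMH_OFF + 16 * NB_REGS) /* final 32 bytes       */
#define REGION_SIZE (ZMMH_OFF + 32 * NB_REGS)

typedef struct { uint64_t q[8]; } Reg512;     /* one 512-bit register */

/* Stand-ins for stq_p/ldq_p (host-endian memcpy, for simplicity). */
static void stq(uint8_t *p, uint64_t v) { memcpy(p, &v, 8); }
static uint64_t ldq(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v; }

/* Mirror of kvm_put_xsave: scatter each register's 8 quadwords into
 * the three xsave areas, walking all three pointers in lockstep. */
static void put_regs(uint8_t *region, const Reg512 *regs)
{
    uint8_t *xmm  = region + XMM_OFF;
    uint8_t *ymmh = region + YMMH_OFF;
    uint8_t *zmmh = region + ZMMH_OFF;
    for (int i = 0; i < NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
        stq(xmm,       regs[i].q[0]);
        stq(xmm + 8,   regs[i].q[1]);
        stq(ymmh,      regs[i].q[2]);
        stq(ymmh + 8,  regs[i].q[3]);
        stq(zmmh,      regs[i].q[4]);
        stq(zmmh + 8,  regs[i].q[5]);
        stq(zmmh + 16, regs[i].q[6]);
        stq(zmmh + 24, regs[i].q[7]);
    }
}

/* Mirror of kvm_get_xsave: gather the quadwords back. */
static void get_regs(const uint8_t *region, Reg512 *regs)
{
    const uint8_t *xmm  = region + XMM_OFF;
    const uint8_t *ymmh = region + YMMH_OFF;
    const uint8_t *zmmh = region + ZMMH_OFF;
    for (int i = 0; i < NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
        regs[i].q[0] = ldq(xmm);
        regs[i].q[1] = ldq(xmm + 8);
        regs[i].q[2] = ldq(ymmh);
        regs[i].q[3] = ldq(ymmh + 8);
        regs[i].q[4] = ldq(zmmh);
        regs[i].q[5] = ldq(zmmh + 8);
        regs[i].q[6] = ldq(zmmh + 16);
        regs[i].q[7] = ldq(zmmh + 24);
    }
}

int main(void)
{
    uint8_t region[REGION_SIZE];
    Reg512 in[NB_REGS], out[NB_REGS];
    for (int i = 0; i < NB_REGS; i++)
        for (int j = 0; j < 8; j++)
            in[i].q[j] = (uint64_t)i << 32 | (uint64_t)j;
    put_regs(region, in);   /* like kvm_put_xsave filling the buffer */
    get_regs(region, out);  /* like kvm_get_xsave reading it back    */
    assert(memcmp(in, out, sizeof in) == 0);
    puts("round-trip OK");
    return 0;
}
```

The benefit of the single 32x512-bit array is visible here: one loop walks the XMM, YMMH, and ZMM_Hi256 areas in lockstep instead of three separate memcpys over differently shaped fields, so every quadword of every register has exactly one home.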