author     Richard Henderson <richard.henderson@linaro.org>  2024-06-08 07:40:08 -0700
committer  Richard Henderson <richard.henderson@linaro.org>  2024-06-08 07:40:08 -0700
commit     3e246da2c3f85298b52f8a1154b832acf36aa656
tree       94ad95cf7197831fd84cf9ec32972d6b8186c3af /target
parent     dec9742cbc59415a8b83e382e7ae36395394e4bd
parent     fc00123f3abeb027cd51eb58ea8845377794b3bc
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* scsi-disk: Don't silently truncate serial number
* backends/hostmem: Report error on unavailable qemu_madvise() features or unaligned memory sizes
* target/i386: fixes and documentation for INHIBIT_IRQ/TF/RF and debugging
* i386/hvf: Adds support for INVTSC cpuid bit
* i386/hvf: Fixes for dirty memory tracking
* i386/hvf: Use hv_vcpu_interrupt() and hv_vcpu_run_until()
* hvf: Cleanups
* stubs: fixes for --disable-system build
* i386/kvm: support for FRED
* i386/kvm: fix MCE handling on AMD hosts
# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmZkF2oUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroPNlQf+N9y6Eh0nMEEQ69twtV8ytglTY+uX
# FsogvnsXHNMVubOWmmeItM6kFXTAkR9cmFaL8dqI1Gs03xEQdQXbF1KejJZOAZVl
# RQMOW8Fg2Afr+0lwqCXHvhsmZ4hr5yUkRndyucA/E9AO2uGrtgwsWGDBGaHJOZIA
# lAsEMOZgKjXHZnefXjhMrvpk/QNovjEV6f1RHX3oKZjKSI5/G4IqGSmwNYToot8p
# 2fgs4Qti4+1gNyM2oBLq7cCMjMS61tSxOMH4uqVoIisjyckPlAFRvc+DXtKsUAAs
# 9AgM++pNgpB0IXv67czRUNdRoK7OI8I0ULhI4qHXi6Yg2QYAHqpQ6WL4Lg==
# =RP7U
# -----END PGP SIGNATURE-----
# gpg: Signature made Sat 08 Jun 2024 01:33:46 AM PDT
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (42 commits)
python: mkvenv: remove ensure command
Revert "python: use vendored tomli"
i386: Add support for overflow recovery
i386: Add support for SUCCOR feature
i386: Fix MCE support for AMD hosts
docs: i386: pc: Avoid mentioning limit of maximum vCPUs
target/i386: Add get/set/migrate support for FRED MSRs
target/i386: enumerate VMX nested-exception support
vmxcap: add support for VMX FRED controls
target/i386: mark CR4.FRED not reserved
target/i386: add support for FRED in CPUID enumeration
hvf: Makes assert_hvf_ok report failed expression
i386/hvf: Updates API usage to use modern vCPU run function
i386/hvf: In kick_vcpu use hv_vcpu_interrupt to force exit
i386/hvf: Fixes dirty memory tracking by page granularity RX->RWX change
hvf: Consistent types for vCPU handles
i386/hvf: Fixes some compilation warnings
i386/hvf: Adds support for INVTSC cpuid bit
stubs/meson: Fix qemuutil build when --disable-system
scsi-disk: Don't silently truncate serial number
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
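
A quick way to see the guest-visible side of the FRED and RAS changes in this pull is to probe the new CPUID bits directly. The sketch below is illustrative only, not code from the series; it assumes GCC/Clang's <cpuid.h> and uses the bit positions the series defines in target/i386/cpu.h (FRED/LKGS/WRMSRNS in CPUID[EAX=7,ECX=1].EAX bits 17-19, overflow-recov/succor in CPUID[0x80000007].EBX bits 0-1):

    #include <cpuid.h>
    #include <stdio.h>

    /* Bit positions as defined in target/i386/cpu.h below. */
    #define FRED_BIT            (1U << 17)  /* CPUID[EAX=7,ECX=1].EAX */
    #define LKGS_BIT            (1U << 18)
    #define WRMSRNS_BIT         (1U << 19)
    #define OVERFLOW_RECOV_BIT  (1U << 0)   /* CPUID[0x80000007].EBX */
    #define SUCCOR_BIT          (1U << 1)

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 7 subleaf 1; __get_cpuid_count checks the max basic leaf. */
        if (__get_cpuid_count(7, 1, &eax, &ebx, &ecx, &edx)) {
            printf("fred: %d lkgs: %d wrmsrns: %d\n",
                   !!(eax & FRED_BIT), !!(eax & LKGS_BIT),
                   !!(eax & WRMSRNS_BIT));
        }
        /* Extended leaf; __get_cpuid checks the max extended leaf. */
        if (__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx)) {
            printf("overflow-recov: %d succor: %d\n",
                   !!(ebx & OVERFLOW_RECOV_BIT), !!(ebx & SUCCOR_BIT));
        }
        return 0;
    }

With a QEMU built from this tree, the corresponding flags should be controllable through the usual -cpu syntax using the names registered in feature_word_info below (fred, lkgs, wrmsrns, overflow-recov, succor), subject to host/accelerator support.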
Diffstat (limited to 'target')
-rw-r--r--  target/i386/cpu.c                    |  33
-rw-r--r--  target/i386/cpu.h                    |  53
-rw-r--r--  target/i386/helper.c                 |   4
-rw-r--r--  target/i386/helper.h                 |   5
-rw-r--r--  target/i386/hvf/hvf.c                |  47
-rw-r--r--  target/i386/hvf/vmx.h                |   3
-rw-r--r--  target/i386/hvf/x86_cpuid.c          |   4
-rw-r--r--  target/i386/hvf/x86_decode.c         |   2
-rw-r--r--  target/i386/hvf/x86_emu.c            |   4
-rw-r--r--  target/i386/kvm/kvm.c                |  90
-rw-r--r--  target/i386/machine.c                |  28
-rw-r--r--  target/i386/tcg/decode-new.c.inc     |  19
-rw-r--r--  target/i386/tcg/emit.c.inc           |  37
-rw-r--r--  target/i386/tcg/excp_helper.c        |  20
-rw-r--r--  target/i386/tcg/helper-tcg.h         |  13
-rw-r--r--  target/i386/tcg/misc_helper.c        |  14
-rw-r--r--  target/i386/tcg/seg_helper.c         |  49
-rw-r--r--  target/i386/tcg/sysemu/bpt_helper.c  |  18
-rw-r--r--  target/i386/tcg/sysemu/misc_helper.c |  17
-rw-r--r--  target/i386/tcg/sysemu/seg_helper.c  |  17
-rw-r--r--  target/i386/tcg/sysemu/svm_helper.c  |  71
-rw-r--r--  target/i386/tcg/translate.c          |  39
22 files changed, 489 insertions, 98 deletions
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 914bef4..7466217 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1114,7 +1114,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             "avx-vnni", "avx512-bf16", NULL, "cmpccxadd",
             NULL, NULL, "fzrm", "fsrs",
             "fsrc", NULL, NULL, NULL,
-            NULL, NULL, NULL, NULL,
+            NULL, "fred", "lkgs", "wrmsrns",
             NULL, "amx-fp16", NULL, "avx-ifma",
             NULL, NULL, "lam", NULL,
             NULL, NULL, NULL, NULL,
@@ -1180,6 +1180,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         .tcg_features = TCG_APM_FEATURES,
         .unmigratable_flags = CPUID_APM_INVTSC,
     },
+    [FEAT_8000_0007_EBX] = {
+        .type = CPUID_FEATURE_WORD,
+        .feat_names = {
+            "overflow-recov", "succor", NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+        },
+        .cpuid = { .eax = 0x80000007, .reg = R_EBX, },
+        .tcg_features = 0,
+        .unmigratable_flags = 0,
+    },
     [FEAT_8000_0008_EBX] = {
         .type = CPUID_FEATURE_WORD,
         .feat_names = {
@@ -1492,6 +1508,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             [54] = "vmx-ins-outs",
             [55] = "vmx-true-ctls",
             [56] = "vmx-any-errcode",
+            [58] = "vmx-nested-exception",
         },
         .msr = {
             .index = MSR_IA32_VMX_BASIC,
@@ -1701,6 +1718,18 @@ static FeatureDep feature_dependencies[] = {
         .from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG },
         .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
     },
+    {
+        .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM },
+        .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+    },
+    {
+        .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_LKGS },
+        .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+    },
+    {
+        .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS },
+        .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+    },
 };
 
 typedef struct X86RegisterInfo32 {
@@ -6874,7 +6903,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0x80000007:
         *eax = 0;
-        *ebx = 0;
+        *ebx = env->features[FEAT_8000_0007_EBX];
        *ecx = 0;
         *edx = env->features[FEAT_8000_0007_EDX];
         break;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c64ef0c..8fe28b6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -261,6 +261,18 @@ typedef enum X86Seg {
 #define CR4_PKS_MASK     (1U << 24)
 #define CR4_LAM_SUP_MASK (1U << 28)
 
+#ifdef TARGET_X86_64
+#define CR4_FRED_MASK   (1ULL << 32)
+#else
+#define CR4_FRED_MASK   0
+#endif
+
+#ifdef TARGET_X86_64
+#define CR4_FRED_MASK   (1ULL << 32)
+#else
+#define CR4_FRED_MASK   0
+#endif
+
 #define CR4_RESERVED_MASK \
 (~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \
                 | CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \
@@ -269,7 +281,7 @@ typedef enum X86Seg {
                 | CR4_LA57_MASK \
                 | CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \
                 | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \
-                | CR4_LAM_SUP_MASK))
+                | CR4_LAM_SUP_MASK | CR4_FRED_MASK))
 
 #define DR6_BD          (1 << 13)
 #define DR6_BS          (1 << 14)
@@ -365,6 +377,8 @@ typedef enum X86Seg {
 #define MCI_STATUS_PCC  (1ULL<<57)  /* processor context corrupt */
 #define MCI_STATUS_S    (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
+#define MCI_STATUS_DEFERRED (1ULL<<44)  /* Deferred error */
+#define MCI_STATUS_POISON   (1ULL<<43)  /* Poisoned data consumed */
 
 /* MISC register defines */
 #define MCM_ADDR_SEGOFF 0       /* segment offset */
@@ -526,6 +540,17 @@ typedef enum X86Seg {
 #define MSR_IA32_XFD                    0x000001c4
 #define MSR_IA32_XFD_ERR                0x000001c5
 
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0              0x000001cc  /* Stack level 0 regular stack pointer */
+#define MSR_IA32_FRED_RSP1              0x000001cd  /* Stack level 1 regular stack pointer */
+#define MSR_IA32_FRED_RSP2              0x000001ce  /* Stack level 2 regular stack pointer */
+#define MSR_IA32_FRED_RSP3              0x000001cf  /* Stack level 3 regular stack pointer */
+#define MSR_IA32_FRED_STKLVLS           0x000001d0  /* FRED exception stack levels */
+#define MSR_IA32_FRED_SSP1              0x000001d1  /* Stack level 1 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP2              0x000001d2  /* Stack level 2 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP3              0x000001d3  /* Stack level 3 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_CONFIG            0x000001d4  /* FRED Entrypoint and interrupt stack level */
+
 #define MSR_IA32_BNDCFGS                0x00000d90
 #define MSR_IA32_XSS                    0x00000da0
 #define MSR_IA32_UMWAIT_CONTROL         0xe1
@@ -605,6 +630,7 @@ typedef enum FeatureWord {
     FEAT_7_1_EAX,       /* CPUID[EAX=7,ECX=1].EAX */
     FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */
     FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */
+    FEAT_8000_0007_EBX, /* CPUID[8000_0007].EBX */
     FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */
     FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */
     FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */
@@ -941,6 +967,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define CPUID_7_1_EDX_AMX_COMPLEX       (1U << 8)
 /* PREFETCHIT0/1 Instructions */
 #define CPUID_7_1_EDX_PREFETCHITI       (1U << 14)
+/* Flexible return and event delivery (FRED) */
+#define CPUID_7_1_EAX_FRED              (1U << 17)
+/* Load into IA32_KERNEL_GS_BASE (LKGS) */
+#define CPUID_7_1_EAX_LKGS              (1U << 18)
+/* Non-Serializing Write to Model Specific Register (WRMSRNS) */
+#define CPUID_7_1_EAX_WRMSRNS           (1U << 19)
 
 /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */
 #define CPUID_7_2_EDX_MCDT_NO           (1U << 5)
@@ -951,6 +983,10 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 /* Packets which contain IP payload have LIP values */
 #define CPUID_14_0_ECX_LIP              (1U << 31)
 
+/* RAS Features */
+#define CPUID_8000_0007_EBX_OVERFLOW_RECOV  (1U << 0)
+#define CPUID_8000_0007_EBX_SUCCOR          (1U << 1)
+
 /* CLZERO instruction */
 #define CPUID_8000_0008_EBX_CLZERO      (1U << 0)
 /* Always save/restore FP error pointers */
@@ -1053,6 +1089,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define MSR_VMX_BASIC_INS_OUTS          (1ULL << 54)
 #define MSR_VMX_BASIC_TRUE_CTLS         (1ULL << 55)
 #define MSR_VMX_BASIC_ANY_ERRCODE       (1ULL << 56)
+#define MSR_VMX_BASIC_NESTED_EXCEPTION  (1ULL << 58)
 
 #define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full
 #define MSR_VMX_MISC_STORE_LMA          (1ULL << 5)
@@ -1704,6 +1741,17 @@ typedef struct CPUArchState {
     target_ulong cstar;
     target_ulong fmask;
     target_ulong kernelgsbase;
+
+    /* FRED MSRs */
+    uint64_t fred_rsp0;
+    uint64_t fred_rsp1;
+    uint64_t fred_rsp2;
+    uint64_t fred_rsp3;
+    uint64_t fred_stklvls;
+    uint64_t fred_ssp1;
+    uint64_t fred_ssp2;
+    uint64_t fred_ssp3;
+    uint64_t fred_config;
 #endif
 
     uint64_t tsc_adjust;
@@ -2607,6 +2655,9 @@ static inline uint64_t cr4_reserved_bits(CPUX86State *env)
     if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) {
         reserved_bits |= CR4_LAM_SUP_MASK;
     }
+    if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED)) {
+        reserved_bits |= CR4_FRED_MASK;
+    }
     return reserved_bits;
 }
 
diff --git a/target/i386/helper.c b/target/i386/helper.c
index f9d1381..01a268a 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -91,6 +91,10 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env)
     int family = 0;
     int model = 0;
 
+    if (IS_AMD_CPU(env)) {
+        return 0;
+    }
+
     cpu_x86_version(env, &family, &model);
     if ((family == 6 && model >= 14) || family > 6) {
         return 1;
diff --git a/target/i386/helper.h b/target/i386/helper.h
index a52a1bf..2f46cff 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -53,9 +53,10 @@ DEF_HELPER_1(sysenter, void, env)
 DEF_HELPER_2(sysexit, void, env, int)
 DEF_HELPER_2(syscall, void, env, int)
 DEF_HELPER_2(sysret, void, env, int)
-DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(pause, TCG_CALL_NO_WG, noreturn, env)
 DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int)
 DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(icebp, TCG_CALL_NO_WG, noreturn, env)
 
 DEF_HELPER_3(boundw, void, env, tl, int)
 DEF_HELPER_3(boundl, void, env, tl, int)
@@ -89,7 +90,7 @@ DEF_HELPER_2(vmsave, void, env, int)
 DEF_HELPER_1(stgi, void, env)
 DEF_HELPER_1(clgi, void, env)
 DEF_HELPER_FLAGS_2(flush_page, TCG_CALL_NO_RWG, void, env, tl)
-DEF_HELPER_FLAGS_2(hlt, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(hlt, TCG_CALL_NO_WG, noreturn, env)
 DEF_HELPER_FLAGS_2(monitor, TCG_CALL_NO_WG, void, env, tl)
 DEF_HELPER_FLAGS_2(mwait, TCG_CALL_NO_WG, noreturn, env, int)
 DEF_HELPER_1(rdmsr, void, env)
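The three FeatureDep entries added to cpu.c above encode "FRED requires long mode, LKGS and WRMSRNS". As a rough illustration of how such a dependency table is consumed, the stand-alone sketch below (hypothetical names and simplified types, not QEMU's actual x86_cpu_expand_features() logic) clears any dependent bit whose prerequisite is missing, repeating until a fixed point:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-ins for QEMU's FeatureWord machinery. */
    typedef struct {
        int from_word;  uint64_t from_bit;
        int to_word;    uint64_t to_bit;
    } FeatureDep;

    enum { W_7_1_EAX, W_8000_0001_EDX, N_WORDS };
    #define LM      (1u << 29)   /* CPUID[0x80000001].EDX */
    #define FRED    (1u << 17)   /* CPUID[EAX=7,ECX=1].EAX */
    #define LKGS    (1u << 18)
    #define WRMSRNS (1u << 19)

    /* "FRED depends on LM, LKGS and WRMSRNS", as in the patch. */
    static const FeatureDep deps[] = {
        { W_8000_0001_EDX, LM,      W_7_1_EAX, FRED },
        { W_7_1_EAX,       LKGS,    W_7_1_EAX, FRED },
        { W_7_1_EAX,       WRMSRNS, W_7_1_EAX, FRED },
    };

    /* Clear features whose prerequisites are unset, until stable. */
    static void resolve_deps(uint64_t features[N_WORDS])
    {
        bool changed = true;
        while (changed) {
            changed = false;
            for (size_t i = 0; i < sizeof(deps) / sizeof(deps[0]); i++) {
                const FeatureDep *d = &deps[i];
                if (!(features[d->from_word] & d->from_bit) &&
                    (features[d->to_word] & d->to_bit)) {
                    features[d->to_word] &= ~d->to_bit;
                    changed = true;
                }
            }
        }
    }

    int main(void)
    {
        /* FRED requested, but no long mode and no WRMSRNS. */
        uint64_t f[N_WORDS] = { [W_7_1_EAX] = FRED | LKGS };
        resolve_deps(f);
        printf("fred after resolution: %d\n", !!(f[W_7_1_EAX] & FRED)); /* 0 */
        return 0;
    }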
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index e493452..2d0eef6 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -49,6 +49,8 @@
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
 #include "qemu/memalign.h"
+#include "qapi/error.h"
+#include "migration/blocker.h"
 
 #include "sysemu/hvf.h"
 #include "sysemu/hvf_int.h"
@@ -74,6 +76,8 @@
 #include "qemu/accel.h"
 #include "target/i386/cpu.h"
 
+static Error *invtsc_mig_blocker;
+
 void vmx_update_tpr(CPUState *cpu)
 {
     /* TODO: need integrate APIC handling */
@@ -131,9 +135,10 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
 
     if (write && slot) {
         if (slot->flags & HVF_SLOT_LOG) {
+            uint64_t dirty_page_start = gpa & ~(TARGET_PAGE_SIZE - 1u);
             memory_region_set_dirty(slot->region, gpa - slot->start, 1);
-            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
-                          HV_MEMORY_READ | HV_MEMORY_WRITE);
+            hv_vm_protect(dirty_page_start, TARGET_PAGE_SIZE,
+                          HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
         }
     }
 
@@ -210,6 +215,7 @@ static inline bool apic_bus_freq_is_known(CPUX86State *env)
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
     cpus_kick_thread(cpu);
+    hv_vcpu_interrupt(&cpu->accel->fd, 1);
 }
 
 int hvf_arch_init(void)
@@ -221,6 +227,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 {
     X86CPU *x86cpu = X86_CPU(cpu);
     CPUX86State *env = &x86cpu->env;
+    Error *local_err = NULL;
+    int r;
     uint64_t reqCap;
 
     init_emu();
@@ -238,6 +246,18 @@ int hvf_arch_init_vcpu(CPUState *cpu)
         }
     }
 
+    if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
+        invtsc_mig_blocker == NULL) {
+        error_setg(&invtsc_mig_blocker,
+                   "State blocked by non-migratable CPU device (invtsc flag)");
+        r = migrate_add_blocker(&invtsc_mig_blocker, &local_err);
+        if (r < 0) {
+            error_report_err(local_err);
+            return r;
+        }
+    }
+
+
     if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
                                &hvf_state->hvf_caps->vmx_cap_pinbased)) {
         abort();
@@ -407,6 +427,27 @@ static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
     }
 }
 
+static hv_return_t hvf_vcpu_run(hv_vcpuid_t vcpu_id)
+{
+    /*
+     * hv_vcpu_run_until is available and recommended from macOS 10.15+,
+     * HV_DEADLINE_FOREVER from 11.0. Test for availability at runtime and
+     * fall back to hv_vcpu_run() only where necessary.
+     */
+#ifndef MAC_OS_VERSION_11_0
+    return hv_vcpu_run(vcpu_id);
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+    return hv_vcpu_run_until(vcpu_id, HV_DEADLINE_FOREVER);
+#else /* MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 */
+    /* 11.0 SDK or newer, but could be < 11 at runtime */
+    if (__builtin_available(macOS 11.0, *)) {
+        return hv_vcpu_run_until(vcpu_id, HV_DEADLINE_FOREVER);
+    } else {
+        return hv_vcpu_run(vcpu_id);
+    }
+#endif
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
@@ -435,7 +476,7 @@ int hvf_vcpu_exec(CPUState *cpu)
             return EXCP_HLT;
         }
 
-        hv_return_t r = hv_vcpu_run(cpu->accel->fd);
+        hv_return_t r = hvf_vcpu_run(cpu->accel->fd);
         assert_hvf_ok(r);
 
         /* handle VMEXIT */
diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
index 0fffcfa..3954ef8 100644
--- a/target/i386/hvf/vmx.h
+++ b/target/i386/hvf/vmx.h
@@ -95,8 +95,7 @@ static void enter_long_mode(hv_vcpuid_t vcpu, uint64_t cr0, uint64_t efer)
     efer |= MSR_EFER_LMA;
     wvmcs(vcpu, VMCS_GUEST_IA32_EFER, efer);
     entry_ctls = rvmcs(vcpu, VMCS_ENTRY_CTLS);
-    wvmcs(vcpu, VMCS_ENTRY_CTLS, rvmcs(vcpu, VMCS_ENTRY_CTLS) |
-          VM_ENTRY_GUEST_LMA);
+    wvmcs(vcpu, VMCS_ENTRY_CTLS, entry_ctls | VM_ENTRY_GUEST_LMA);
 
     uint64_t guest_tr_ar = rvmcs(vcpu, VMCS_GUEST_TR_ACCESS_RIGHTS);
     if ((efer & MSR_EFER_LME) &&
diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index 9380b90..e56cd84 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -146,6 +146,10 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
                CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_OSVW | CPUID_EXT3_XOP |
                CPUID_EXT3_FMA4 | CPUID_EXT3_TBM;
         break;
+    case 0x80000007:
+        edx &= CPUID_APM_INVTSC;
+        eax = ebx = ecx = 0;
+        break;
     default:
         return 0;
     }
diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index 3728d77..a4a28f1 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -2111,7 +2111,7 @@ uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode)
     return decode->len;
 }
 
-void init_decoder()
+void init_decoder(void)
 {
     int i;
 
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 3a3f0a5..38c782b 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -1409,7 +1409,7 @@ static struct cmd_handler {
 
 static struct cmd_handler _cmd_handler[X86_DECODE_CMD_LAST];
 
-static void init_cmd_handler()
+static void init_cmd_handler(void)
 {
     int i;
     for (i = 0; i < ARRAY_SIZE(handlers); i++) {
@@ -1481,7 +1481,7 @@ bool exec_instruction(CPUX86State *env, struct x86_decode *ins)
     return true;
 }
 
-void init_emu()
+void init_emu(void)
 {
     init_cmd_handler();
 }
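The hvf_vcpu_run() wrapper above is the standard Apple SDK availability pattern: the #ifndef arm covers old SDKs where the new symbol does not exist at all, the deployment-target arm covers builds that may assume macOS 11, and __builtin_available() covers binaries built against a new SDK but running on an older OS. Stripped of the QEMU specifics, the same shape looks like this; new_api()/old_api() are hypothetical stand-ins, stubbed here so the sketch compiles:

    #include <Availability.h>
    #include <stdio.h>

    /* Hypothetical stand-ins; in the patch these are hv_vcpu_run_until()
     * and hv_vcpu_run(). */
    static int new_api(void) { return 11; }
    static int old_api(void) { return 10; }

    static int call_best_api(void)
    {
    #ifndef MAC_OS_VERSION_11_0
        /* Old SDK: the new symbol does not exist and cannot be referenced. */
        return old_api();
    #elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
        /* Deployment target is 11.0+: the new symbol is always present. */
        return new_api();
    #else
        /*
         * Built against a 11.0+ SDK but possibly running on older macOS:
         * a real new API would be weak-linked, and __builtin_available()
         * performs the OS version check at runtime.
         */
        if (__builtin_available(macOS 11.0, *)) {
            return new_api();
        } else {
            return old_api();
        }
    #endif
    }

    int main(void)
    {
        printf("selected API: %d\n", call_best_api());
        return 0;
    }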
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 0852ed0..912f5d5 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -532,6 +532,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
          */
         cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
         ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
+    } else if (function == 0x80000007 && reg == R_EBX) {
+        ret |= CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR;
     } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) {
         /* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't
          * be enabled without the in-kernel irqchip
@@ -638,17 +640,40 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
 {
     CPUState *cs = CPU(cpu);
     CPUX86State *env = &cpu->env;
-    uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
-                      MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
-    uint64_t mcg_status = MCG_STATUS_MCIP;
+    uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_MISCV |
+                      MCI_STATUS_ADDRV;
+    uint64_t mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
     int flags = 0;
 
-    if (code == BUS_MCEERR_AR) {
-        status |= MCI_STATUS_AR | 0x134;
-        mcg_status |= MCG_STATUS_RIPV | MCG_STATUS_EIPV;
+    if (!IS_AMD_CPU(env)) {
+        status |= MCI_STATUS_S | MCI_STATUS_UC;
+        if (code == BUS_MCEERR_AR) {
+            status |= MCI_STATUS_AR | 0x134;
+            mcg_status |= MCG_STATUS_EIPV;
+        } else {
+            status |= 0xc0;
+        }
     } else {
-        status |= 0xc0;
-        mcg_status |= MCG_STATUS_RIPV;
+        if (code == BUS_MCEERR_AR) {
+            status |= MCI_STATUS_UC | MCI_STATUS_POISON;
+            mcg_status |= MCG_STATUS_EIPV;
+        } else {
+            /* Setting the POISON bit for deferred errors indicates to the
+             * guest kernel that the address provided by the MCE is valid
+             * and usable which will ensure that the guest kernel will send
+             * a SIGBUS_AO signal to the guest process. This allows for
+             * more desirable behavior in the case that the guest process
+             * with poisoned memory has set the MCE_KILL_EARLY prctl flag
+             * which indicates that the process would prefer to handle or
+             * shutdown due to the poisoned memory condition before the
+             * memory has been accessed.
+             *
+             * While the POISON bit would not be set in a deferred error
+             * sent from hardware, the bit is not meaningful for deferred
+             * errors and can be reused in this scenario.
+             */
+            status |= MCI_STATUS_DEFERRED | MCI_STATUS_POISON;
+        }
     }
 
     flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;
@@ -3376,6 +3401,17 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
         kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase);
         kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask);
         kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar);
+        if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, env->fred_rsp0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, env->fred_rsp1);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, env->fred_rsp2);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, env->fred_rsp3);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, env->fred_stklvls);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, env->fred_ssp1);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, env->fred_ssp2);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, env->fred_ssp3);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, env->fred_config);
+        }
     }
 #endif
 
@@ -3848,6 +3884,17 @@ static int kvm_get_msrs(X86CPU *cpu)
         kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0);
         kvm_msr_entry_add(cpu, MSR_FMASK, 0);
         kvm_msr_entry_add(cpu, MSR_LSTAR, 0);
+        if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, 0);
+            kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, 0);
+        }
     }
 #endif
     kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0);
@@ -4069,6 +4116,33 @@ static int kvm_get_msrs(X86CPU *cpu)
         case MSR_LSTAR:
             env->lstar = msrs[i].data;
             break;
+        case MSR_IA32_FRED_RSP0:
+            env->fred_rsp0 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_RSP1:
+            env->fred_rsp1 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_RSP2:
+            env->fred_rsp2 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_RSP3:
+            env->fred_rsp3 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_STKLVLS:
+            env->fred_stklvls = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_SSP1:
+            env->fred_ssp1 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_SSP2:
+            env->fred_ssp2 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_SSP3:
+            env->fred_ssp3 = msrs[i].data;
+            break;
+        case MSR_IA32_FRED_CONFIG:
+            env->fred_config = msrs[i].data;
+            break;
 #endif
         case MSR_IA32_TSC:
             env->tsc = msrs[i].data;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index c3ae320..39f8294 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1544,6 +1544,33 @@ static const VMStateDescription vmstate_msr_xfd = {
 };
 
 #ifdef TARGET_X86_64
+static bool intel_fred_msrs_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED);
+}
+
+static const VMStateDescription vmstate_msr_fred = {
+    .name = "cpu/fred",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = intel_fred_msrs_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.fred_rsp0, X86CPU),
+        VMSTATE_UINT64(env.fred_rsp1, X86CPU),
+        VMSTATE_UINT64(env.fred_rsp2, X86CPU),
+        VMSTATE_UINT64(env.fred_rsp3, X86CPU),
+        VMSTATE_UINT64(env.fred_stklvls, X86CPU),
+        VMSTATE_UINT64(env.fred_ssp1, X86CPU),
+        VMSTATE_UINT64(env.fred_ssp2, X86CPU),
+        VMSTATE_UINT64(env.fred_ssp3, X86CPU),
+        VMSTATE_UINT64(env.fred_config, X86CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool amx_xtile_needed(void *opaque)
 {
     X86CPU *cpu = opaque;
@@ -1747,6 +1774,7 @@ const VMStateDescription vmstate_x86_cpu = {
         &vmstate_pdptrs,
         &vmstate_msr_xfd,
 #ifdef TARGET_X86_64
+        &vmstate_msr_fred,
         &vmstate_amx_xtile,
 #endif
         &vmstate_arch_lbr,
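The vmstate_msr_fred subsection above uses QEMU's standard mechanism for migration compatibility: the .needed callback is evaluated at save time, so the subsection is simply absent from the stream for guests without FRED, and older destinations never see an unknown section. A minimal sketch of the same pattern, with hypothetical device and field names (assumes QEMU's migration/vmstate.h; the subsections array style may differ slightly between QEMU versions):

    #include "qemu/osdep.h"
    #include "migration/vmstate.h"

    /* Sketch of a guarded VMState subsection, mirroring vmstate_msr_fred. */
    typedef struct MyDevice {
        uint64_t regular_reg;   /* always migrated */
        uint64_t optional_reg;  /* migrated only when the feature is on */
        bool feature_enabled;
    } MyDevice;

    static bool optional_reg_needed(void *opaque)
    {
        MyDevice *s = opaque;
        /* The subsection is written only when this returns true. */
        return s->feature_enabled;
    }

    static const VMStateDescription vmstate_optional_reg = {
        .name = "mydevice/optional-reg",
        .version_id = 1,
        .minimum_version_id = 1,
        .needed = optional_reg_needed,
        .fields = (VMStateField[]) {
            VMSTATE_UINT64(optional_reg, MyDevice),
            VMSTATE_END_OF_LIST()
        }
    };

    static const VMStateDescription vmstate_mydevice = {
        .name = "mydevice",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_UINT64(regular_reg, MyDevice),
            VMSTATE_END_OF_LIST()
        },
        .subsections = (const VMStateDescription * []) {
            &vmstate_optional_reg,
            NULL
        }
    };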
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 0ff0866..c2d8da8 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1359,6 +1359,19 @@ static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
     }
 }
 
+static void decode_90(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static X86OpEntry pause = X86_OP_ENTRY0(PAUSE, svm(PAUSE));
+    static X86OpEntry nop = X86_OP_ENTRY0(NOP);
+    static X86OpEntry xchg_ax = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v);
+
+    if (REX_B(s)) {
+        *entry = xchg_ax;
+    } else {
+        *entry = (s->prefix & PREFIX_REPZ) ? pause : nop;
+    }
+}
+
 static const X86OpEntry opcodes_root[256] = {
     [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
     [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
@@ -1441,7 +1454,7 @@ static const X86OpEntry opcodes_root[256] = {
     [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
     [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
 
-    [0x90] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+    [0x90] = X86_OP_GROUP0(90),
     [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
     [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
     [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
@@ -1496,7 +1509,7 @@ static const X86OpEntry opcodes_root[256] = {
     [0xE7] = X86_OP_ENTRYrr(OUT, 0,v, I_unsigned,b), /* AX/EAX */
 
     [0xF1] = X86_OP_ENTRY0(INT1, svm(ICEBP)),
-    [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0)),
+    [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0) svm(HLT)),
     [0xF5] = X86_OP_ENTRY0(CMC),
     [0xF6] = X86_OP_GROUP1(group3, E,b),
     [0xF7] = X86_OP_GROUP1(group3, E,v),
@@ -2539,7 +2552,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
 
     /*
      * Checks that result in #GP or VMEXIT come second.  Intercepts are
-     * generally checked after non-memory exceptions (i.e. before all
+     * generally checked after non-memory exceptions (i.e. after all
      * exceptions if there is no memory operand).  Exceptions are
      * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
      *
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e990141..4be3d9a 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1638,8 +1638,8 @@ static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
 #ifdef CONFIG_SYSTEM_ONLY
     gen_update_cc_op(s);
-    gen_update_eip_cur(s);
-    gen_helper_hlt(tcg_env, cur_insn_len_i32(s));
+    gen_update_eip_next(s);
+    gen_helper_hlt(tcg_env);
     s->base.is_jmp = DISAS_NORETURN;
 #endif
 }
@@ -1858,7 +1858,10 @@ static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 
 static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
-    gen_exception(s, EXCP01_DB);
+    gen_update_cc_op(s);
+    gen_update_eip_next(s);
+    gen_helper_icebp(tcg_env);
+    s->base.is_jmp = DISAS_NORETURN;
 }
 
 static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@@ -2347,6 +2350,14 @@ static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
                       decode->op[1].offset, vec_len, vec_len);
 }
 
+static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_next(s);
+    gen_helper_pause(tcg_env);
+    s->base.is_jmp = DISAS_NORETURN;
+}
+
 static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -2564,12 +2575,14 @@ static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 
 static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
+    X86DecodedOp *op = &decode->op[0];
     MemOp ot = gen_pop_T0(s);
-    if (decode->op[0].has_ea) {
+
+    if (op->has_ea || op->unit == X86_OP_SEG) {
         /* NOTE: order is important for MMU exceptions */
-        gen_op_st_v(s, ot, s->T0, s->A0);
-        decode->op[0].unit = X86_OP_SKIP;
+        gen_writeback(s, decode, 0, s->T0);
     }
+
     /* NOTE: writing back registers after update is important for pop %sp */
     gen_pop_update(s, ot);
 }
@@ -4011,18 +4024,6 @@ static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 
 static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
-    if (decode->b == 0x90 && !REX_B(s)) {
-        if (s->prefix & PREFIX_REPZ) {
-            gen_update_cc_op(s);
-            gen_update_eip_cur(s);
-            gen_helper_pause(tcg_env, cur_insn_len_i32(s));
-            s->base.is_jmp = DISAS_NORETURN;
-        }
-        /* No writeback. */
-        decode->op[0].unit = X86_OP_SKIP;
-        return;
-    }
-
     if (s->prefix & PREFIX_LOCK) {
         tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1, s->mem_index,
                                decode->op[0].ot | MO_LE);
diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c
index 65e37ae..72387aa 100644
--- a/target/i386/tcg/excp_helper.c
+++ b/target/i386/tcg/excp_helper.c
@@ -140,6 +140,26 @@ G_NORETURN void raise_exception_ra(CPUX86State *env, int exception_index,
     raise_interrupt2(env, exception_index, 0, 0, 0, retaddr);
 }
 
+G_NORETURN void helper_icebp(CPUX86State *env)
+{
+    CPUState *cs = env_cpu(env);
+
+    do_end_instruction(env);
+
+    /*
+     * INT1 aka ICEBP generates a trap-like #DB, but it is pretty special.
+     *
+     * "Although the ICEBP instruction dispatches through IDT vector 1,
+     * that event is not interceptable by means of the #DB exception
+     * intercept".  Instead there is a separate fault-like ICEBP intercept.
+     */
+    cs->exception_index = EXCP01_DB;
+    env->error_code = 0;
+    env->exception_is_int = 0;
+    env->exception_next_eip = env->eip;
+    cpu_loop_exit(cs);
+}
+
 G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
                                         MMUAccessType access_type,
                                         uintptr_t retaddr)
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 8595794..15d6c6f 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -91,7 +91,6 @@ extern const uint8_t parity_table[256];
 
 /* misc_helper.c */
 void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask);
-G_NORETURN void do_pause(CPUX86State *env);
 
 /* sysemu/svm_helper.c */
 #ifndef CONFIG_USER_ONLY
@@ -111,7 +110,17 @@ int exception_has_error_code(int intno);
 /* smm_helper.c */
 void do_smm_enter(X86CPU *cpu);
 
-/* bpt_helper.c */
+/* sysemu/bpt_helper.c */
 bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update);
 
+/*
+ * Do the tasks usually performed by gen_eob().  Callers of this function
+ * should also handle TF as appropriate.
+ */
+static inline void do_end_instruction(CPUX86State *env)
+{
+    /* needed if sti is just before */
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+    env->eflags &= ~RF_MASK;
+}
+
 #endif /* I386_HELPER_TCG_H */
diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c
index b0f0f7b..ed4cda8 100644
--- a/target/i386/tcg/misc_helper.c
+++ b/target/i386/tcg/misc_helper.c
@@ -88,23 +88,19 @@ G_NORETURN void helper_rdpmc(CPUX86State *env)
     raise_exception_err(env, EXCP06_ILLOP, 0);
 }
 
-G_NORETURN void do_pause(CPUX86State *env)
+G_NORETURN void helper_pause(CPUX86State *env)
 {
     CPUState *cs = env_cpu(env);
 
+    /* Do gen_eob() tasks before going back to the main loop. */
+    do_end_instruction(env);
+    helper_rechecking_single_step(env);
+
     /* Just let another CPU run. */
     cs->exception_index = EXCP_INTERRUPT;
     cpu_loop_exit(cs);
 }
 
-G_NORETURN void helper_pause(CPUX86State *env, int next_eip_addend)
-{
-    cpu_svm_check_intercept_param(env, SVM_EXIT_PAUSE, 0, GETPC());
-    env->eip += next_eip_addend;
-
-    do_pause(env);
-}
-
 uint64_t helper_rdpkru(CPUX86State *env, uint32_t ecx)
 {
     if ((env->cr[4] & CR4_PKE_MASK) == 0) {
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 0301459..715db1f 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -526,6 +526,24 @@ static inline unsigned int get_sp_mask(unsigned int e2)
     }
 }
 
+static int exception_is_fault(int intno)
+{
+    switch (intno) {
+        /*
+         * #DB can be both fault- and trap-like, but it never sets RF=1
+         * in the RFLAGS value pushed on the stack.
+         */
+    case EXCP01_DB:
+    case EXCP03_INT3:
+    case EXCP04_INTO:
+    case EXCP08_DBLE:
+    case EXCP12_MCHK:
+        return 0;
+    }
+    /* Everything else including reserved exceptions is a fault.  */
+    return 1;
+}
+
 int exception_has_error_code(int intno)
 {
     switch (intno) {
@@ -605,8 +623,9 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     int type, dpl, selector, ss_dpl, cpl;
     int has_error_code, new_stack, shift;
     uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
-    uint32_t old_eip, sp_mask;
+    uint32_t old_eip, sp_mask, eflags;
     int vm86 = env->eflags & VM_MASK;
+    bool set_rf;
 
     has_error_code = 0;
     if (!is_int && !is_hw) {
@@ -614,8 +633,10 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     }
     if (is_int) {
         old_eip = next_eip;
+        set_rf = false;
     } else {
         old_eip = env->eip;
+        set_rf = exception_is_fault(intno);
     }
 
     dt = &env->idt;
@@ -748,6 +769,15 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
     }
     push_size <<= shift;
 #endif
+    eflags = cpu_compute_eflags(env);
+    /*
+     * AMD states that code breakpoint #DBs clear RF=0, Intel leaves it
+     * as is.  AMD behavior could be implemented in check_hw_breakpoints().
+     */
+    if (set_rf) {
+        eflags |= RF_MASK;
+    }
+
     if (shift == 1) {
         if (new_stack) {
             if (vm86) {
@@ -759,7 +789,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
             PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
             PUSHL(ssp, esp, sp_mask, env->regs[R_ESP]);
         }
-        PUSHL(ssp, esp, sp_mask, cpu_compute_eflags(env));
+        PUSHL(ssp, esp, sp_mask, eflags);
         PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
         PUSHL(ssp, esp, sp_mask, old_eip);
         if (has_error_code) {
@@ -776,7 +806,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
             PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
             PUSHW(ssp, esp, sp_mask, env->regs[R_ESP]);
         }
-        PUSHW(ssp, esp, sp_mask, cpu_compute_eflags(env));
+        PUSHW(ssp, esp, sp_mask, eflags);
         PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
         PUSHW(ssp, esp, sp_mask, old_eip);
         if (has_error_code) {
@@ -868,8 +898,9 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
     target_ulong ptr;
     int type, dpl, selector, cpl, ist;
     int has_error_code, new_stack;
-    uint32_t e1, e2, e3, ss;
+    uint32_t e1, e2, e3, ss, eflags;
     target_ulong old_eip, esp, offset;
+    bool set_rf;
 
     has_error_code = 0;
     if (!is_int && !is_hw) {
@@ -877,8 +908,10 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
     }
     if (is_int) {
         old_eip = next_eip;
+        set_rf = false;
     } else {
         old_eip = env->eip;
+        set_rf = exception_is_fault(intno);
     }
 
     dt = &env->idt;
@@ -950,9 +983,15 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
     }
     esp &= ~0xfLL; /* align stack */
 
+    /* See do_interrupt_protected.  */
+    eflags = cpu_compute_eflags(env);
+    if (set_rf) {
+        eflags |= RF_MASK;
+    }
+
     PUSHQ(esp, env->segs[R_SS].selector);
     PUSHQ(esp, env->regs[R_ESP]);
-    PUSHQ(esp, cpu_compute_eflags(env));
+    PUSHQ(esp, eflags);
     PUSHQ(esp, env->segs[R_CS].selector);
     PUSHQ(esp, old_eip);
     if (has_error_code) {
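The decode_90 routing above falls out of the instruction encoding: opcode 0x90 is xchg (e)ax,(e)ax, i.e. a NOP, unless REX.B retargets it to r8, and the F3 (REPZ) prefix turns it into PAUSE, which is why PAUSE is commonly written as "rep nop". A small guest-side illustration in GCC/Clang inline assembly, emitting exactly the byte sequences the decoder distinguishes:

    /* PAUSE is encoded as F3 90 ("rep nop"); plain 90 is NOP. */
    static inline void cpu_relax(void)
    {
        /* Same bytes the new decoder routes to gen_PAUSE(). */
        __asm__ __volatile__("rep; nop" ::: "memory");
    }

    static inline void just_nop(void)
    {
        /* Plain 0x90, routed to the NOP entry. */
        __asm__ __volatile__("nop");
    }

    int main(void)
    {
        just_nop();
        cpu_relax();
        return 0;
    }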
diff --git a/target/i386/tcg/sysemu/bpt_helper.c b/target/i386/tcg/sysemu/bpt_helper.c
index 4d96a48..b29acf4 100644
--- a/target/i386/tcg/sysemu/bpt_helper.c
+++ b/target/i386/tcg/sysemu/bpt_helper.c
@@ -215,6 +215,12 @@ void breakpoint_handler(CPUState *cs)
         if (cs->watchpoint_hit->flags & BP_CPU) {
             cs->watchpoint_hit = NULL;
             if (check_hw_breakpoints(env, false)) {
+                /*
+                 * FIXME: #DB should be delayed by one instruction if
+                 * INHIBIT_IRQ is set (STI cannot trigger a watchpoint).
+                 * The delayed #DB should also fuse with one generated
+                 * by ICEBP (aka INT1).
+                 */
                 raise_exception(env, EXCP01_DB);
             } else {
                 cpu_loop_exit_noexc(cs);
@@ -238,6 +244,12 @@ target_ulong helper_get_dr(CPUX86State *env, int reg)
         }
     }
 
+    if (env->dr[7] & DR7_GD) {
+        env->dr[7] &= ~DR7_GD;
+        env->dr[6] |= DR6_BD;
+        raise_exception_ra(env, EXCP01_DB, GETPC());
+    }
+
     return env->dr[reg];
 }
 
@@ -251,6 +263,12 @@ void helper_set_dr(CPUX86State *env, int reg, target_ulong t0)
         }
     }
 
+    if (env->dr[7] & DR7_GD) {
+        env->dr[7] &= ~DR7_GD;
+        env->dr[6] |= DR6_BD;
+        raise_exception_ra(env, EXCP01_DB, GETPC());
+    }
+
     if (reg < 4) {
         if (hw_breakpoint_enabled(env->dr[7], reg) &&
             hw_breakpoint_type(env->dr[7], reg) != DR7_TYPE_IO_RW) {
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index edb7c3d..7fa0c5a 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -516,25 +516,16 @@ void helper_flush_page(CPUX86State *env, target_ulong addr)
     tlb_flush_page(env_cpu(env), addr);
 }
 
-static G_NORETURN
-void do_hlt(CPUX86State *env)
+G_NORETURN void helper_hlt(CPUX86State *env)
 {
     CPUState *cs = env_cpu(env);
 
-    env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
+    do_end_instruction(env);
     cs->halted = 1;
     cs->exception_index = EXCP_HLT;
     cpu_loop_exit(cs);
 }
 
-G_NORETURN void helper_hlt(CPUX86State *env, int next_eip_addend)
-{
-    cpu_svm_check_intercept_param(env, SVM_EXIT_HLT, 0, GETPC());
-    env->eip += next_eip_addend;
-
-    do_hlt(env);
-}
-
 void helper_monitor(CPUX86State *env, target_ulong ptr)
 {
     if ((uint32_t)env->regs[R_ECX] != 0) {
@@ -556,8 +547,8 @@ G_NORETURN void helper_mwait(CPUX86State *env, int next_eip_addend)
 
     /* XXX: not complete but not completely erroneous */
     if (cs->cpu_index != 0 || CPU_NEXT(cs) != NULL) {
-        do_pause(env);
+        helper_pause(env);
     } else {
-        do_hlt(env);
+        helper_hlt(env);
     }
 }
diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c
index 9ba94de..05174a7 100644
--- a/target/i386/tcg/sysemu/seg_helper.c
+++ b/target/i386/tcg/sysemu/seg_helper.c
@@ -130,15 +130,26 @@ void x86_cpu_do_interrupt(CPUState *cs)
 
 bool x86_cpu_exec_halt(CPUState *cpu)
 {
-    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
-        X86CPU *x86_cpu = X86_CPU(cpu);
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
 
+    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
         bql_lock();
         apic_poll_irq(x86_cpu->apic_state);
         cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
         bql_unlock();
     }
 
-    return cpu_has_work(cpu);
+    if (!cpu_has_work(cpu)) {
+        return false;
+    }
+
+    /* Complete HLT instruction.  */
+    if (env->eflags & TF_MASK) {
+        env->dr[6] |= DR6_BS;
+        do_interrupt_all(x86_cpu, EXCP01_DB, 0, 0, env->eip, 0);
+    }
+    return true;
 }
 
 bool x86_need_replay_interrupt(int interrupt_request)
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 5d6de22..9db8ad6 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -163,6 +163,8 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
     uint64_t new_cr0;
     uint64_t new_cr3;
     uint64_t new_cr4;
+    uint64_t new_dr6;
+    uint64_t new_dr7;
 
     if (aflag == 2) {
         addr = env->regs[R_EAX];
@@ -252,6 +254,13 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                                                   control.intercept_exceptions
                                                   ));
 
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+    if (x86_ldl_phys(cs, env->vm_vmcb +
+                     offsetof(struct vmcb, control.int_state)) &
+        SVM_INTERRUPT_SHADOW_MASK) {
+        env->hflags |= HF_INHIBIT_IRQ_MASK;
+    }
+
     nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
                                                           control.nested_ctl));
     asid = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
@@ -361,20 +370,22 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                              env->vm_vmcb + offsetof(struct vmcb, save.rsp));
     env->regs[R_EAX] = x86_ldq_phys(cs,
                                     env->vm_vmcb + offsetof(struct vmcb, save.rax));
-    env->dr[7] = x86_ldq_phys(cs,
-                              env->vm_vmcb + offsetof(struct vmcb, save.dr7));
-    env->dr[6] = x86_ldq_phys(cs,
-                              env->vm_vmcb + offsetof(struct vmcb, save.dr6));
+
+    new_dr7 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr7));
+    new_dr6 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr6));
 
 #ifdef TARGET_X86_64
-    if (env->dr[6] & DR_RESERVED_MASK) {
+    if (new_dr7 & DR_RESERVED_MASK) {
         cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
     }
-    if (env->dr[7] & DR_RESERVED_MASK) {
+    if (new_dr6 & DR_RESERVED_MASK) {
         cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
     }
 #endif
 
+    cpu_x86_update_dr7(env, new_dr7);
+    env->dr[6] = new_dr6;
+
     if (is_efer_invalid_state(env)) {
         cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
     }
@@ -811,8 +822,12 @@ void do_vmexit(CPUX86State *env)
 
     env->hflags &= ~HF_GUEST_MASK;
     env->intercept = 0;
     env->intercept_exceptions = 0;
+
+    /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
     cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
     env->int_ctl = 0;
+
+    /* Clears the TSC_OFFSET inside the processor. */
     env->tsc_offset = 0;
 
     env->gdt.base = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb,
@@ -832,6 +847,15 @@ void do_vmexit(CPUX86State *env)
     cpu_x86_update_cr4(env, x86_ldq_phys(cs,
                                          env->vm_hsave + offsetof(struct vmcb,
                                                                   save.cr4)));
+
+    /*
+     * Resets the current ASID register to zero (host ASID; TLB flush).
+     *
+     * If the host is in PAE mode, the processor reloads the host's PDPEs
+     * from the page table indicated by the host's CR3.  FIXME: If the PDPEs
+     * contain illegal state, the processor causes a shutdown (QEMU does
+     * not implement PDPTRs).
+     */
     cpu_x86_update_cr3(env, x86_ldq_phys(cs,
                                          env->vm_hsave + offsetof(struct vmcb,
                                                                   save.cr3)));
@@ -839,12 +863,14 @@ void do_vmexit(CPUX86State *env)
        set properly */
     cpu_load_efer(env, x86_ldq_phys(cs, env->vm_hsave +
                                     offsetof(struct vmcb, save.efer)));
+
+    /* Completion of the VMRUN instruction clears the host EFLAGS.RF bit. */
     env->eflags = 0;
     cpu_load_eflags(env, x86_ldq_phys(cs,
                                       env->vm_hsave + offsetof(struct vmcb,
                                                                save.rflags)),
                     ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
-                      VM_MASK));
+                      RF_MASK | VM_MASK));
 
     svm_load_seg_cache(env, MMU_PHYS_IDX,
                        env->vm_hsave + offsetof(struct vmcb, save.es), R_ES);
@@ -864,8 +890,11 @@ void do_vmexit(CPUX86State *env)
 
     env->dr[6] = x86_ldq_phys(cs,
                               env->vm_hsave + offsetof(struct vmcb, save.dr6));
-    env->dr[7] = x86_ldq_phys(cs,
-                              env->vm_hsave + offsetof(struct vmcb, save.dr7));
+
+    /* Disables all breakpoints in the host DR7 register. */
+    cpu_x86_update_dr7(env,
+             x86_ldq_phys(cs,
+                          env->vm_hsave + offsetof(struct vmcb, save.dr7)) & ~0xff);
 
     /* other setups */
     x86_stl_phys(cs,
@@ -881,21 +910,17 @@ void do_vmexit(CPUX86State *env)
     env->hflags2 &= ~HF2_GIF_MASK;
     env->hflags2 &= ~HF2_VGIF_MASK;
 
-    /* FIXME: Resets the current ASID register to zero (host ASID). */
-
-    /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
-
-    /* Clears the TSC_OFFSET inside the processor. */
-
-    /* If the host is in PAE mode, the processor reloads the host's PDPEs
-       from the page table indicated the host's CR3. If the PDPEs contain
-       illegal state, the processor causes a shutdown. */
-
-    /* Disables all breakpoints in the host DR7 register. */
+    /* FIXME: Checks the reloaded host state for consistency. */
 
-    /* Checks the reloaded host state for consistency. */
-
-    /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
-       host's code segment or non-canonical (in the case of long mode), a
-       #GP fault is delivered inside the host. */
+    /*
+     * EFLAGS.TF causes a #DB trap after the VMRUN completes on the host
+     * side (i.e., after the #VMEXIT from the guest).  Since we're running
+     * in the main loop, call do_interrupt_all directly.
+     */
+    if ((env->eflags & TF_MASK) != 0) {
+        env->dr[6] |= DR6_BS;
+        do_interrupt_all(X86_CPU(cs), EXCP01_DB, 0, 0, env->eip, 0);
+    }
 }
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 0486ab6..fcba9c1 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -549,6 +549,19 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
     }
 }
 
+static void gen_update_eip_next(DisasContext *s)
+{
+    assert(s->pc_save != -1);
+    if (tb_cflags(s->base.tb) & CF_PCREL) {
+        tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save);
+    } else if (CODE64(s)) {
+        tcg_gen_movi_tl(cpu_eip, s->pc);
+    } else {
+        tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base));
+    }
+    s->pc_save = s->pc;
+}
+
 static void gen_update_eip_cur(DisasContext *s)
 {
     assert(s->pc_save != -1);
@@ -2125,7 +2138,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
     }
 
     /* Copy the FrameTemp value to EBP. */
-    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
+    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T1);
 
     /* Compute the final value of ESP. */
     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
@@ -3732,6 +3745,11 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
                 }
                 gen_update_cc_op(s);
                 gen_update_eip_cur(s);
+                /*
+                 * Reloads INHIBIT_IRQ mask as well as TF and RF with guest
+                 * state.  The usual gen_eob() handling is performed on
+                 * vmexit after host state is reloaded.
+                 */
                 gen_helper_vmrun(tcg_env, tcg_constant_i32(s->aflag - 1),
                                  cur_insn_len_i32(s));
                 tcg_gen_exit_tb(NULL, 0);
@@ -4630,6 +4648,14 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
      * If jmp_opt, we want to handle each string instruction individually.
      * For icount also disable repz optimization so that each iteration
      * is accounted separately.
+     *
+     * FIXME: this is messy; it makes REP string instructions a lot less
+     * efficient than they should be and it gets in the way of correct
+     * handling of RF (interrupts or traps arriving after any iteration
+     * of a repeated string instruction but the last should set RF to 1).
+     * Perhaps it would be more efficient if REP string instructions were
+     * always at the beginning of the TB, or even their own TB?  That
+     * would even allow accounting up to 64k iterations at once for icount.
      */
     dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);
@@ -4735,6 +4761,17 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
 
     switch (dc->base.is_jmp) {
     case DISAS_NORETURN:
+        /*
+         * Most instructions should not use DISAS_NORETURN, as that suppresses
+         * the handling of hflags normally done by gen_eob().  We can
+         * get here:
+         * - for exceptions and interrupts
+         * - for jump optimization (which is disabled by INHIBIT_IRQ/RF/TF)
+         * - for VMRUN because RF/TF handling for the host is done after vmexit,
+         *   and INHIBIT_IRQ is loaded from the VMCB
+         * - for HLT/PAUSE/MWAIT to exit the main loop with specific EXCP_* values;
+         *   the helpers themselves handle the tasks normally done by gen_eob().
+         */
         break;
     case DISAS_TOO_MANY:
         gen_update_cc_op(dc);
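
A closing note on the gen_update_eip_next() helper introduced in translate.c above: HLT, PAUSE and ICEBP now advance EIP past the current instruction before calling a noreturn helper, because the helper leaves the translated code via cpu_loop_exit() and execution later resumes from the saved EIP. The toy interpreter below (illustrative only, no QEMU code) shows why the program counter must be bumped before the non-returning exit, or the halting instruction would re-execute forever on resume:

    #include <setjmp.h>
    #include <stdio.h>

    enum { OP_NOP, OP_HALT, OP_END };

    static jmp_buf exit_buf;
    static int pc;

    /* Models helper_hlt(): leaves the "translated code" without returning. */
    static void halt_helper(void)
    {
        longjmp(exit_buf, 1);   /* like cpu_loop_exit() */
    }

    static void run(const int *prog)
    {
        if (setjmp(exit_buf)) {
            printf("halted; will resume at pc=%d\n", pc);
            return;
        }
        for (;;) {
            switch (prog[pc]) {
            case OP_NOP:
                pc += 1;
                break;
            case OP_HALT:
                pc += 1;        /* the gen_update_eip_next() step: advance
                                 * *before* the noreturn helper runs */
                halt_helper();
                break;
            case OP_END:
                return;
            }
        }
    }

    int main(void)
    {
        const int prog[] = { OP_NOP, OP_HALT, OP_NOP, OP_END };
        run(prog);              /* prints "halted; will resume at pc=2" */
        return 0;
    }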