diff options
-rw-r--r-- | cpus.c | 81 | ||||
-rw-r--r-- | kvm-stub.c | 5 | ||||
-rw-r--r-- | kvm.h | 3 | ||||
-rw-r--r-- | target-i386/cpu.h | 20 | ||||
-rw-r--r-- | target-i386/helper.c | 2 | ||||
-rw-r--r-- | target-i386/kvm.c | 178 | ||||
-rw-r--r-- | target-i386/kvm_x86.h | 3 |
7 files changed, 278 insertions, 14 deletions
@@ -34,6 +34,9 @@ #include "cpus.h" #include "compatfd.h" +#ifdef CONFIG_LINUX +#include <sys/prctl.h> +#endif #ifdef SIGRTMIN #define SIG_IPI (SIGRTMIN+4) @@ -41,6 +44,10 @@ #define SIG_IPI SIGUSR1 #endif +#ifndef PR_MCE_KILL +#define PR_MCE_KILL 33 +#endif + static CPUState *next_cpu; /***********************************************************/ @@ -498,28 +505,77 @@ static void qemu_tcg_wait_io_event(void) } } +static void sigbus_reraise(void) +{ + sigset_t set; + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_handler = SIG_DFL; + if (!sigaction(SIGBUS, &action, NULL)) { + raise(SIGBUS); + sigemptyset(&set); + sigaddset(&set, SIGBUS); + sigprocmask(SIG_UNBLOCK, &set, NULL); + } + perror("Failed to re-raise SIGBUS!\n"); + abort(); +} + +static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, + void *ctx) +{ +#if defined(TARGET_I386) + if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr)) +#endif + sigbus_reraise(); +} + static void qemu_kvm_eat_signal(CPUState *env, int timeout) { struct timespec ts; int r, e; siginfo_t siginfo; sigset_t waitset; + sigset_t chkset; ts.tv_sec = timeout / 1000; ts.tv_nsec = (timeout % 1000) * 1000000; sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); + sigaddset(&waitset, SIGBUS); - qemu_mutex_unlock(&qemu_global_mutex); - r = sigtimedwait(&waitset, &siginfo, &ts); - e = errno; - qemu_mutex_lock(&qemu_global_mutex); + do { + qemu_mutex_unlock(&qemu_global_mutex); - if (r == -1 && !(e == EAGAIN || e == EINTR)) { - fprintf(stderr, "sigtimedwait: %s\n", strerror(e)); - exit(1); - } + r = sigtimedwait(&waitset, &siginfo, &ts); + e = errno; + + qemu_mutex_lock(&qemu_global_mutex); + + if (r == -1 && !(e == EAGAIN || e == EINTR)) { + fprintf(stderr, "sigtimedwait: %s\n", strerror(e)); + exit(1); + } + + switch (r) { + case SIGBUS: +#ifdef TARGET_I386 + if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) +#endif + sigbus_reraise(); + break; + default: + break; + } + + r = sigpending(&chkset); + if (r == -1) { + fprintf(stderr, "sigpending: %s\n", strerror(e)); + exit(1); + } + } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); } static void qemu_kvm_wait_io_event(CPUState *env) @@ -640,6 +696,7 @@ static void kvm_init_ipi(CPUState *env) pthread_sigmask(SIG_BLOCK, NULL, &set); sigdelset(&set, SIG_IPI); + sigdelset(&set, SIGBUS); r = kvm_set_signal_mask(env, &set); if (r) { fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r)); @@ -650,6 +707,7 @@ static void kvm_init_ipi(CPUState *env) static sigset_t block_io_signals(void) { sigset_t set; + struct sigaction action; /* SIGUSR2 used by posix-aio-compat.c */ sigemptyset(&set); @@ -660,8 +718,15 @@ static sigset_t block_io_signals(void) sigaddset(&set, SIGIO); sigaddset(&set, SIGALRM); sigaddset(&set, SIG_IPI); + sigaddset(&set, SIGBUS); pthread_sigmask(SIG_BLOCK, &set, NULL); + memset(&action, 0, sizeof(action)); + action.sa_flags = SA_SIGINFO; + action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler; + sigaction(SIGBUS, &action, NULL); + prctl(PR_MCE_KILL, 1, 1, 0, 0); + return set; } @@ -141,3 +141,8 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign) { return -ENOSYS; } + +int kvm_on_sigbus(int code, void *addr) +{ + return 1; +} @@ -110,6 +110,9 @@ int kvm_arch_init_vcpu(CPUState *env); void kvm_arch_reset_vcpu(CPUState *env); +int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr); +int kvm_on_sigbus(int code, void *addr); + struct kvm_guest_debug; struct kvm_debug_exit_arch; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 77eeab1..85ed30f 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -250,16 +250,32 @@ #define PG_ERROR_RSVD_MASK 0x08 #define PG_ERROR_I_D_MASK 0x10 -#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ +#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ -#define MCE_CAP_DEF MCG_CTL_P +#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P) #define MCE_BANKS_DEF 10 +#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ #define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ +#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ +#define MCI_STATUS_AR (1ULL<<55) /* Action required */ + +/* MISC register defines */ +#define MCM_ADDR_SEGOFF 0 /* segment offset */ +#define MCM_ADDR_LINEAR 1 /* linear address */ +#define MCM_ADDR_PHYS 2 /* physical address */ +#define MCM_ADDR_MEM 3 /* memory address */ +#define MCM_ADDR_GENERIC 7 /* generic */ #define MSR_IA32_TSC 0x10 #define MSR_IA32_APICBASE 0x1b diff --git a/target-i386/helper.c b/target-i386/helper.c index 4b430dd..4fff4a8 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1032,7 +1032,7 @@ void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, return; if (kvm_enabled()) { - kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc); + kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0); return; } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 343fb02..0161f6d 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -46,6 +46,13 @@ #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12 +#ifndef BUS_MCEERR_AR +#define BUS_MCEERR_AR 4 +#endif +#ifndef BUS_MCEERR_AO +#define BUS_MCEERR_AO 5 +#endif + #ifdef KVM_CAP_EXT_CPUID static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) @@ -192,10 +199,39 @@ static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m) return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m); } +static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n) +{ + struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs); + int r; + + kmsrs->nmsrs = n; + memcpy(kmsrs->entries, msrs, n * sizeof *msrs); + r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs); + memcpy(msrs, kmsrs->entries, n * sizeof *msrs); + free(kmsrs); + return r; +} + +/* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */ +static int kvm_mce_in_exception(CPUState *env) +{ + struct kvm_msr_entry msr_mcg_status = { + .index = MSR_MCG_STATUS, + }; + int r; + + r = kvm_get_msr(env, &msr_mcg_status, 1); + if (r == -1 || r == 0) { + return -1; + } + return !!(msr_mcg_status.data & MCG_STATUS_MCIP); +} + struct kvm_x86_mce_data { CPUState *env; struct kvm_x86_mce *mce; + int abort_on_error; }; static void kvm_do_inject_x86_mce(void *_data) @@ -203,14 +239,26 @@ static void kvm_do_inject_x86_mce(void *_data) struct kvm_x86_mce_data *data = _data; int r; + /* If there is an MCE excpetion being processed, ignore this SRAO MCE */ + r = kvm_mce_in_exception(data->env); + if (r == -1) + fprintf(stderr, "Failed to get MCE status\n"); + else if (r && !(data->mce->status & MCI_STATUS_AR)) + return; + r = kvm_set_mce(data->env, data->mce); - if (r < 0) + if (r < 0) { perror("kvm_set_mce FAILED"); + if (data->abort_on_error) { + abort(); + } + } } #endif void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc) + uint64_t mcg_status, uint64_t addr, uint64_t misc, + int abort_on_error) { #ifdef KVM_CAP_MCE struct kvm_x86_mce mce = { @@ -225,7 +273,15 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, .mce = &mce, }; + if (!cenv->mcg_cap) { + fprintf(stderr, "MCE support is not enabled!\n"); + return; + } + run_on_cpu(cenv, kvm_do_inject_x86_mce, &data); +#else + if (abort_on_error) + abort(); #endif } @@ -1528,3 +1584,121 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env) ((env->segs[R_CS].selector & 3) != 3); } +static void hardware_memory_error(void) +{ + fprintf(stderr, "Hardware memory error!\n"); + exit(1); +} + +int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) +{ +#if defined(KVM_CAP_MCE) + struct kvm_x86_mce mce = { + .bank = 9, + }; + void *vaddr; + ram_addr_t ram_addr; + target_phys_addr_t paddr; + int r; + + if ((env->mcg_cap & MCG_SER_P) && addr + && (code == BUS_MCEERR_AR + || code == BUS_MCEERR_AO)) { + if (code == BUS_MCEERR_AR) { + /* Fake an Intel architectural Data Load SRAR UCR */ + mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | MCI_STATUS_AR | 0x134; + mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; + mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV; + } else { + /* + * If there is an MCE excpetion being processed, ignore + * this SRAO MCE + */ + r = kvm_mce_in_exception(env); + if (r == -1) { + fprintf(stderr, "Failed to get MCE status\n"); + } else if (r) { + return 0; + } + /* Fake an Intel architectural Memory scrubbing UCR */ + mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0; + mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; + mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; + } + vaddr = (void *)addr; + if (qemu_ram_addr_from_host(vaddr, &ram_addr) || + !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) { + fprintf(stderr, "Hardware memory error for memory used by " + "QEMU itself instead of guest system!\n"); + /* Hope we are lucky for AO MCE */ + if (code == BUS_MCEERR_AO) { + return 0; + } else { + hardware_memory_error(); + } + } + mce.addr = paddr; + r = kvm_set_mce(env, &mce); + if (r < 0) { + fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); + abort(); + } + } else +#endif + { + if (code == BUS_MCEERR_AO) { + return 0; + } else if (code == BUS_MCEERR_AR) { + hardware_memory_error(); + } else { + return 1; + } + } + return 0; +} + +int kvm_on_sigbus(int code, void *addr) +{ +#if defined(KVM_CAP_MCE) + if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) { + uint64_t status; + void *vaddr; + ram_addr_t ram_addr; + target_phys_addr_t paddr; + CPUState *cenv; + + /* Hope we are lucky for AO MCE */ + vaddr = addr; + if (qemu_ram_addr_from_host(vaddr, &ram_addr) || + !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, &paddr)) { + fprintf(stderr, "Hardware memory error for memory used by " + "QEMU itself instead of guest system!: %p\n", addr); + return 0; + } + status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN + | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S + | 0xc0; + kvm_inject_x86_mce(first_cpu, 9, status, + MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr, + (MCM_ADDR_PHYS << 6) | 0xc, 1); + for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu) { + kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC, + MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1); + } + } else +#endif + { + if (code == BUS_MCEERR_AO) { + return 0; + } else if (code == BUS_MCEERR_AR) { + hardware_memory_error(); + } else { + return 1; + } + } + return 0; +} diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h index c1ebd24..04932cf 100644 --- a/target-i386/kvm_x86.h +++ b/target-i386/kvm_x86.h @@ -16,6 +16,7 @@ #define __KVM_X86_H__ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc); + uint64_t mcg_status, uint64_t addr, uint64_t misc, + int abort_on_error); #endif |