diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2017-03-03 16:41:09 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2017-03-03 16:41:09 +0000 |
commit | 5febe7671f5ec0a6842d64edfb920feb7bbb5f1e (patch) | |
tree | 4983379122a9105f30b12f57c0449e3d1e684b5c | |
parent | 5b10b94bd53229540b088342015d69bc5ef2cc1d (diff) | |
parent | f6eb0b319e4bad3d01d74d71e3a6cf40f0ede720 (diff) | |
download | qemu-5febe7671f5ec0a6842d64edfb920feb7bbb5f1e.zip qemu-5febe7671f5ec0a6842d64edfb920feb7bbb5f1e.tar.gz qemu-5febe7671f5ec0a6842d64edfb920feb7bbb5f1e.tar.bz2 |
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* kernel header update (requested by David and Vijay)
* GuestPanicInformation fixups (Anton)
* record/replay icount fixes (Pavel)
* cpu-exec cleanup, unification of icount_decr with tcg_exit_req (me)
* KVM_CAP_IMMEDIATE_EXIT support (me)
* vmxcap update (me)
* iscsi locking fix (me)
* VFIO ram device fix (Yongji)
* scsi-hd vs. default CD-ROM (Hervé)
* SMI migration fix (Dave)
* spice-char segfault (Li Qiang)
* improved "info mtree -f" (me)
# gpg: Signature made Fri 03 Mar 2017 15:43:04 GMT
# gpg: using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* remotes/bonzini/tags/for-upstream: (21 commits)
iscsi: fix missing unlock
memory: show region offset and ROM/RAM type in "info mtree -f"
x86: Work around SMI migration breakages
spice-char: fix segfault in char_spice_finalize
vl: disable default cdrom when using explicitly scsi-hd
memory: Introduce DEVICE_HOST_ENDIAN for ram device
qmp-events: fix GUEST_PANICKED description formatting
qapi: flatten GuestPanicInformation union
vmxcap: update for September 2016 SDM
vmxcap: port to Python 3
KVM: use KVM_CAP_IMMEDIATE_EXIT
kvm: use atomic_read/atomic_set to access cpu->exit_request
KVM: move SIG_IPI handling to kvm-all.c
KVM: do not use sigtimedwait to catch SIGBUS
KVM: remove kvm_arch_on_sigbus
cpus: reorganize signal handling code
KVM: x86: cleanup SIGBUS handlers
cpus: remove ugly cast on sigbus_handler
cpu-exec: remove unnecessary check of cpu->exit_request
replay: check icount in cpu exec loop
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | block/iscsi.c | 4 | ||||
-rw-r--r-- | cpu-exec.c | 93 | ||||
-rw-r--r-- | cpus.c | 102 | ||||
-rw-r--r-- | include/exec/cpu-common.h | 6 | ||||
-rw-r--r-- | include/exec/gen-icount.h | 53 | ||||
-rw-r--r-- | include/hw/i386/pc.h | 4 | ||||
-rw-r--r-- | include/qemu/compatfd.h | 42 | ||||
-rw-r--r-- | include/qemu/osdep.h | 37 | ||||
-rw-r--r-- | include/qom/cpu.h | 15 | ||||
-rw-r--r-- | include/sysemu/kvm.h | 11 | ||||
-rw-r--r-- | kvm-all.c | 150 | ||||
-rw-r--r-- | kvm-stub.c | 12 | ||||
-rw-r--r-- | memory.c | 27 | ||||
-rw-r--r-- | qapi-schema.json | 12 | ||||
-rw-r--r-- | qapi/event.json | 4 | ||||
-rw-r--r-- | qom/cpu.c | 2 | ||||
-rwxr-xr-x | scripts/kvm/vmxcap | 23 | ||||
-rw-r--r-- | spice-qemu-char.c | 5 | ||||
-rw-r--r-- | target/arm/kvm.c | 10 | ||||
-rw-r--r-- | target/i386/cpu.c | 17 | ||||
-rw-r--r-- | target/i386/cpu.h | 3 | ||||
-rw-r--r-- | target/i386/kvm.c | 88 | ||||
-rw-r--r-- | target/mips/kvm.c | 12 | ||||
-rw-r--r-- | target/ppc/kvm.c | 10 | ||||
-rw-r--r-- | target/s390x/kvm.c | 10 | ||||
-rw-r--r-- | tcg/tcg.h | 1 | ||||
-rw-r--r-- | translate-all.c | 2 | ||||
-rw-r--r-- | translate-common.c | 13 | ||||
-rw-r--r-- | util/compatfd.c | 1 | ||||
-rw-r--r-- | util/main-loop.c | 5 | ||||
-rw-r--r-- | util/oslib-posix.c | 33 | ||||
-rw-r--r-- | vl.c | 13 |
32 files changed, 428 insertions, 392 deletions
diff --git a/block/iscsi.c b/block/iscsi.c index 76319a1..75d8905 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -637,6 +637,7 @@ retry: } #endif if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); return -ENOMEM; } #if LIBISCSI_API_VERSION < (20160603) @@ -864,6 +865,7 @@ retry: } #endif if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); return -ENOMEM; } #if LIBISCSI_API_VERSION < (20160603) @@ -904,6 +906,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) retry: if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); return -ENOMEM; } @@ -1237,6 +1240,7 @@ retry: 0, 0, iscsi_co_generic_cb, &iTask); } if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); return -ENOMEM; } @@ -186,12 +186,6 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) cc->set_pc(cpu, last_tb->pc); } } - if (tb_exit == TB_EXIT_REQUESTED) { - /* We were asked to stop executing TBs (probably a pending - * interrupt. We've now stopped, so clear the flag. - */ - atomic_set(&cpu->tcg_exit_req, 0); - } return ret; } @@ -560,8 +554,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, qemu_mutex_unlock_iothread(); } - - if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) { + /* Finally, check if we need to exit to the main loop. 
*/ + if (unlikely(atomic_read(&cpu->exit_request) + || (use_icount && cpu->icount_decr.u16.low + cpu->icount_extra == 0))) { atomic_set(&cpu->exit_request, 0); cpu->exception_index = EXCP_INTERRUPT; return true; @@ -571,62 +566,54 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, } static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, - TranslationBlock **last_tb, int *tb_exit, - SyncClocks *sc) + TranslationBlock **last_tb, int *tb_exit) { uintptr_t ret; - - if (unlikely(atomic_read(&cpu->exit_request))) { - return; - } + int32_t insns_left; trace_exec_tb(tb, tb->pc); ret = cpu_tb_exec(cpu, tb); tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK); *tb_exit = ret & TB_EXIT_MASK; - switch (*tb_exit) { - case TB_EXIT_REQUESTED: + if (*tb_exit != TB_EXIT_REQUESTED) { + *last_tb = tb; + return; + } + + *last_tb = NULL; + insns_left = atomic_read(&cpu->icount_decr.u32); + atomic_set(&cpu->icount_decr.u16.high, 0); + if (insns_left < 0) { /* Something asked us to stop executing chained TBs; just * continue round the main loop. Whatever requested the exit - * will also have set something else (eg interrupt_request) - * which we will handle next time around the loop. But we - * need to ensure the tcg_exit_req read in generated code - * comes before the next read of cpu->exit_request or - * cpu->interrupt_request. + * will also have set something else (eg exit_request or + * interrupt_request) which we will handle next time around + * the loop. But we need to ensure the zeroing of icount_decr + * comes before the next read of cpu->exit_request + * or cpu->interrupt_request. */ smp_mb(); - *last_tb = NULL; - break; - case TB_EXIT_ICOUNT_EXPIRED: - { - /* Instruction counter expired. */ -#ifdef CONFIG_USER_ONLY - abort(); -#else - int insns_left = cpu->icount_decr.u32; - *last_tb = NULL; - if (cpu->icount_extra && insns_left >= 0) { - /* Refill decrementer and continue execution. 
*/ - cpu->icount_extra += insns_left; - insns_left = MIN(0xffff, cpu->icount_extra); - cpu->icount_extra -= insns_left; - cpu->icount_decr.u16.low = insns_left; - } else { - if (insns_left > 0) { - /* Execute remaining instructions. */ - cpu_exec_nocache(cpu, insns_left, tb, false); - align_clocks(sc, cpu); - } - cpu->exception_index = EXCP_INTERRUPT; - cpu_loop_exit(cpu); - } - break; -#endif + return; } - default: - *last_tb = tb; - break; + + /* Instruction counter expired. */ + assert(use_icount); +#ifndef CONFIG_USER_ONLY + if (cpu->icount_extra) { + /* Refill decrementer and continue execution. */ + cpu->icount_extra += insns_left; + insns_left = MIN(0xffff, cpu->icount_extra); + cpu->icount_extra -= insns_left; + cpu->icount_decr.u16.low = insns_left; + } else { + /* Execute any remaining instructions, then let the main loop + * handle the next event. + */ + if (insns_left > 0) { + cpu_exec_nocache(cpu, insns_left, tb, false); + } } +#endif } /* main execution loop */ @@ -635,7 +622,7 @@ int cpu_exec(CPUState *cpu) { CPUClass *cc = CPU_GET_CLASS(cpu); int ret; - SyncClocks sc; + SyncClocks sc = { 0 }; /* replay_interrupt may need current_cpu */ current_cpu = cpu; @@ -683,7 +670,7 @@ int cpu_exec(CPUState *cpu) while (!cpu_handle_interrupt(cpu, &last_tb)) { TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit); - cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc); + cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit); /* Try to align the host and virtual clocks if the guest is in advance */ align_clocks(&sc, cpu); @@ -51,10 +51,6 @@ #include "hw/nmi.h" #include "sysemu/replay.h" -#ifndef _WIN32 -#include "qemu/compatfd.h" -#endif - #ifdef CONFIG_LINUX #include <sys/prctl.h> @@ -924,13 +920,23 @@ static void sigbus_reraise(void) abort(); } -static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, - void *ctx) +static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) { - if (kvm_on_sigbus(siginfo->ssi_code, - (void 
*)(intptr_t)siginfo->ssi_addr)) { + if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) { sigbus_reraise(); } + + if (current_cpu) { + /* Called asynchronously in VCPU thread. */ + if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) { + sigbus_reraise(); + } + } else { + /* Called synchronously (via signalfd) in main thread. */ + if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { + sigbus_reraise(); + } + } } static void qemu_init_sigbus(void) @@ -939,92 +945,17 @@ static void qemu_init_sigbus(void) memset(&action, 0, sizeof(action)); action.sa_flags = SA_SIGINFO; - action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler; + action.sa_sigaction = sigbus_handler; sigaction(SIGBUS, &action, NULL); prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); } - -static void qemu_kvm_eat_signals(CPUState *cpu) -{ - struct timespec ts = { 0, 0 }; - siginfo_t siginfo; - sigset_t waitset; - sigset_t chkset; - int r; - - sigemptyset(&waitset); - sigaddset(&waitset, SIG_IPI); - sigaddset(&waitset, SIGBUS); - - do { - r = sigtimedwait(&waitset, &siginfo, &ts); - if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { - perror("sigtimedwait"); - exit(1); - } - - switch (r) { - case SIGBUS: - if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) { - sigbus_reraise(); - } - break; - default: - break; - } - - r = sigpending(&chkset); - if (r == -1) { - perror("sigpending"); - exit(1); - } - } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); -} - #else /* !CONFIG_LINUX */ - static void qemu_init_sigbus(void) { } - -static void qemu_kvm_eat_signals(CPUState *cpu) -{ -} #endif /* !CONFIG_LINUX */ -#ifndef _WIN32 -static void dummy_signal(int sig) -{ -} - -static void qemu_kvm_init_cpu_signals(CPUState *cpu) -{ - int r; - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = dummy_signal; - sigaction(SIG_IPI, &sigact, NULL); - - 
pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIG_IPI); - sigdelset(&set, SIGBUS); - r = kvm_set_signal_mask(cpu, &set); - if (r) { - fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); - exit(1); - } -} - -#else /* _WIN32 */ -static void qemu_kvm_init_cpu_signals(CPUState *cpu) -{ - abort(); -} -#endif /* _WIN32 */ - static QemuMutex qemu_global_mutex; static QemuThread io_thread; @@ -1099,7 +1030,6 @@ static void qemu_kvm_wait_io_event(CPUState *cpu) qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } - qemu_kvm_eat_signals(cpu); qemu_wait_io_event_common(cpu); } @@ -1122,7 +1052,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) exit(1); } - qemu_kvm_init_cpu_signals(cpu); + kvm_init_cpu_signals(cpu); /* signal CPU creation */ cpu->created = true; diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 8c305aa..b62f0d8 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -36,6 +36,12 @@ enum device_endian { DEVICE_LITTLE_ENDIAN, }; +#if defined(HOST_WORDS_BIGENDIAN) +#define DEVICE_HOST_ENDIAN DEVICE_BIG_ENDIAN +#else +#define DEVICE_HOST_ENDIAN DEVICE_LITTLE_ENDIAN +#endif + /* address in the RAM (different from a physical address) */ #if defined(CONFIG_XEN_BACKEND) typedef uint64_t ram_addr_t; diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 050de59..62d462e 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -6,58 +6,55 @@ /* Helpers for instruction counting code generation. 
*/ static int icount_start_insn_idx; -static TCGLabel *icount_label; static TCGLabel *exitreq_label; static inline void gen_tb_start(TranslationBlock *tb) { - TCGv_i32 count, flag, imm; + TCGv_i32 count, imm; exitreq_label = gen_new_label(); - flag = tcg_temp_new_i32(); - tcg_gen_ld_i32(flag, cpu_env, - offsetof(CPUState, tcg_exit_req) - ENV_OFFSET); - tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label); - tcg_temp_free_i32(flag); - - if (!(tb->cflags & CF_USE_ICOUNT)) { - return; + if (tb->cflags & CF_USE_ICOUNT) { + count = tcg_temp_local_new_i32(); + } else { + count = tcg_temp_new_i32(); } - icount_label = gen_new_label(); - count = tcg_temp_local_new_i32(); tcg_gen_ld_i32(count, cpu_env, -ENV_OFFSET + offsetof(CPUState, icount_decr.u32)); - imm = tcg_temp_new_i32(); - /* We emit a movi with a dummy immediate argument. Keep the insn index - * of the movi so that we later (when we know the actual insn count) - * can update the immediate argument with the actual insn count. */ - icount_start_insn_idx = tcg_op_buf_count(); - tcg_gen_movi_i32(imm, 0xdeadbeef); + if (tb->cflags & CF_USE_ICOUNT) { + imm = tcg_temp_new_i32(); + /* We emit a movi with a dummy immediate argument. Keep the insn index + * of the movi so that we later (when we know the actual insn count) + * can update the immediate argument with the actual insn count. 
*/ + icount_start_insn_idx = tcg_op_buf_count(); + tcg_gen_movi_i32(imm, 0xdeadbeef); + + tcg_gen_sub_i32(count, count, imm); + tcg_temp_free_i32(imm); + } + + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, exitreq_label); - tcg_gen_sub_i32(count, count, imm); - tcg_temp_free_i32(imm); + if (tb->cflags & CF_USE_ICOUNT) { + tcg_gen_st16_i32(count, cpu_env, + -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); + } - tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label); - tcg_gen_st16_i32(count, cpu_env, - -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); tcg_temp_free_i32(count); } static void gen_tb_end(TranslationBlock *tb, int num_insns) { - gen_set_label(exitreq_label); - tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED); - if (tb->cflags & CF_USE_ICOUNT) { /* Update the num_insn immediate parameter now that we know * the actual insn count. */ tcg_set_insn_param(icount_start_insn_idx, 1, num_insns); - gen_set_label(icount_label); - tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_ICOUNT_EXPIRED); } + gen_set_label(exitreq_label); + tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED); + /* Terminate the linked list. */ tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0; } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index d1f4554..ab303c7 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -623,6 +623,10 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\ .property = "xlevel",\ .value = stringify(0x8000000a),\ + },{\ + .driver = TYPE_X86_CPU,\ + .property = "kvm-no-smi-migration",\ + .value = "on",\ }, #define PC_COMPAT_2_2 \ diff --git a/include/qemu/compatfd.h b/include/qemu/compatfd.h deleted file mode 100644 index aa12ee9..0000000 --- a/include/qemu/compatfd.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * signalfd/eventfd compatibility - * - * Copyright IBM, Corp. 
2008 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - */ - -#ifndef QEMU_COMPATFD_H -#define QEMU_COMPATFD_H - - -struct qemu_signalfd_siginfo { - uint32_t ssi_signo; /* Signal number */ - int32_t ssi_errno; /* Error number (unused) */ - int32_t ssi_code; /* Signal code */ - uint32_t ssi_pid; /* PID of sender */ - uint32_t ssi_uid; /* Real UID of sender */ - int32_t ssi_fd; /* File descriptor (SIGIO) */ - uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */ - uint32_t ssi_band; /* Band event (SIGIO) */ - uint32_t ssi_overrun; /* POSIX timer overrun count */ - uint32_t ssi_trapno; /* Trap number that caused signal */ - int32_t ssi_status; /* Exit status or signal (SIGCHLD) */ - int32_t ssi_int; /* Integer sent by sigqueue(2) */ - uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */ - uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */ - uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */ - uint64_t ssi_addr; /* Address that generated signal - (for hardware-generated signals) */ - uint8_t pad[48]; /* Pad size to 128 bytes (allow for - additional fields in the future) */ -}; - -int qemu_signalfd(const sigset_t *mask); - -#endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 56c9e22..af37195 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -284,6 +284,15 @@ void qemu_anon_ram_free(void *ptr, size_t size); #endif +#if defined(CONFIG_LINUX) +#ifndef BUS_MCEERR_AR +#define BUS_MCEERR_AR 4 +#endif +#ifndef BUS_MCEERR_AO +#define BUS_MCEERR_AO 5 +#endif +#endif + #if defined(__linux__) && \ (defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)) /* Use 2 MiB alignment so transparent hugepages can be used by KVM. 
@@ -297,6 +306,34 @@ void qemu_anon_ram_free(void *ptr, size_t size); # define QEMU_VMALLOC_ALIGN getpagesize() #endif +#ifdef CONFIG_POSIX +struct qemu_signalfd_siginfo { + uint32_t ssi_signo; /* Signal number */ + int32_t ssi_errno; /* Error number (unused) */ + int32_t ssi_code; /* Signal code */ + uint32_t ssi_pid; /* PID of sender */ + uint32_t ssi_uid; /* Real UID of sender */ + int32_t ssi_fd; /* File descriptor (SIGIO) */ + uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */ + uint32_t ssi_band; /* Band event (SIGIO) */ + uint32_t ssi_overrun; /* POSIX timer overrun count */ + uint32_t ssi_trapno; /* Trap number that caused signal */ + int32_t ssi_status; /* Exit status or signal (SIGCHLD) */ + int32_t ssi_int; /* Integer sent by sigqueue(2) */ + uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */ + uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */ + uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */ + uint64_t ssi_addr; /* Address that generated signal + (for hardware-generated signals) */ + uint8_t pad[48]; /* Pad size to 128 bytes (allow for + additional fields in the future) */ +}; + +int qemu_signalfd(const sigset_t *mask); +void sigaction_invoke(struct sigaction *action, + struct qemu_signalfd_siginfo *info); +#endif + int qemu_madvise(void *addr, size_t len, int advice); int qemu_open(const char *name, int flags, ...); diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 3e61c88..c3292ef 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -275,11 +275,11 @@ struct qemu_work_item; * @stopped: Indicates the CPU has been artificially stopped. * @unplug: Indicates a pending CPU unplug request. * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU - * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this - * CPU and return to its top level loop. * @singlestep_enabled: Flags for single-stepping. * @icount_extra: Instructions until next timer event. 
- * @icount_decr: Number of cycles left, with interrupt flag in high bit. + * @icount_decr: Low 16 bits: number of cycles left, only used in icount mode. + * High 16 bits: Set to -1 to force TCG to stop executing linked TBs for this + * CPU and return to its top level loop (even in non-icount mode). * This allows a single read-compare-cbranch-write sequence to test * for both decrementer underflow and exceptions. * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution @@ -382,10 +382,6 @@ struct CPUState { /* TODO Move common fields from CPUArchState here. */ int cpu_index; /* used by alpha TCG */ uint32_t halted; /* used by alpha, cris, ppc TCG */ - union { - uint32_t u32; - icount_decr_u16 u16; - } icount_decr; uint32_t can_do_io; int32_t exception_index; /* used by m68k TCG */ @@ -398,7 +394,10 @@ struct CPUState { offset from AREG0. Leave this field at the end so as to make the (absolute value) offset as small as possible. This reduces code size, especially for hosts without large memory offsets. 
*/ - uint32_t tcg_exit_req; + union { + uint32_t u32; + icount_decr_u16 u16; + } icount_decr; bool hax_vcpu_dirty; struct hax_vcpu_state *hax_vcpu; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 3045ee7..24281fc 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -238,9 +238,6 @@ int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr, target_ulong len, int type); void kvm_remove_all_breakpoints(CPUState *cpu); int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); -#ifndef _WIN32 -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset); -#endif int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); int kvm_on_sigbus(int code, void *addr); @@ -357,8 +354,10 @@ bool kvm_vcpu_id_is_valid(int vcpu_id); /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ unsigned long kvm_arch_vcpu_id(CPUState *cpu); -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); -int kvm_arch_on_sigbus(int code, void *addr); +#ifdef TARGET_I386 +#define KVM_HAVE_MCE_INJECTION 1 +void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +#endif void kvm_arch_init_irq_routing(KVMState *s); @@ -461,6 +460,8 @@ void kvm_cpu_synchronize_state(CPUState *cpu); void kvm_cpu_synchronize_post_reset(CPUState *cpu); void kvm_cpu_synchronize_post_init(CPUState *cpu); +void kvm_init_cpu_signals(CPUState *cpu); + /** * kvm_irqchip_add_msi_route - Add MSI route for specific vector * @s: KVM state @@ -120,6 +120,7 @@ bool kvm_vm_attributes_allowed; bool kvm_direct_msi_allowed; bool kvm_ioeventfd_any_length_allowed; bool kvm_msi_use_devid; +static bool kvm_immediate_exit; static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_INFO(USER_MEMORY), @@ -1619,6 +1620,7 @@ static int kvm_init(MachineState *ms) goto err; } + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); /* If unspecified, use the default value */ @@ -1893,6 +1895,61 @@ 
void kvm_cpu_synchronize_post_init(CPUState *cpu) run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); } +#ifdef KVM_HAVE_MCE_INJECTION +static __thread void *pending_sigbus_addr; +static __thread int pending_sigbus_code; +static __thread bool have_sigbus_pending; +#endif + +static void kvm_cpu_kick(CPUState *cpu) +{ + atomic_set(&cpu->kvm_run->immediate_exit, 1); +} + +static void kvm_cpu_kick_self(void) +{ + if (kvm_immediate_exit) { + kvm_cpu_kick(current_cpu); + } else { + qemu_cpu_kick_self(); + } +} + +static void kvm_eat_signals(CPUState *cpu) +{ + struct timespec ts = { 0, 0 }; + siginfo_t siginfo; + sigset_t waitset; + sigset_t chkset; + int r; + + if (kvm_immediate_exit) { + atomic_set(&cpu->kvm_run->immediate_exit, 0); + /* Write kvm_run->immediate_exit before the cpu->exit_request + * write in kvm_cpu_exec. + */ + smp_wmb(); + return; + } + + sigemptyset(&waitset); + sigaddset(&waitset, SIG_IPI); + + do { + r = sigtimedwait(&waitset, &siginfo, &ts); + if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { + perror("sigtimedwait"); + exit(1); + } + + r = sigpending(&chkset); + if (r == -1) { + perror("sigpending"); + exit(1); + } + } while (sigismember(&chkset, SIG_IPI)); +} + int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; @@ -1901,7 +1958,7 @@ int kvm_cpu_exec(CPUState *cpu) DPRINTF("kvm_cpu_exec()\n"); if (kvm_arch_process_async_events(cpu)) { - cpu->exit_request = 0; + atomic_set(&cpu->exit_request, 0); return EXCP_HLT; } @@ -1916,23 +1973,39 @@ int kvm_cpu_exec(CPUState *cpu) } kvm_arch_pre_run(cpu, run); - if (cpu->exit_request) { + if (atomic_read(&cpu->exit_request)) { DPRINTF("interrupt exit requested\n"); /* * KVM requires us to reenter the kernel after IO exits to complete * instruction emulation. This self-signal will ensure that we * leave ASAP again. */ - qemu_cpu_kick_self(); + kvm_cpu_kick_self(); } + /* Read cpu->exit_request before KVM_RUN reads run->immediate_exit. 
+ * Matching barrier in kvm_eat_signals. + */ + smp_rmb(); + run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0); attrs = kvm_arch_post_run(cpu, run); +#ifdef KVM_HAVE_MCE_INJECTION + if (unlikely(have_sigbus_pending)) { + qemu_mutex_lock_iothread(); + kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code, + pending_sigbus_addr); + have_sigbus_pending = false; + qemu_mutex_unlock_iothread(); + } +#endif + if (run_ret < 0) { if (run_ret == -EINTR || run_ret == -EAGAIN) { DPRINTF("io window exit\n"); + kvm_eat_signals(cpu); ret = EXCP_INTERRUPT; break; } @@ -2026,7 +2099,7 @@ int kvm_cpu_exec(CPUState *cpu) vm_stop(RUN_STATE_INTERNAL_ERROR); } - cpu->exit_request = 0; + atomic_set(&cpu->exit_request, 0); return ret; } @@ -2372,16 +2445,12 @@ void kvm_remove_all_breakpoints(CPUState *cpu) } #endif /* !KVM_CAP_SET_GUEST_DEBUG */ -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) +static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) { KVMState *s = kvm_state; struct kvm_signal_mask *sigmask; int r; - if (!sigset) { - return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL); - } - sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset)); sigmask->len = s->sigmask_len; @@ -2391,14 +2460,73 @@ int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) return r; } + +static void kvm_ipi_signal(int sig) +{ + if (current_cpu) { + assert(kvm_immediate_exit); + kvm_cpu_kick(current_cpu); + } +} + +void kvm_init_cpu_signals(CPUState *cpu) +{ + int r; + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = kvm_ipi_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &set); +#if defined KVM_HAVE_MCE_INJECTION + sigdelset(&set, SIGBUS); + pthread_sigmask(SIG_SETMASK, &set, NULL); +#endif + sigdelset(&set, SIG_IPI); + if (kvm_immediate_exit) { + r = pthread_sigmask(SIG_SETMASK, &set, NULL); + } else { + r = kvm_set_signal_mask(cpu, &set); + } + if (r) { + fprintf(stderr, "kvm_set_signal_mask: 
%s\n", strerror(-r)); + exit(1); + } +} + +/* Called asynchronously in VCPU thread. */ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { - return kvm_arch_on_sigbus_vcpu(cpu, code, addr); +#ifdef KVM_HAVE_MCE_INJECTION + if (have_sigbus_pending) { + return 1; + } + have_sigbus_pending = true; + pending_sigbus_addr = addr; + pending_sigbus_code = code; + atomic_set(&cpu->exit_request, 1); + return 0; +#else + return 1; +#endif } +/* Called synchronously (via signalfd) in main thread. */ int kvm_on_sigbus(int code, void *addr) { - return kvm_arch_on_sigbus(code, addr); +#ifdef KVM_HAVE_MCE_INJECTION + /* Action required MCE kills the process if SIGBUS is blocked. Because + * that's what happens in the I/O thread, where we handle MCE via signalfd, + * we can only get action optional here. + */ + assert(code != BUS_MCEERR_AR); + kvm_arch_on_sigbus_vcpu(first_cpu, code, addr); + return 0; +#else + return 1; +#endif } int kvm_create_device(KVMState *s, uint64_t type, bool test) @@ -95,13 +95,6 @@ void kvm_remove_all_breakpoints(CPUState *cpu) { } -#ifndef _WIN32 -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) -{ - abort(); -} -#endif - int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { return 1; @@ -157,4 +150,9 @@ bool kvm_has_free_slot(MachineState *ms) { return false; } + +void kvm_init_cpu_signals(CPUState *cpu) +{ + abort(); +} #endif @@ -1182,7 +1182,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, static const MemoryRegionOps ram_device_mem_ops = { .read = memory_region_ram_device_read, .write = memory_region_ram_device_write, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_HOST_ENDIAN, .valid = { .min_access_size = 1, .max_access_size = 8, @@ -2588,13 +2588,24 @@ static void mtree_print_flatview(fprintf_function p, void *f, while (n--) { mr = range->mr; - p(f, MTREE_INDENT TARGET_FMT_plx "-" - TARGET_FMT_plx " (prio %d, %s): %s\n", - int128_get64(range->addr.start), - 
int128_get64(range->addr.start) + MR_SIZE(range->addr.size), - mr->priority, - memory_region_type(mr), - memory_region_name(mr)); + if (range->offset_in_region) { + p(f, MTREE_INDENT TARGET_FMT_plx "-" + TARGET_FMT_plx " (prio %d, %s): %s @" TARGET_FMT_plx "\n", + int128_get64(range->addr.start), + int128_get64(range->addr.start) + MR_SIZE(range->addr.size), + mr->priority, + range->readonly ? "rom" : memory_region_type(mr), + memory_region_name(mr), + range->offset_in_region); + } else { + p(f, MTREE_INDENT TARGET_FMT_plx "-" + TARGET_FMT_plx " (prio %d, %s): %s\n", + int128_get64(range->addr.start), + int128_get64(range->addr.start) + MR_SIZE(range->addr.size), + mr->priority, + range->readonly ? "rom" : memory_region_type(mr), + memory_region_name(mr)); + } range++; } diff --git a/qapi-schema.json b/qapi-schema.json index fb39d1d..6febfa7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -5915,6 +5915,16 @@ 'data': [ 'pause', 'poweroff' ] } ## +# @GuestPanicInformationType: +# +# An enumeration of the guest panic information types +# +# Since: 2.9 +## +{ 'enum': 'GuestPanicInformationType', + 'data': [ 'hyper-v'] } + +## # @GuestPanicInformation: # # Information about a guest panic @@ -5922,6 +5932,8 @@ # Since: 2.9 ## {'union': 'GuestPanicInformation', + 'base': {'type': 'GuestPanicInformationType'}, + 'discriminator': 'type', 'data': { 'hyper-v': 'GuestPanicInformationHyperV' } } ## diff --git a/qapi/event.json b/qapi/event.json index 970ff02..e02852c 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -488,9 +488,9 @@ # # @action: action that has been taken, currently always "pause" # -# @info: optional information about a panic +# @info: #optional information about a panic (since 2.9) # -# Since: 1.5 (@info since 2.9) +# Since: 1.5 # # Example: # @@ -133,7 +133,7 @@ void cpu_exit(CPUState *cpu) atomic_set(&cpu->exit_request, 1); /* Ensure cpu_exec will see the exit request after TCG has exited. 
*/ smp_wmb(); - atomic_set(&cpu->tcg_exit_req, 1); + atomic_set(&cpu->icount_decr.u16.high, -1); } int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index 2220255..d9a6db0 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -27,9 +27,9 @@ MSR_IA32_VMX_VMFUNC = 0x491 class msr(object): def __init__(self): try: - self.f = open('/dev/cpu/0/msr', 'r', 0) + self.f = open('/dev/cpu/0/msr', 'rb', 0) except: - self.f = open('/dev/msr0', 'r', 0) + self.f = open('/dev/msr0', 'rb', 0) def read(self, index, default = None): import struct self.f.seek(index) @@ -49,7 +49,7 @@ class Control(object): val = m.read(nr, 0) return (val & 0xffffffff, val >> 32) def show(self): - print self.name + print(self.name) mbz, mb1 = self.read2(self.cap_msr) tmbz, tmb1 = 0, 0 if self.true_cap_msr: @@ -69,7 +69,7 @@ class Control(object): s = 'forced' elif one and zero: s = 'yes' - print ' %-40s %s' % (self.bits[bit], s) + print(' %-40s %s' % (self.bits[bit], s)) class Misc(object): def __init__(self, name, bits, msr): @@ -77,9 +77,9 @@ class Misc(object): self.bits = bits self.msr = msr def show(self): - print self.name + print(self.name) value = msr().read(self.msr, 0) - print ' Hex: 0x%x' % (value) + print(' Hex: 0x%x' % (value)) def first_bit(key): if type(key) is tuple: return key[0] @@ -94,7 +94,7 @@ class Misc(object): def fmt(x): return { True: 'yes', False: 'no' }[x] v = (value >> lo) & ((1 << (hi - lo + 1)) - 1) - print ' %-40s %s' % (self.bits[bits], fmt(v)) + print(' %-40s %s' % (self.bits[bits], fmt(v))) controls = [ Misc( @@ -170,9 +170,13 @@ controls = [ 12: 'Enable INVPCID', 13: 'Enable VM functions', 14: 'VMCS shadowing', + 15: 'Enable ENCLS exiting', 16: 'RDSEED exiting', + 17: 'Enable PML', 18: 'EPT-violation #VE', + 19: 'Conceal non-root operation from PT', 20: 'Enable XSAVES/XRSTORS', + 22: 'Mode-based execute control (XS/XU)', 25: 'TSC scaling', }, cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, @@ 
-190,6 +194,8 @@ controls = [ 20: 'Save IA32_EFER', 21: 'Load IA32_EFER', 22: 'Save VMX-preemption timer value', + 23: 'Clear IA32_BNDCFGS', + 24: 'Conceal VM exits from PT', }, cap_msr = MSR_IA32_VMX_EXIT_CTLS, true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS, @@ -205,6 +211,8 @@ controls = [ 13: 'Load IA32_PERF_GLOBAL_CTRL', 14: 'Load IA32_PAT', 15: 'Load IA32_EFER', + 16: 'Load IA32_BNDCFGS', + 17: 'Conceal VM entries from PT', }, cap_msr = MSR_IA32_VMX_ENTRY_CTLS, true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS, @@ -223,6 +231,7 @@ controls = [ (25,27): 'MSR-load/store count recommendation', 28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1', 29: 'VMWRITE to VM-exit information fields', + 30: 'Inject event with insn length=0', (32,63): 'MSEG revision identifier', }, msr = MSR_IA32_VMX_MISC_CTLS, diff --git a/spice-qemu-char.c b/spice-qemu-char.c index 6f46f46..4d1c76e 100644 --- a/spice-qemu-char.c +++ b/spice-qemu-char.c @@ -215,7 +215,10 @@ static void char_spice_finalize(Object *obj) SpiceChardev *s = SPICE_CHARDEV(obj); vmc_unregister_interface(s); - QLIST_REMOVE(s, next); + + if (s->next.le_prev) { + QLIST_REMOVE(s, next); + } g_free((char *)s->sin.subtype); #if SPICE_SERVER_VERSION >= 0x000c02 diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 395e986..4555468 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -560,16 +560,6 @@ int kvm_arch_process_async_events(CPUState *cs) return 0; } -int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) -{ - return 1; -} - -int kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - /* The #ifdef protections are until 32bit headers are imported and can * be removed once both 32 and 64 bit reach feature parity. 
*/ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 89421c8..fba9212 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3778,19 +3778,16 @@ static GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs) GuestPanicInformation *panic_info = NULL; if (env->features[FEAT_HYPERV_EDX] & HV_X64_GUEST_CRASH_MSR_AVAILABLE) { - GuestPanicInformationHyperV *panic_info_hv = - g_malloc0(sizeof(GuestPanicInformationHyperV)); panic_info = g_malloc0(sizeof(GuestPanicInformation)); - panic_info->type = GUEST_PANIC_INFORMATION_KIND_HYPER_V; - panic_info->u.hyper_v.data = panic_info_hv; + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_HYPER_V; assert(HV_X64_MSR_CRASH_PARAMS >= 5); - panic_info_hv->arg1 = env->msr_hv_crash_params[0]; - panic_info_hv->arg2 = env->msr_hv_crash_params[1]; - panic_info_hv->arg3 = env->msr_hv_crash_params[2]; - panic_info_hv->arg4 = env->msr_hv_crash_params[3]; - panic_info_hv->arg5 = env->msr_hv_crash_params[4]; + panic_info->u.hyper_v.arg1 = env->msr_hv_crash_params[0]; + panic_info->u.hyper_v.arg2 = env->msr_hv_crash_params[1]; + panic_info->u.hyper_v.arg3 = env->msr_hv_crash_params[2]; + panic_info->u.hyper_v.arg4 = env->msr_hv_crash_params[3]; + panic_info->u.hyper_v.arg5 = env->msr_hv_crash_params[4]; } return panic_info; @@ -3986,6 +3983,8 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), + DEFINE_PROP_BOOL("kvm-no-smi-migration", X86CPU, kvm_no_smi_migration, + false), DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 12a39d5..ac2ad6d 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1255,6 +1255,9 @@ struct X86CPU { /* if true override the phys_bits value with a value read from the host */ bool host_phys_bits; + /* Stop SMI delivery for 
migration compatibility with old machines */ + bool kvm_no_smi_migration; + /* Number of physical address bits supported */ uint32_t phys_bits; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 27fd050..887a812 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -64,13 +64,6 @@ * 255 kvm_msr_entry structs */ #define MSR_BUF_SIZE 4096 -#ifndef BUS_MCEERR_AR -#define BUS_MCEERR_AR 4 -#endif -#ifndef BUS_MCEERR_AO -#define BUS_MCEERR_AO 5 -#endif - const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_INFO(SET_TSS_ADDR), KVM_CAP_INFO(EXT_CPUID), @@ -462,70 +455,38 @@ static void hardware_memory_error(void) exit(1); } -int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) +void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) { X86CPU *cpu = X86_CPU(c); CPUX86State *env = &cpu->env; ram_addr_t ram_addr; hwaddr paddr; - if ((env->mcg_cap & MCG_SER_P) && addr - && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) { + /* If we get an action required MCE, it has been injected by KVM + * while the VM was running. An action optional MCE instead should + * be coming from the main thread, which qemu_init_sigbus identifies + * as the "early kill" thread. 
+ */ + assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); + + if ((env->mcg_cap & MCG_SER_P) && addr) { ram_addr = qemu_ram_addr_from_host(addr); - if (ram_addr == RAM_ADDR_INVALID || - !kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { - fprintf(stderr, "Hardware memory error for memory used by " - "QEMU itself instead of guest system!\n"); - /* Hope we are lucky for AO MCE */ - if (code == BUS_MCEERR_AO) { - return 0; - } else { - hardware_memory_error(); - } - } - kvm_hwpoison_page_add(ram_addr); - kvm_mce_inject(cpu, paddr, code); - } else { - if (code == BUS_MCEERR_AO) { - return 0; - } else if (code == BUS_MCEERR_AR) { - hardware_memory_error(); - } else { - return 1; + if (ram_addr != RAM_ADDR_INVALID && + kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { + kvm_hwpoison_page_add(ram_addr); + kvm_mce_inject(cpu, paddr, code); + return; } - } - return 0; -} -int kvm_arch_on_sigbus(int code, void *addr) -{ - X86CPU *cpu = X86_CPU(first_cpu); - - if ((cpu->env.mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) { - ram_addr_t ram_addr; - hwaddr paddr; + fprintf(stderr, "Hardware memory error for memory used by " + "QEMU itself instead of guest system!\n"); + } - /* Hope we are lucky for AO MCE */ - ram_addr = qemu_ram_addr_from_host(addr); - if (ram_addr == RAM_ADDR_INVALID || - !kvm_physical_memory_addr_from_host(first_cpu->kvm_state, - addr, &paddr)) { - fprintf(stderr, "Hardware memory error for memory used by " - "QEMU itself instead of guest system!: %p\n", addr); - return 0; - } - kvm_hwpoison_page_add(ram_addr); - kvm_mce_inject(X86_CPU(first_cpu), paddr, code); - } else { - if (code == BUS_MCEERR_AO) { - return 0; - } else if (code == BUS_MCEERR_AR) { - hardware_memory_error(); - } else { - return 1; - } + if (code == BUS_MCEERR_AR) { + hardware_memory_error(); } - return 0; + + /* Hope we are lucky for AO MCE */ } static int kvm_inject_mce_oldstyle(X86CPU *cpu) @@ -2531,7 +2492,12 @@ static int 
kvm_put_vcpu_events(X86CPU *cpu, int level) events.smi.pending = 0; events.smi.latched_init = 0; } - events.flags |= KVM_VCPUEVENT_VALID_SMM; + /* Stop SMI delivery on old machine types to avoid a reboot + * on an inward migration of an old VM. + */ + if (!cpu->kvm_no_smi_migration) { + events.flags |= KVM_VCPUEVENT_VALID_SMM; + } } if (level >= KVM_PUT_RESET_STATE) { diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 998c341..0982e87 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -180,18 +180,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) -{ - DPRINTF("%s\n", __func__); - return 1; -} - -int kvm_arch_on_sigbus(int code, void *addr) -{ - DPRINTF("%s\n", __func__); - return 1; -} - void kvm_arch_init_irq_routing(KVMState *s) { } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index acc40ec..03f5097 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2582,16 +2582,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) -{ - return 1; -} - -int kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - void kvm_arch_init_irq_routing(KVMState *s) { } diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 5ec050c..ac47154 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2140,16 +2140,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) -{ - return 1; -} - -int kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - void kvm_s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr, uint32_t io_int_parm, uint32_t io_int_word) @@ -1101,7 +1101,6 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) #define TB_EXIT_MASK 3 #define TB_EXIT_IDX0 0 #define TB_EXIT_IDX1 1 -#define TB_EXIT_ICOUNT_EXPIRED 2 #define TB_EXIT_REQUESTED 3 #ifdef HAVE_TCG_QEMU_TB_EXEC diff --git 
a/translate-all.c b/translate-all.c index 9bac061..d42d003 100644 --- a/translate-all.c +++ b/translate-all.c @@ -1930,7 +1930,7 @@ void cpu_interrupt(CPUState *cpu, int mask) { g_assert(qemu_mutex_iothread_locked()); cpu->interrupt_request |= mask; - cpu->tcg_exit_req = 1; + cpu->icount_decr.u16.high = -1; } /* diff --git a/translate-common.c b/translate-common.c index d504dd0..40fe5a1 100644 --- a/translate-common.c +++ b/translate-common.c @@ -43,14 +43,11 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) if (!qemu_cpu_is_self(cpu)) { qemu_cpu_kick(cpu); } else { - if (use_icount) { - cpu->icount_decr.u16.high = 0xffff; - if (!cpu->can_do_io - && (mask & ~old_mask) != 0) { - cpu_abort(cpu, "Raised interrupt while not in I/O function"); - } - } else { - cpu->tcg_exit_req = 1; + cpu->icount_decr.u16.high = -1; + if (use_icount && + !cpu->can_do_io + && (mask & ~old_mask) != 0) { + cpu_abort(cpu, "Raised interrupt while not in I/O function"); } } } diff --git a/util/compatfd.c b/util/compatfd.c index 9a43042..980bd33 100644 --- a/util/compatfd.c +++ b/util/compatfd.c @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include "qemu-common.h" -#include "qemu/compatfd.h" #include "qemu/thread.h" #include <sys/syscall.h> diff --git a/util/main-loop.c b/util/main-loop.c index ad10bca..ca7bb07 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -34,8 +34,6 @@ #ifndef _WIN32 -#include "qemu/compatfd.h" - /* If we have signalfd, we mask out the signals we want to handle and then * use signalfd to listen for them. We rely on whatever the current signal * handler is to dispatch the signals when we receive them. 
@@ -63,8 +61,7 @@ static void sigfd_handler(void *opaque) sigaction(info.ssi_signo, NULL, &action); if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) { - action.sa_sigaction(info.ssi_signo, - (siginfo_t *)&info, NULL); + sigaction_invoke(&action, &info); } else if (action.sa_handler) { action.sa_handler(info.ssi_signo); } diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f631464..cd686aa 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -603,3 +603,36 @@ void qemu_free_stack(void *stack, size_t sz) munmap(stack, sz); } + +void sigaction_invoke(struct sigaction *action, + struct qemu_signalfd_siginfo *info) +{ + siginfo_t si = { 0 }; + si.si_signo = info->ssi_signo; + si.si_errno = info->ssi_errno; + si.si_code = info->ssi_code; + + /* Convert the minimal set of fields defined by POSIX. + * Positive si_code values are reserved for kernel-generated + * signals, where the valid siginfo fields are determined by + * the signal number. But according to POSIX, it is unspecified + * whether SI_USER and SI_QUEUE have values less than or equal to + * zero. + */ + if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE || + info->ssi_code <= 0) { + /* SIGTERM, etc. 
*/ + si.si_pid = info->ssi_pid; + si.si_uid = info->ssi_uid; + } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE || + info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) { + si.si_addr = (void *)(uintptr_t)info->ssi_addr; + } else if (info->ssi_signo == SIGCHLD) { + si.si_pid = info->ssi_pid; + si.si_status = info->ssi_status; + si.si_uid = info->ssi_uid; + } else if (info->ssi_signo == SIGIO) { + si.si_band = info->ssi_band; + } + action->sa_sigaction(info->ssi_signo, &si, NULL); +} @@ -227,6 +227,7 @@ static struct { { .driver = "ide-hd", .flag = &default_cdrom }, { .driver = "ide-drive", .flag = &default_cdrom }, { .driver = "scsi-cd", .flag = &default_cdrom }, + { .driver = "scsi-hd", .flag = &default_cdrom }, { .driver = "virtio-serial-pci", .flag = &default_virtcon }, { .driver = "virtio-serial", .flag = &default_virtcon }, { .driver = "VGA", .flag = &default_vga }, @@ -1717,14 +1718,14 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) } if (info) { - if (info->type == GUEST_PANIC_INFORMATION_KIND_HYPER_V) { + if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) { qemu_log_mask(LOG_GUEST_ERROR, "HV crash parameters: (%#"PRIx64 " %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n", - info->u.hyper_v.data->arg1, - info->u.hyper_v.data->arg2, - info->u.hyper_v.data->arg3, - info->u.hyper_v.data->arg4, - info->u.hyper_v.data->arg5); + info->u.hyper_v.arg1, + info->u.hyper_v.arg2, + info->u.hyper_v.arg3, + info->u.hyper_v.arg4, + info->u.hyper_v.arg5); } qapi_free_GuestPanicInformation(info); } |