From 05ff8dc32fa124e7bbf77a257f863f3685c7be9d Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Wed, 17 Oct 2018 14:24:18 +0600 Subject: Revert some patches from recent [PATCH v6] "Fixing record/replay and adding reverse debugging" That patch series introduced new virtual clock type for use in external subsystems. It breaks desired behavior in non-record/replay usage scenarios due to a small change to existing behavior. Processing of virtual timers belonging to new clock type is kicked off to the main loop, which makes these timers asynchronous with vCPU thread and, in icount mode, with whole guest execution. This breaks expected determinism in non-record/replay icount mode of emulation where these "external subsystems" are isolated from the host (i.e. they are external only to guest core, not to the entire emulation environment). Example for slirp ("user" backend for network device): User runs qemu in icount mode with rtc clock=vm without any external communication interfaces but with "-netdev user,restrict=on". It expects deterministic execution, because network services are emulated inside qemu and isolated from host. There are no reasons to get reply from DHCP server with different delay or something like that. The next patches revert reimplements the same changes in a better way. This reverts commit 87f4fe7653baf55b5c2f2753fe6003f473c07342. This reverts commit 775a412bf83f6bc0c5c02091ee06cf649b34c593. This reverts commit 9888091404a702d7ec79d51b088d994b9fc121bd. Signed-off-by: Artem Pisarenko Message-Id: <18b1e7c8f155fe26976f91be06bde98eef6f8751.1539764043.git.artem.k.pisarenko@gmail.com> Signed-off-by: Paolo Bonzini --- include/qemu/timer.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/qemu/timer.h b/include/qemu/timer.h index a005ed2..39ea907 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -42,14 +42,6 @@ * In icount mode, this clock counts nanoseconds while the virtual * machine is running. It is used to increase @QEMU_CLOCK_VIRTUAL * while the CPUs are sleeping and thus not executing instructions. - * - * @QEMU_CLOCK_VIRTUAL_EXT: virtual clock for external subsystems - * - * The virtual clock only runs during the emulation. It stops - * when the virtual machine is stopped. The timers for this clock - * do not recorded in rr mode, therefore this clock could be used - * for the subsystems that operate outside the guest core. - * */ typedef enum { @@ -57,7 +49,6 @@ typedef enum { QEMU_CLOCK_VIRTUAL = 1, QEMU_CLOCK_HOST = 2, QEMU_CLOCK_VIRTUAL_RT = 3, - QEMU_CLOCK_VIRTUAL_EXT = 4, QEMU_CLOCK_MAX } QEMUClockType; -- cgit v1.1 From 89a603a0c80ae3d6a8711571550b2ae9a01ea909 Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Wed, 17 Oct 2018 14:24:19 +0600 Subject: qemu-timer: introduce timer attributes Attributes are simple flags, associated with individual timers for their whole lifetime. They intended to be used to mark individual timers for special handling when they fire. New/init functions family in timer interface updated and refactored (new 'attribute' argument added, timer_list replaced with timer_list_group+type combinations, comments improved to avoid info duplication). Also existing aio interface extended with attribute-enabled variants of functions, which create/initialize timers. Signed-off-by: Artem Pisarenko Message-Id: Signed-off-by: Paolo Bonzini --- include/block/aio.h | 59 ++++++++++++++++++++++++---- include/qemu/timer.h | 109 +++++++++++++++++++++++++-------------------------- 2 files changed, 106 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/block/aio.h b/include/block/aio.h index f08630c..0ca25df 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -388,18 +388,41 @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); /** - * aio_timer_new: + * aio_timer_new_with_attrs: * @ctx: the aio context * @type: the clock type * @scale: the scale + * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_ values + * to assign * @cb: the callback to call on timer expiry * @opaque: the opaque pointer to pass to the callback * - * Allocate a new timer attached to the context @ctx. + * Allocate a new timer (with attributes) attached to the context @ctx. * The function is responsible for memory allocation. * - * The preferred interface is aio_timer_init. Use that - * unless you really need dynamic memory allocation. + * The preferred interface is aio_timer_init or aio_timer_init_with_attrs. + * Use that unless you really need dynamic memory allocation. + * + * Returns: a pointer to the new timer + */ +static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx, + QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque); +} + +/** + * aio_timer_new: + * @ctx: the aio context + * @type: the clock type + * @scale: the scale + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Allocate a new timer attached to the context @ctx. + * See aio_timer_new_with_attrs for details. * * Returns: a pointer to the new timer */ @@ -407,7 +430,29 @@ static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque); + return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque); +} + +/** + * aio_timer_init_with_attrs: + * @ctx: the aio context + * @ts: the timer + * @type: the clock type + * @scale: the scale + * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_ values + * to assign + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Initialise a new timer (with attributes) attached to the context @ctx. + * The caller is responsible for memory allocation. + */ +static inline void aio_timer_init_with_attrs(AioContext *ctx, + QEMUTimer *ts, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque); } /** @@ -420,14 +465,14 @@ static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, * @opaque: the opaque pointer to pass to the callback * * Initialise a new timer attached to the context @ctx. - * The caller is responsible for memory allocation. + * See aio_timer_init_with_attrs for details. */ static inline void aio_timer_init(AioContext *ctx, QEMUTimer *ts, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque); + timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque); } /** diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 39ea907..8ff1092 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -2,6 +2,7 @@ #define QEMU_TIMER_H #include "qemu-common.h" +#include "qemu/bitops.h" #include "qemu/notify.h" #include "qemu/host-utils.h" @@ -52,6 +53,16 @@ typedef enum { QEMU_CLOCK_MAX } QEMUClockType; +/** + * QEMU Timer attributes: + * + * An individual timer may be given one or multiple attributes when initialized. + * Each attribute corresponds to one bit. Attributes modify the processing + * of timers when they fire. + * + * No attributes defined currently. + */ + typedef struct QEMUTimerList QEMUTimerList; struct QEMUTimerListGroup { @@ -67,6 +78,7 @@ struct QEMUTimer { QEMUTimerCB *cb; void *opaque; QEMUTimer *next; + int attributes; int scale; }; @@ -418,22 +430,27 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg); */ /** - * timer_init_tl: + * timer_init_full: * @ts: the timer to be initialised - * @timer_list: the timer list to attach the timer to + * @timer_list_group: (optional) the timer list group to attach the timer to + * @type: the clock type to use * @scale: the scale value for the timer + * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_ values * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Initialise a new timer and associate it with @timer_list. + * Initialise a timer with the given scale and attributes, + * and associate it with timer list for given clock @type in @timer_list_group + * (or default timer list group, if NULL). * The caller is responsible for allocating the memory. * * You need not call an explicit deinit call. Simply make * sure it is not on a list with timer_del. */ -void timer_init_tl(QEMUTimer *ts, - QEMUTimerList *timer_list, int scale, - QEMUTimerCB *cb, void *opaque); +void timer_init_full(QEMUTimer *ts, + QEMUTimerListGroup *timer_list_group, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque); /** * timer_init: @@ -445,14 +462,12 @@ void timer_init_tl(QEMUTimer *ts, * * Initialize a timer with the given scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - timer_init_tl(ts, main_loop_tlg.tl[type], scale, cb, opaque); + timer_init_full(ts, NULL, type, scale, 0, cb, opaque); } /** @@ -464,9 +479,7 @@ static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale, * * Initialize a timer with nanosecond scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -483,9 +496,7 @@ static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type, * * Initialize a timer with microsecond scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -502,9 +513,7 @@ static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type, * * Initialize a timer with millisecond scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -513,27 +522,37 @@ static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type, } /** - * timer_new_tl: - * @timer_list: the timer list to attach the timer to + * timer_new_full: + * @timer_list_group: (optional) the timer list group to attach the timer to + * @type: the clock type to use * @scale: the scale value for the timer + * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_ values * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Create a new timer and associate it with @timer_list. + * Create a new timer with the given scale and attributes, + * and associate it with timer list for given clock @type in @timer_list_group + * (or default timer list group, if NULL). * The memory is allocated by the function. * * This is not the preferred interface unless you know you - * are going to call timer_free. Use timer_init instead. + * are going to call timer_free. Use timer_init or timer_init_full instead. + * + * The default timer list has one special feature: in icount mode, + * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is + * not true of other timer lists, which are typically associated + * with an AioContext---each of them runs its timer callbacks in its own + * AioContext thread. * * Returns: a pointer to the timer */ -static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, - int scale, - QEMUTimerCB *cb, - void *opaque) +static inline QEMUTimer *timer_new_full(QEMUTimerListGroup *timer_list_group, + QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) { QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer)); - timer_init_tl(ts, timer_list, scale, cb, opaque); + timer_init_full(ts, timer_list_group, type, scale, attributes, cb, opaque); return ts; } @@ -544,21 +563,16 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Create a new timer and associate it with the default - * timer list for the clock type @type. - * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. + * Create a new timer with the given scale, + * and associate it with the default timer list for the clock type @type. + * See timer_new_full for details. * * Returns: a pointer to the timer */ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - return timer_new_tl(main_loop_tlg.tl[type], scale, cb, opaque); + return timer_new_full(NULL, type, scale, 0, cb, opaque); } /** @@ -569,12 +583,7 @@ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, * * Create a new timer with nanosecond scale on the default timer list * associated with the clock. - * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. + * See timer_new_full for details. * * Returns: a pointer to the newly created timer */ @@ -590,14 +599,9 @@ static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb, * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. - * * Create a new timer with microsecond scale on the default timer list * associated with the clock. + * See timer_new_full for details. * * Returns: a pointer to the newly created timer */ @@ -613,14 +617,9 @@ static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb, * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. - * * Create a new timer with millisecond scale on the default timer list * associated with the clock. + * See timer_new_full for details. * * Returns: a pointer to the newly created timer */ -- cgit v1.1 From e81f86790f561437b70549aff05433731b464e62 Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Wed, 17 Oct 2018 14:24:20 +0600 Subject: qemu-timer: avoid checkpoints for virtual clock timers in external subsystems Adds EXTERNAL attribute definition to qemu timers subsystem and assigns it to virtual clock timers, used in slirp (ICMP IPv6) and ui (key queue). Virtual clock processing in rr mode can use this attribute instead of a separate clock type. Fixes: 87f4fe7653baf55b5c2f2753fe6003f473c07342 Fixes: 775a412bf83f6bc0c5c02091ee06cf649b34c593 Fixes: 9888091404a702d7ec79d51b088d994b9fc121bd Signed-off-by: Artem Pisarenko Message-Id: Signed-off-by: Paolo Bonzini --- include/qemu/timer.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 8ff1092..9f37c92 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -60,9 +60,17 @@ typedef enum { * Each attribute corresponds to one bit. Attributes modify the processing * of timers when they fire. * - * No attributes defined currently. + * The following attributes are available: + * + * QEMU_TIMER_ATTR_EXTERNAL: drives external subsystem + * + * Timers with this attribute do not recorded in rr mode, therefore it could be + * used for the subsystems that operate outside the guest core. Applicable only + * with virtual clock type. */ +#define QEMU_TIMER_ATTR_EXTERNAL BIT(0) + typedef struct QEMUTimerList QEMUTimerList; struct QEMUTimerListGroup { -- cgit v1.1 From e6d34aeea6aad52b01956d9cc29e81887939b9ea Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Thu, 18 Oct 2018 00:52:54 +0800 Subject: target-i386 : add coalesced_pio API the primary API realization. Signed-off-by: Peng Hao Reviewed-by: Eduardo Habkost Message-Id: <1539795177-21038-3-git-send-email-peng.hao2@zte.com.cn> Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/exec/memory.h b/include/exec/memory.h index 3a427aa..667466b 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -419,9 +419,9 @@ struct MemoryListener { bool match_data, uint64_t data, EventNotifier *e); void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section, bool match_data, uint64_t data, EventNotifier *e); - void (*coalesced_mmio_add)(MemoryListener *listener, MemoryRegionSection *section, + void (*coalesced_io_add)(MemoryListener *listener, MemoryRegionSection *section, hwaddr addr, hwaddr len); - void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection *section, + void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section, hwaddr addr, hwaddr len); /* Lower = earlier (during add), later (during del) */ unsigned priority; -- cgit v1.1 From 92cc3aaa1fb4165a6236a2265c6be4ea531d0988 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Thu, 18 Oct 2018 17:30:51 +0300 Subject: i386: hvf: Remove hvf_disabled accel_init_machine sets *(acc->allowed) to true if acc->init_machine(ms) succeeds. There's no need to have both hvf_allowed and hvf_disabled. Signed-off-by: Roman Bolshakov Message-Id: <20181018143051.48508-1-r.bolshakov@yadro.com> Signed-off-by: Paolo Bonzini --- include/sysemu/hvf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index 2411188..aaa51d2 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -17,7 +17,7 @@ #include "exec/memory.h" #include "sysemu/accel.h" -extern int hvf_disabled; +extern bool hvf_allowed; #ifdef CONFIG_HVF #include #include @@ -26,7 +26,7 @@ extern int hvf_disabled; #include "hw/hw.h" uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); -#define hvf_enabled() !hvf_disabled +#define hvf_enabled() (hvf_allowed) #else #define hvf_enabled() 0 #define hvf_get_supported_cpuid(func, idx, reg) 0 -- cgit v1.1 From 8b5e6caf01f778a257721cae499392e8f1d55ddb Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 16 Oct 2018 15:33:40 +0200 Subject: call HotplugHandler->plug() as the last step in device realization When [2] was fixed it was agreed that adding and calling post_plug() callback after device_reset() was low risk approach to hotfix issue right before release. So it was merged instead of moving already existing plug() callback after device_reset() is called which would be more risky and require all plug() callbacks audit. Looking at the current plug() callbacks, it doesn't seem that moving plug() callback after device_reset() is breaking anything, so here goes agreed upon [3] proper fix which essentially reverts [1][2] and moves plug() callback after device_reset(). This way devices always comes to plug() stage, after it's been fully initialized (including being reset), which fixes race condition [2] without need for an extra post_plug() callback. 1. (25e897881 "qdev: add HotplugHandler->post_plug() callback") 2. (8449bcf94 "virtio-scsi: fix hotplug ->reset() vs event race") 3. https://www.mail-archive.com/qemu-devel@nongnu.org/msg549915.html Signed-off-by: Igor Mammedov Message-Id: <1539696820-273275-1-git-send-email-imammedo@redhat.com> Reviewed-by: Stefan Hajnoczi Tested-by: Pierre Morel Acked-by: Pierre Morel Signed-off-by: Paolo Bonzini --- include/hw/hotplug.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h index 51541d6..1a0516a 100644 --- a/include/hw/hotplug.h +++ b/include/hw/hotplug.h @@ -47,8 +47,6 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler, * @parent: Opaque parent interface. * @pre_plug: pre plug callback called at start of device.realize(true) * @plug: plug callback called at end of device.realize(true). - * @post_plug: post plug callback called after device.realize(true) and device - * reset * @unplug_request: unplug request callback. * Used as a means to initiate device unplug for devices that * require asynchronous unplug handling. @@ -63,7 +61,6 @@ typedef struct HotplugHandlerClass { /* */ hotplug_fn pre_plug; hotplug_fn plug; - void (*post_plug)(HotplugHandler *plug_handler, DeviceState *plugged_dev); hotplug_fn unplug_request; hotplug_fn unplug; } HotplugHandlerClass; @@ -87,14 +84,6 @@ void hotplug_handler_pre_plug(HotplugHandler *plug_handler, Error **errp); /** - * hotplug_handler_post_plug: - * - * Call #HotplugHandlerClass.post_plug callback of @plug_handler. - */ -void hotplug_handler_post_plug(HotplugHandler *plug_handler, - DeviceState *plugged_dev); - -/** * hotplug_handler_unplug_request: * * Calls #HotplugHandlerClass.unplug_request callback of @plug_handler. -- cgit v1.1 From 5116122af70357d895ecc61c0211dbf786226081 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:20:37 +0300 Subject: hyperv: split hyperv-proto.h into x86 and arch-independent parts Some parts of the Hyper-V hypervisor-guest interface appear to be target-independent, so move them into a proper header. Not that Hyper-V ARM64 emulation is around the corner but it seems more conveninent to have most of Hyper-V and VMBus target-independent, and allows to avoid conflicts with inclusion of arch-specific headers down the road in VMBus implementation. Signed-off-by: Roman Kagan Message-Id: <20180921082041.29380-2-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/hyperv-proto.h | 129 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 include/hw/hyperv/hyperv-proto.h (limited to 'include') diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h new file mode 100644 index 0000000..2dc78ee --- /dev/null +++ b/include/hw/hyperv/hyperv-proto.h @@ -0,0 +1,129 @@ +/* + * Definitions for Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HYPERV_PROTO_H +#define HW_HYPERV_HYPERV_PROTO_H + +#include "qemu/bitmap.h" + +/* + * Hypercall status code + */ +#define HV_STATUS_SUCCESS 0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 +#define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INVALID_PARAMETER 5 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_CONNECTION_ID 18 +#define HV_STATUS_INSUFFICIENT_BUFFERS 19 + +/* + * Hypercall numbers + */ +#define HV_POST_MESSAGE 0x005c +#define HV_SIGNAL_EVENT 0x005d +#define HV_HYPERCALL_FAST (1u << 16) + +/* + * Message size + */ +#define HV_MESSAGE_PAYLOAD_SIZE 240 + +/* + * Message types + */ +#define HV_MESSAGE_NONE 0x00000000 +#define HV_MESSAGE_VMBUS 0x00000001 +#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 +#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 +#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 +#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 +#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 +#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 +#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 +#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 +#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 +#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 +#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 +#define HV_MESSAGE_X64_APIC_EOI 0x80010004 +#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 + +/* + * Message flags + */ +#define HV_MESSAGE_FLAG_PENDING 0x1 + +/* + * Number of synthetic interrupts + */ +#define HV_SINT_COUNT 16 + +/* + * Event flags number per SINT + */ +#define HV_EVENT_FLAGS_COUNT (256 * 8) + +/* + * Connection id valid bits + */ +#define HV_CONNECTION_ID_MASK 0x00ffffff + +/* + * Input structure for POST_MESSAGE hypercall + */ +struct hyperv_post_message_input { + uint32_t connection_id; + uint32_t _reserved; + uint32_t message_type; + uint32_t payload_size; + uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; +}; + +/* + * Input structure for SIGNAL_EVENT hypercall + */ +struct hyperv_signal_event_input { + uint32_t connection_id; + uint16_t flag_number; + uint16_t _reserved_zero; +}; + +/* + * SynIC message structures + */ +struct hyperv_message_header { + uint32_t message_type; + uint8_t payload_size; + uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ + uint8_t _reserved[2]; + uint64_t sender; +}; + +struct hyperv_message { + struct hyperv_message_header header; + uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; +}; + +struct hyperv_message_page { + struct hyperv_message slot[HV_SINT_COUNT]; +}; + +/* + * SynIC event flags structures + */ +struct hyperv_event_flags { + DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); +}; + +struct hyperv_event_flags_page { + struct hyperv_event_flags slot[HV_SINT_COUNT]; +}; + +#endif -- cgit v1.1 From 701189e31140a7c82ec02a7f4ca632cfd6a8559d Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:20:39 +0300 Subject: hyperv: factor out arch-independent API into hw/hyperv A significant part of hyperv.c is not actually tied to x86, and can be moved to hw/. This will allow to maintain most of Hyper-V and VMBus target-independent, and to avoid conflicts with inclusion of arch-specific headers down the road in VMBus implementation. Also this stuff can now be opt-out with CONFIG_HYPERV. Signed-off-by: Roman Kagan Message-Id: <20180921082041.29380-4-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/hyperv.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/hw/hyperv/hyperv.h (limited to 'include') diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h new file mode 100644 index 0000000..d6c8d78 --- /dev/null +++ b/include/hw/hyperv/hyperv.h @@ -0,0 +1,31 @@ +/* + * Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HYPERV_H +#define HW_HYPERV_HYPERV_H + +#include "cpu-qom.h" + +typedef struct HvSintRoute HvSintRoute; +typedef void (*HvSintAckClb)(void *data); + +HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, + HvSintAckClb sint_ack_clb, + void *sint_ack_clb_data); +void hyperv_sint_route_ref(HvSintRoute *sint_route); +void hyperv_sint_route_unref(HvSintRoute *sint_route); + +int hyperv_sint_route_set_sint(HvSintRoute *sint_route); + +static inline uint32_t hyperv_vp_index(CPUState *cs) +{ + return cs->cpu_index; +} + +#endif -- cgit v1.1 From 606c34bfd57a0ecda67b395bea022bb307a5384e Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:09 +0300 Subject: hyperv: qom-ify SynIC Make Hyper-V SynIC a device which is attached as a child to a CPU. For now it only makes SynIC visibile in the qom hierarchy, and maintains its internal fields in sync with the respecitve msrs of the parent cpu (the fields will be used in followup patches). Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-3-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/hyperv.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index d6c8d78..6fba476 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -28,4 +28,9 @@ static inline uint32_t hyperv_vp_index(CPUState *cs) return cs->cpu_index; } +void hyperv_synic_add(CPUState *cs); +void hyperv_synic_reset(CPUState *cs); +void hyperv_synic_update(CPUState *cs, bool enable, + hwaddr msg_page_addr, hwaddr event_page_addr); + #endif -- cgit v1.1 From 9b4cf107b09d18ac30f46fd1c4de8585ccba030c Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:10 +0300 Subject: hyperv: only add SynIC in compatible configurations Certain configurations do not allow SynIC to be used in QEMU. In particular, - when hyperv_vpindex is off, SINT routes can't be used as they refer to the destination vCPU by vp_index - older KVM (which doesn't expose KVM_CAP_HYPERV_SYNIC2) zeroes out SynIC message and event pages on every msr load, breaking migration OTOH in-KVM users of SynIC -- SynIC timers -- do work in those configurations, and we shouldn't stop the guest from using them. To cover both scenarios, introduce an X86CPU property that makes CPU init code to skip creation of the SynIC object (and thus disables any SynIC use in QEMU) but keeps the KVM part of the SynIC working. The property is clear by default but is set via compat logic for older machine types. As a result, when hv_synic and a modern machine type are specified, QEMU will refuse to run unless vp_index is on and the kernel is recent enough. OTOH with an older machine type QEMU will run fine with hv_synic=on against an older kernel and/or without vp_index enabled but will disallow the in-QEMU uses of SynIC (in e.g. VMBus). Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-4-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/i386/pc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 6894f37..dfe6746 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -294,6 +294,14 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_3_0 \ + HW_COMPAT_3_0 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "x-hv-synic-kvm-only",\ + .value = "on",\ + } + #define PC_COMPAT_2_12 \ HW_COMPAT_2_12 \ {\ -- cgit v1.1 From 4cbaf3c13300b79d0386b567630f8e9c91ac5099 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:12 +0300 Subject: hyperv: add synic message delivery Add infrastructure to deliver SynIC messages to the SynIC message page. Note that KVM may also want to deliver (SynIC timer) messages to the same message slot. The problem is that the access to a SynIC message slot is controlled by the value of its .msg_type field which indicates if the slot is being owned by the hypervisor (zero) or by the guest (non-zero). This leaves no room for synchronizing multiple concurrent producers. The simplest way to deal with this for both KVM and QEMU is to only deliver messages in the vcpu thread. KVM already does this; this patch makes it for QEMU, too. Specifically, - add a function for posting messages, which only copies the message into the staging buffer if its free, and schedules a work on the corresponding vcpu to actually deliver it to the guest slot; - instead of a sint ack callback, set up the sint route with a message status callback. This function is called in a bh whenever there are updates to the message slot status: either the vcpu made definitive progress delivering the message from the staging buffer (succeeded or failed) or the guest issued EOM; the status is passed as an argument to the callback. Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-6-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/hyperv.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index 6fba476..82d561f 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -11,18 +11,30 @@ #define HW_HYPERV_HYPERV_H #include "cpu-qom.h" +#include "hw/hyperv/hyperv-proto.h" typedef struct HvSintRoute HvSintRoute; -typedef void (*HvSintAckClb)(void *data); + +/* + * Callback executed in a bottom-half when the status of posting the message + * becomes known, before unblocking the connection for further messages + */ +typedef void (*HvSintMsgCb)(void *data, int status); HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb, - void *sint_ack_clb_data); + HvSintMsgCb cb, void *cb_data); void hyperv_sint_route_ref(HvSintRoute *sint_route); void hyperv_sint_route_unref(HvSintRoute *sint_route); int hyperv_sint_route_set_sint(HvSintRoute *sint_route); +/* + * Submit a message to be posted in vcpu context. If the submission succeeds, + * the status of posting the message is reported via the callback associated + * with the @sint_route; until then no more messages are accepted. + */ +int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); + static inline uint32_t hyperv_vp_index(CPUState *cs) { return cs->cpu_index; -- cgit v1.1 From f5642f8b458ba578c1ea94b9ad773e1e5c6cb615 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:13 +0300 Subject: hyperv: add synic event flag signaling Add infrastructure to signal SynIC event flags by atomically setting the corresponding bit in the event flags page and firing a SINT if necessary. Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-7-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/hyperv.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index 82d561f..757c85e 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -34,6 +34,10 @@ int hyperv_sint_route_set_sint(HvSintRoute *sint_route); * with the @sint_route; until then no more messages are accepted. */ int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); +/* + * Set event flag @eventno, and signal the SINT if the flag has changed. + */ +int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); static inline uint32_t hyperv_vp_index(CPUState *cs) { -- cgit v1.1 From e6ea9f45b72fe83d49adda948ff397dafc00c68f Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:14 +0300 Subject: hyperv: process SIGNAL_EVENT hypercall Add handling of SIGNAL_EVENT hypercall. For that, provide an interface to associate an EventNotifier with an event connection number, so that it's signaled when the SIGNAL_EVENT hypercall with the matching connection ID is called by the guest. Support for using KVM functionality for this will be added in a followup patch. Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-8-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/hyperv-proto.h | 1 + include/hw/hyperv/hyperv.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h index 2dc78ee..21dc28a 100644 --- a/include/hw/hyperv/hyperv-proto.h +++ b/include/hw/hyperv/hyperv-proto.h @@ -21,6 +21,7 @@ #define HV_STATUS_INVALID_ALIGNMENT 4 #define HV_STATUS_INVALID_PARAMETER 5 #define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_PORT_ID 17 #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index 757c85e..df92ed7 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -39,6 +39,19 @@ int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); */ int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); +/* + * Associate @notifier with the event connection @conn_id, such that @notifier + * is signaled when the guest executes HV_SIGNAL_EVENT hypercall on @conn_id. + * If @notifier is NULL clear the association. + */ +int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier); + +/* + * Process HV_SIGNAL_EVENT hypercall: signal the EventNotifier associated with + * the connection as specified in @param. + */ +uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast); + static inline uint32_t hyperv_vp_index(CPUState *cs) { return cs->cpu_index; -- cgit v1.1 From 76036a5fc7ca632f805748aeef416355b1d212a3 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:16 +0300 Subject: hyperv: process POST_MESSAGE hypercall Add handling of POST_MESSAGE hypercall. For that, add an interface to regsiter a handler for the messages arrived from the guest on a particular connection id (IOW set up a message connection in Hyper-V speak). Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-10-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/hyperv.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index df92ed7..597381c 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -40,6 +40,18 @@ int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); /* + * Handler for messages arriving from the guest via HV_POST_MESSAGE hypercall. + * Executed in vcpu context. + */ +typedef uint16_t (*HvMsgHandler)(const struct hyperv_post_message_input *msg, + void *data); +/* + * Associate @handler with the message connection @conn_id, such that @handler + * is called with @data when the guest executes HV_POST_MESSAGE hypercall on + * @conn_id. If @handler is NULL clear the association. + */ +int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data); +/* * Associate @notifier with the event connection @conn_id, such that @notifier * is signaled when the guest executes HV_SIGNAL_EVENT hypercall on @conn_id. * If @notifier is NULL clear the association. @@ -47,6 +59,12 @@ int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier); /* + * Process HV_POST_MESSAGE hypercall: parse the data in the guest memory as + * specified in @param, and call the HvMsgHandler associated with the + * connection on the message contained therein. + */ +uint16_t hyperv_hcall_post_message(uint64_t param, bool fast); +/* * Process HV_SIGNAL_EVENT hypercall: signal the EventNotifier associated with * the connection as specified in @param. */ -- cgit v1.1 From 74c0b816adfc6aa1b01b4426fdf385e32e35cbac Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Oct 2018 13:24:14 +0200 Subject: replay: pass raw icount value to replay_save_clock This avoids lock recursion when REPLAY_CLOCK is called inside the timers spinlock. Signed-off-by: Paolo Bonzini --- include/sysemu/replay.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index 7f7a594..3a7c58e 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -100,14 +100,20 @@ bool replay_has_interrupt(void); /* Processing clocks and other time sources */ /*! Save the specified clock */ -int64_t replay_save_clock(ReplayClockKind kind, int64_t clock); +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, + int64_t raw_icount); /*! Read the specified clock from the log or return cached data */ int64_t replay_read_clock(ReplayClockKind kind); /*! Saves or reads the clock depending on the current replay mode. */ #define REPLAY_CLOCK(clock, value) \ (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ : replay_mode == REPLAY_MODE_RECORD \ - ? replay_save_clock((clock), (value)) \ + ? replay_save_clock((clock), (value), cpu_get_icount_raw()) \ + : (value)) +#define REPLAY_CLOCK_LOCKED(clock, value) \ + (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value), cpu_get_icount_raw_locked()) \ : (value)) /* Events */ -- cgit v1.1