diff options
36 files changed, 843 insertions, 161 deletions
diff --git a/contrib/libvhost-user/libvhost-user-glib.c b/contrib/libvhost-user/libvhost-user-glib.c index 42660a1..99edd2f 100644 --- a/contrib/libvhost-user/libvhost-user-glib.c +++ b/contrib/libvhost-user/libvhost-user-glib.c @@ -131,18 +131,24 @@ static void vug_watch(VuDev *dev, int condition, void *data) } } -void -vug_init(VugDev *dev, int socket, +bool +vug_init(VugDev *dev, uint16_t max_queues, int socket, vu_panic_cb panic, const VuDevIface *iface) { g_assert(dev); g_assert(iface); - vu_init(&dev->parent, socket, panic, set_watch, remove_watch, iface); + if (!vu_init(&dev->parent, max_queues, socket, panic, set_watch, + remove_watch, iface)) { + return false; + } + dev->fdmap = g_hash_table_new_full(NULL, NULL, NULL, (GDestroyNotify) g_source_destroy); dev->src = vug_source_new(dev, socket, G_IO_IN, vug_watch, NULL); + + return true; } void diff --git a/contrib/libvhost-user/libvhost-user-glib.h b/contrib/libvhost-user/libvhost-user-glib.h index d3200f3..64d539d 100644 --- a/contrib/libvhost-user/libvhost-user-glib.h +++ b/contrib/libvhost-user/libvhost-user-glib.h @@ -25,7 +25,7 @@ typedef struct VugDev { GSource *src; } VugDev; -void vug_init(VugDev *dev, int socket, +bool vug_init(VugDev *dev, uint16_t max_queues, int socket, vu_panic_cb panic, const VuDevIface *iface); void vug_deinit(VugDev *dev); diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 443b7e0..4b36e35 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -216,6 +216,15 @@ vmsg_close_fds(VhostUserMsg *vmsg) } } +/* Set reply payload.u64 and clear request flags and fd_num */ +static void vmsg_set_reply_u64(VhostUserMsg *vmsg, uint64_t val) +{ + vmsg->flags = 0; /* defaults will be set by vu_send_reply() */ + vmsg->size = sizeof(vmsg->payload.u64); + vmsg->payload.u64 = val; + vmsg->fd_num = 0; +} + /* A test to see if we have userfault available */ static bool have_userfault(void) @@ -484,9 +493,9 @@ vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg) static void vu_set_enable_all_rings(VuDev *dev, bool enabled) { - int i; + uint16_t i; - for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) { + for (i = 0; i < dev->max_queues; i++) { dev->vq[i].enable = enabled; } } @@ -907,7 +916,7 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg) { int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; - if (index >= VHOST_MAX_NR_VIRTQUEUE) { + if (index >= dev->max_queues) { vmsg_close_fds(vmsg); vu_panic(dev, "Invalid queue index: %u", index); return false; @@ -1151,7 +1160,8 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg) static bool vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) { - uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | + uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_MQ | + 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ | 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD; @@ -1168,10 +1178,7 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) features |= dev->iface->get_protocol_features(dev); } - vmsg->payload.u64 = features; - vmsg->size = sizeof(vmsg->payload.u64); - vmsg->fd_num = 0; - + vmsg_set_reply_u64(vmsg, features); return true; } @@ -1194,8 +1201,8 @@ vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) static bool vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg) { - DPRINT("Function %s() not implemented yet.\n", __func__); - return false; + vmsg_set_reply_u64(vmsg, dev->max_queues); + return true; } static bool @@ -1207,7 +1214,7 @@ vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg) DPRINT("State.index: %d\n", index); DPRINT("State.enable: %d\n", enable); - if (index >= VHOST_MAX_NR_VIRTQUEUE) { + if (index >= dev->max_queues) { vu_panic(dev, "Invalid vring_enable index: %u", index); return false; } @@ -1307,17 +1314,14 @@ out: static bool vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg) { - vmsg->payload.u64 = -1; - vmsg->size = sizeof(vmsg->payload.u64); - if (dev->nregions) { vu_panic(dev, "Regions already registered at postcopy-listen"); + vmsg_set_reply_u64(vmsg, -1); return true; } dev->postcopy_listening = true; - vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; - vmsg->payload.u64 = 0; /* Success */ + vmsg_set_reply_u64(vmsg, 0); return true; } @@ -1332,10 +1336,7 @@ vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg) DPRINT("%s: Done close\n", __func__); } - vmsg->fd_num = 0; - vmsg->payload.u64 = 0; - vmsg->size = sizeof(vmsg->payload.u64); - vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; + vmsg_set_reply_u64(vmsg, 0); DPRINT("%s: exit\n", __func__); return true; } @@ -1582,7 +1583,7 @@ vu_deinit(VuDev *dev) } dev->nregions = 0; - for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) { + for (i = 0; i < dev->max_queues; i++) { VuVirtq *vq = &dev->vq[i]; if (vq->call_fd != -1) { @@ -1627,18 +1628,23 @@ vu_deinit(VuDev *dev) if (dev->sock != -1) { close(dev->sock); } + + free(dev->vq); + dev->vq = NULL; } -void +bool vu_init(VuDev *dev, + uint16_t max_queues, int socket, vu_panic_cb panic, vu_set_watch_cb set_watch, vu_remove_watch_cb remove_watch, const VuDevIface *iface) { - int i; + uint16_t i; + assert(max_queues > 0); assert(socket >= 0); assert(set_watch); assert(remove_watch); @@ -1654,18 +1660,28 @@ vu_init(VuDev *dev, dev->iface = iface; dev->log_call_fd = -1; dev->slave_fd = -1; - for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) { + dev->max_queues = max_queues; + + dev->vq = malloc(max_queues * sizeof(dev->vq[0])); + if (!dev->vq) { + DPRINT("%s: failed to malloc virtqueues\n", __func__); + return false; + } + + for (i = 0; i < max_queues; i++) { dev->vq[i] = (VuVirtq) { .call_fd = -1, .kick_fd = -1, .err_fd = -1, .notification = true, }; } + + return true; } VuVirtq * vu_get_queue(VuDev *dev, int qidx) { - assert(qidx < VHOST_MAX_NR_VIRTQUEUE); + assert(qidx < dev->max_queues); return &dev->vq[qidx]; } diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 3b888ff..46b6007 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -25,7 +25,6 @@ #define VHOST_USER_F_PROTOCOL_FEATURES 30 #define VHOST_LOG_PAGE 4096 -#define VHOST_MAX_NR_VIRTQUEUE 8 #define VIRTQUEUE_MAX_SIZE 1024 #define VHOST_MEMORY_MAX_NREGIONS 8 @@ -353,7 +352,7 @@ struct VuDev { int sock; uint32_t nregions; VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; - VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE]; + VuVirtq *vq; VuDevInflightInfo inflight_info; int log_call_fd; int slave_fd; @@ -362,6 +361,7 @@ struct VuDev { uint64_t features; uint64_t protocol_features; bool broken; + uint16_t max_queues; /* @set_watch: add or update the given fd to the watch set, * call cb when condition is met */ @@ -391,6 +391,7 @@ typedef struct VuVirtqElement { /** * vu_init: * @dev: a VuDev context + * @max_queues: maximum number of virtqueues * @socket: the socket connected to vhost-user master * @panic: a panic callback * @set_watch: a set_watch callback @@ -398,8 +399,11 @@ typedef struct VuVirtqElement { * @iface: a VuDevIface structure with vhost-user device callbacks * * Intializes a VuDev vhost-user context. + * + * Returns: true on success, false on failure. **/ -void vu_init(VuDev *dev, +bool vu_init(VuDev *dev, + uint16_t max_queues, int socket, vu_panic_cb panic, vu_set_watch_cb set_watch, diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c index 86a3987..ae61034 100644 --- a/contrib/vhost-user-blk/vhost-user-blk.c +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -25,6 +25,10 @@ #include <sys/ioctl.h> #endif +enum { + VHOST_USER_BLK_MAX_QUEUES = 8, +}; + struct virtio_blk_inhdr { unsigned char status; }; @@ -334,12 +338,6 @@ static void vub_process_vq(VuDev *vu_dev, int idx) VuVirtq *vq; int ret; - if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { - fprintf(stderr, "VQ Index out of range: %d\n", idx); - vub_panic_cb(vu_dev, NULL); - return; - } - gdev = container_of(vu_dev, VugDev, parent); vdev_blk = container_of(gdev, VubDev, parent); assert(vdev_blk); @@ -631,7 +629,11 @@ int main(int argc, char **argv) vdev_blk->enable_ro = true; } - vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface); + if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock, + vub_panic_cb, &vub_iface)) { + fprintf(stderr, "Failed to initialized libvhost-user-glib\n"); + goto err; + } g_main_loop_run(vdev_blk->loop); diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c index 04b7530..b45d201 100644 --- a/contrib/vhost-user-gpu/main.c +++ b/contrib/vhost-user-gpu/main.c @@ -25,6 +25,10 @@ #include "virgl.h" #include "vugbm.h" +enum { + VHOST_USER_GPU_MAX_QUEUES = 2, +}; + struct virtio_gpu_simple_resource { uint32_t resource_id; uint32_t width; @@ -1169,7 +1173,10 @@ main(int argc, char *argv[]) exit(EXIT_FAILURE); } - vug_init(&g.dev, fd, vg_panic, &vuiface); + if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { + g_printerr("Failed to initialize libvhost-user-glib.\n"); + exit(EXIT_FAILURE); + } loop = g_main_loop_new(NULL, FALSE); g_main_loop_run(loop); diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c index 8b4e7d2..449fd21 100644 --- a/contrib/vhost-user-input/main.c +++ b/contrib/vhost-user-input/main.c @@ -17,6 +17,10 @@ #include "standard-headers/linux/virtio_input.h" #include "qapi/error.h" +enum { + VHOST_USER_INPUT_MAX_QUEUES = 2, +}; + typedef struct virtio_input_event virtio_input_event; typedef struct virtio_input_config virtio_input_config; @@ -384,7 +388,12 @@ main(int argc, char *argv[]) g_printerr("Invalid vhost-user socket.\n"); exit(EXIT_FAILURE); } - vug_init(&vi.dev, fd, vi_panic, &vuiface); + + if (!vug_init(&vi.dev, VHOST_USER_INPUT_MAX_QUEUES, fd, vi_panic, + &vuiface)) { + g_printerr("Failed to initialize libvhost-user-glib.\n"); + exit(EXIT_FAILURE); + } loop = g_main_loop_new(NULL, FALSE); g_main_loop_run(loop); diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c index 496dd6e..0fc14d7 100644 --- a/contrib/vhost-user-scsi/vhost-user-scsi.c +++ b/contrib/vhost-user-scsi/vhost-user-scsi.c @@ -19,6 +19,10 @@ #define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi" +enum { + VHOST_USER_SCSI_MAX_QUEUES = 8, +}; + typedef struct VusIscsiLun { struct iscsi_context *iscsi_ctx; int iscsi_lun; @@ -231,11 +235,6 @@ static void vus_proc_req(VuDev *vu_dev, int idx) gdev = container_of(vu_dev, VugDev, parent); vdev_scsi = container_of(gdev, VusDev, parent); - if (idx < 0 || idx >= VHOST_MAX_NR_VIRTQUEUE) { - g_warning("VQ Index out of range: %d", idx); - vus_panic_cb(vu_dev, NULL); - return; - } vq = vu_get_queue(vu_dev, idx); if (!vq) { @@ -295,12 +294,6 @@ static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started) assert(vu_dev); - if (idx < 0 || idx >= VHOST_MAX_NR_VIRTQUEUE) { - g_warning("VQ Index out of range: %d", idx); - vus_panic_cb(vu_dev, NULL); - return; - } - vq = vu_get_queue(vu_dev, idx); if (idx == 0 || idx == 1) { @@ -398,7 +391,11 @@ int main(int argc, char **argv) goto err; } - vug_init(&vdev_scsi->parent, csock, vus_panic_cb, &vus_iface); + if (!vug_init(&vdev_scsi->parent, VHOST_USER_SCSI_MAX_QUEUES, csock, + vus_panic_cb, &vus_iface)) { + g_printerr("Failed to initialize libvhost-user-glib\n"); + goto err; + } g_main_loop_run(vdev_scsi->loop); diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index dc0ff92..5750668 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -324,19 +324,20 @@ must support changing some configuration aspects on the fly. Multiple queue support ---------------------- -Multiple queue is treated as a protocol extension, hence the slave has -to implement protocol features first. The multiple queues feature is -supported only when the protocol feature ``VHOST_USER_PROTOCOL_F_MQ`` -(bit 0) is set. +Multiple queue support allows the slave to advertise the maximum number of +queues. This is treated as a protocol extension, hence the slave has to +implement protocol features first. The multiple queues feature is supported +only when the protocol feature ``VHOST_USER_PROTOCOL_F_MQ`` (bit 0) is set. -The max number of queue pairs the slave supports can be queried with -message ``VHOST_USER_GET_QUEUE_NUM``. Master should stop when the -number of requested queues is bigger than that. +The max number of queues the slave supports can be queried with message +``VHOST_USER_GET_QUEUE_NUM``. Master should stop when the number of requested +queues is bigger than that. As all queues share one connection, the master uses a unique index for each -queue in the sent message to identify a specified queue. One queue pair -is enabled initially. More queues are enabled dynamically, by sending -message ``VHOST_USER_SET_VRING_ENABLE``. +queue in the sent message to identify a specified queue. + +The master enables queues by sending message ``VHOST_USER_SET_VRING_ENABLE``. +vhost-user-net has historically automatically enabled the first queue pair. Migration --------- diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 9cb6133..85bc401 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -191,7 +191,7 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBlk *s = VHOST_USER_BLK(vdev); - bool should_start = vdev->started; + bool should_start = virtio_device_started(vdev, status); int ret; if (!vdev->vm_running) { @@ -317,7 +317,7 @@ static int vhost_user_blk_connect(DeviceState *dev) } /* restore vhost state */ - if (vdev->started) { + if (virtio_device_started(vdev, vdev->status)) { ret = vhost_user_blk_start(vdev); if (ret < 0) { error_report("vhost-user-blk: vhost start failed: %s", diff --git a/hw/core/machine.c b/hw/core/machine.c index ea5a01a..ea84bd6 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -30,6 +30,7 @@ GlobalProperty hw_compat_4_0[] = { { "bochs-display", "edid", "false" }, { "virtio-vga", "edid", "false" }, { "virtio-gpu-pci", "edid", "false" }, + { "virtio-device", "use-started", "false" }, }; const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); diff --git a/hw/core/numa.c b/hw/core/numa.c index 76c447f..66119d1 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -533,6 +533,7 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) MemoryDeviceInfoList *info_list = qmp_memory_device_list(); MemoryDeviceInfoList *info; PCDIMMDeviceInfo *pcdimm_info; + VirtioPMEMDeviceInfo *vpi; for (info = info_list; info; info = info->next) { MemoryDeviceInfo *value = info->value; @@ -540,22 +541,21 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) if (value) { switch (value->type) { case MEMORY_DEVICE_INFO_KIND_DIMM: - pcdimm_info = value->u.dimm.data; - break; - case MEMORY_DEVICE_INFO_KIND_NVDIMM: - pcdimm_info = value->u.nvdimm.data; - break; - - default: - pcdimm_info = NULL; - break; - } - - if (pcdimm_info) { + pcdimm_info = value->type == MEMORY_DEVICE_INFO_KIND_DIMM ? + value->u.dimm.data : value->u.nvdimm.data; node_mem[pcdimm_info->node].node_mem += pcdimm_info->size; node_mem[pcdimm_info->node].node_plugged_mem += pcdimm_info->size; + break; + case MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM: + vpi = value->u.virtio_pmem.data; + /* TODO: once we support numa, assign to right node */ + node_mem[0].node_mem += vpi->size; + node_mem[0].node_plugged_mem += vpi->size; + break; + default: + g_assert_not_reached(); } } } diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 9817888..4ddf2a9 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -30,6 +30,7 @@ config PC # For ACPI builder: select SERIAL_ISA select ACPI_VMGENID + select VIRTIO_PMEM_SUPPORTED config PC_PCI bool diff --git a/hw/i386/pc.c b/hw/i386/pc.c index e96360b..b380bd7 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -79,6 +79,8 @@ #include "hw/i386/intel_iommu.h" #include "hw/net/ne2000-isa.h" #include "standard-headers/asm-x86/bootparam.h" +#include "hw/virtio/virtio-pmem-pci.h" +#include "hw/mem/memory-device.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -913,14 +915,6 @@ bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length) return false; } -/* Enables contiguous-apic-ID mode, for compatibility */ -static bool compat_apic_id_mode; - -void enable_compat_apic_id_mode(void) -{ - compat_apic_id_mode = true; -} - /* Calculates initial APIC ID for a specific CPU index * * Currently we need to be able to calculate the APIC ID from the CPU index @@ -928,13 +922,15 @@ void enable_compat_apic_id_mode(void) * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of * all CPUs up to max_cpus. */ -static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index) +static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, + unsigned int cpu_index) { + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); uint32_t correct_id; static bool warned; correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index); - if (compat_apic_id_mode) { + if (pcmc->compat_apic_id_mode) { if (cpu_index != correct_id && !warned && !qtest_enabled()) { error_report("APIC IDs set in compatibility mode, " "CPU topology won't match the configuration"); @@ -1533,7 +1529,8 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) void pc_hot_add_cpu(const int64_t id, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); - int64_t apic_id = x86_cpu_apic_id_from_index(id); + PCMachineState *pcms = PC_MACHINE(ms); + int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id); Error *local_err = NULL; if (id < 0) { @@ -1569,7 +1566,7 @@ void pc_cpus_init(PCMachineState *pcms) * * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). */ - pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1; + pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, max_cpus - 1) + 1; possible_cpus = mc->possible_cpu_arch_ids(ms); for (i = 0; i < smp_cpus; i++) { pc_new_cpu(possible_cpus->cpus[i].type, possible_cpus->cpus[i].arch_id, @@ -2395,6 +2392,65 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, numa_cpu_pre_plug(cpu_slot, dev, errp); } +static void pc_virtio_pmem_pci_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); + Error *local_err = NULL; + + if (!hotplug_dev2) { + /* + * Without a bus hotplug handler, we cannot control the plug/unplug + * order. This should never be the case on x86, however better add + * a safety net. + */ + error_setg(errp, "virtio-pmem-pci not supported on this bus."); + return; + } + /* + * First, see if we can plug this memory device at all. If that + * succeeds, branch of to the actual hotplug handler. + */ + memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, + &local_err); + if (!local_err) { + hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err); + } + error_propagate(errp, local_err); +} + +static void pc_virtio_pmem_pci_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); + Error *local_err = NULL; + + /* + * Plug the memory device first and then branch off to the actual + * hotplug handler. If that one fails, we can easily undo the memory + * device bits. + */ + memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); + hotplug_handler_plug(hotplug_dev2, dev, &local_err); + if (local_err) { + memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); + } + error_propagate(errp, local_err); +} + +static void pc_virtio_pmem_pci_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* We don't support virtio pmem hot unplug */ + error_setg(errp, "virtio pmem device unplug not supported."); +} + +static void pc_virtio_pmem_pci_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* We don't support virtio pmem hot unplug */ +} + static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2402,6 +2458,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, pc_memory_pre_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_pre_plug(hotplug_dev, dev, errp); } } @@ -2412,6 +2470,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, pc_memory_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_plug(hotplug_dev, dev, errp); } } @@ -2422,6 +2482,8 @@ static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, pc_memory_unplug_request(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_unplug_request_cb(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_unplug_request(hotplug_dev, dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2435,6 +2497,8 @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev, pc_memory_unplug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_unplug_cb(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_unplug(hotplug_dev, dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2445,7 +2509,8 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, DeviceState *dev) { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || - object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + object_dynamic_cast(OBJECT(dev), TYPE_CPU) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { return HOTPLUG_HANDLER(machine); } @@ -2660,6 +2725,7 @@ static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx) static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) { + PCMachineState *pcms = PC_MACHINE(ms); int i; if (ms->possible_cpus) { @@ -2679,7 +2745,7 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus->cpus[i].type = ms->cpu_type; ms->possible_cpus->cpus[i].vcpus_count = 1; - ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i); + ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, smp_cores, smp_threads, &topo); ms->possible_cpus->cpus[i].props.has_socket_id = true; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index c07c4a5..f29de58 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -358,7 +358,6 @@ static void pc_compat_1_4_fn(MachineState *machine) static void pc_compat_1_3(MachineState *machine) { pc_compat_1_4_fn(machine); - enable_compat_apic_id_mode(); } /* PC compat function for pc-0.14 to pc-1.2 */ @@ -708,6 +707,7 @@ DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4_fn, static void pc_i440fx_1_3_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); static GlobalProperty compat[] = { PC_CPU_MODEL_IDS("1.3.0") { "usb-tablet", "usb_version", "1" }, @@ -718,6 +718,7 @@ static void pc_i440fx_1_3_machine_options(MachineClass *m) pc_i440fx_1_4_machine_options(m); m->hw_version = "1.3.0"; + pcmc->compat_apic_id_mode = true; compat_props_add(m->compat_props, compat, G_N_ELEMENTS(compat)); } diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c index 92f253c..09019ca 100644 --- a/hw/pci-bridge/pcie_root_port.c +++ b/hw/pci-bridge/pcie_root_port.c @@ -31,10 +31,13 @@ static void rp_write_config(PCIDevice *d, uint32_t address, { uint32_t root_cmd = pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND); + uint16_t slt_ctl, slt_sta; + + pcie_cap_slot_get(d, &slt_ctl, &slt_sta); pci_bridge_write_config(d, address, val, len); rp_aer_vector_update(d); - pcie_cap_slot_write_config(d, address, val, len); + pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len); pcie_aer_write_config(d, address, val, len); pcie_aer_root_write_config(d, address, val, len, root_cmd); } diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c index 264e37d..899b0fd 100644 --- a/hw/pci-bridge/xio3130_downstream.c +++ b/hw/pci-bridge/xio3130_downstream.c @@ -41,9 +41,12 @@ static void xio3130_downstream_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { + uint16_t slt_ctl, slt_sta; + + pcie_cap_slot_get(d, &slt_sta, &slt_ctl); pci_bridge_write_config(d, address, val, len); pcie_cap_flr_write_config(d, address, val, len); - pcie_cap_slot_write_config(d, address, val, len); + pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len); pcie_aer_write_config(d, address, val, len); } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 88c30ff..a6beb56 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -383,7 +383,7 @@ static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event) { /* Minor optimization: if nothing changed - no event is needed. */ if (pci_word_test_and_set_mask(dev->config + dev->exp.exp_cap + - PCI_EXP_SLTSTA, event)) { + PCI_EXP_SLTSTA, event) == event) { return; } hotplug_event_notify(dev); @@ -594,7 +594,16 @@ void pcie_cap_slot_reset(PCIDevice *dev) hotplug_event_update_event_status(dev); } +void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta) +{ + uint32_t pos = dev->exp.exp_cap; + uint8_t *exp_cap = dev->config + pos; + *slt_ctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); +} + void pcie_cap_slot_write_config(PCIDevice *dev, + uint16_t old_slt_ctl, uint16_t old_slt_sta, uint32_t addr, uint32_t val, int len) { uint32_t pos = dev->exp.exp_cap; @@ -602,6 +611,25 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { + /* + * Guests tend to clears all bits during init. + * If they clear bits that weren't set this is racy and will lose events: + * not a big problem for manual button presses, but a problem for us. + * As a work-around, detect this and revert status to what it was + * before the write. + * + * Note: in theory this can be detected as a duplicate button press + * which cancels the previous press. Does not seem to happen in + * practice as guests seem to only have this bug during init. + */ +#define PCIE_SLOT_EVENTS (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | \ + PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC | \ + PCI_EXP_SLTSTA_CC) + + if (val & ~old_slt_sta & PCIE_SLOT_EVENTS) { + sltsta = (sltsta & ~PCIE_SLOT_EVENTS) | (old_slt_sta & PCIE_SLOT_EVENTS); + pci_set_word(exp_cap + PCI_EXP_SLTSTA, sltsta); + } hotplug_event_clear(dev); } @@ -619,11 +647,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev, } /* - * If the slot is polulated, power indicator is off and power + * If the slot is populated, power indicator is off and power * controller is off, it is safe to detach the devices. + * + * Note: don't detach if condition was already true: + * this is a work around for guests that overwrite + * control of powered off slots before powering them on. */ if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && - ((val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF)) { + (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && + (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || + (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); pci_for_each_device(sec_bus, pci_bus_num(sec_bus), pcie_unplug_device, NULL); diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index e0452de..3724ff8 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -29,3 +29,13 @@ config VIRTIO_CRYPTO bool default y depends on VIRTIO + +config VIRTIO_PMEM_SUPPORTED + bool + +config VIRTIO_PMEM + bool + default y + depends on VIRTIO + depends on VIRTIO_PMEM_SUPPORTED + select MEM_DEVICE diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 5570ea8..964ce78 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -12,6 +12,8 @@ common-obj-$(CONFIG_VIRTIO_MMIO) += virtio-mmio.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio-balloon.o obj-$(CONFIG_VIRTIO_CRYPTO) += virtio-crypto.o obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += virtio-crypto-pci.o +obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o +common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o ifeq ($(CONFIG_VIRTIO_PCI),y) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index e6d5467..ce928f2 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1913,13 +1913,6 @@ static void virtio_pci_generic_class_init(ObjectClass *klass, void *data) dc->props = virtio_pci_generic_properties; } -/* Used when the generic type and the base type is the same */ -static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data) -{ - virtio_pci_base_class_init(klass, data); - virtio_pci_generic_class_init(klass, NULL); -} - static void virtio_pci_transitional_instance_init(Object *obj) { VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); @@ -1938,15 +1931,15 @@ static void virtio_pci_non_transitional_instance_init(Object *obj) void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) { + char *base_name = NULL; TypeInfo base_type_info = { .name = t->base_name, .parent = t->parent ? t->parent : TYPE_VIRTIO_PCI, .instance_size = t->instance_size, .instance_init = t->instance_init, .class_size = t->class_size, - .class_init = virtio_pci_base_class_init, - .class_data = (void *)t, .abstract = true, + .interfaces = t->interfaces, }; TypeInfo generic_type_info = { .name = t->generic_name, @@ -1961,13 +1954,20 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) if (!base_type_info.name) { /* No base type -> register a single generic device type */ - base_type_info.name = t->generic_name; - base_type_info.class_init = virtio_pci_generic_base_class_init; - base_type_info.interfaces = generic_type_info.interfaces; - base_type_info.abstract = false; - generic_type_info.name = NULL; + /* use intermediate %s-base-type to add generic device props */ + base_name = g_strdup_printf("%s-base-type", t->generic_name); + base_type_info.name = base_name; + base_type_info.class_init = virtio_pci_generic_class_init; + + generic_type_info.parent = base_name; + generic_type_info.class_init = virtio_pci_base_class_init; + generic_type_info.class_data = (void *)t; + assert(!t->non_transitional_name); assert(!t->transitional_name); + } else { + base_type_info.class_init = virtio_pci_base_class_init; + base_type_info.class_data = (void *)t; } type_register(&base_type_info); @@ -2005,6 +2005,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) }; type_register(&transitional_type_info); } + g_free(base_name); } /* virtio-pci-bus */ diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index bfea289..619d909 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -252,6 +252,7 @@ typedef struct VirtioPCIDeviceTypeInfo { size_t class_size; void (*instance_init)(Object *obj); void (*class_init)(ObjectClass *klass, void *data); + InterfaceInfo *interfaces; } VirtioPCIDeviceTypeInfo; /* Register virtio-pci type(s). @t must be static. */ diff --git a/hw/virtio/virtio-pmem-pci.c b/hw/virtio/virtio-pmem-pci.c new file mode 100644 index 0000000..8b2d0db --- /dev/null +++ b/hw/virtio/virtio-pmem-pci.c @@ -0,0 +1,131 @@ +/* + * Virtio PMEM PCI device + * + * Copyright (C) 2018-2019 Red Hat, Inc. + * + * Authors: + * Pankaj Gupta <pagupta@redhat.com> + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "virtio-pmem-pci.h" +#include "hw/mem/memory-device.h" +#include "qapi/error.h" + +static void virtio_pmem_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VirtIOPMEMPCI *pmem_pci = VIRTIO_PMEM_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&pmem_pci->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void virtio_pmem_pci_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), addr, VIRTIO_PMEM_ADDR_PROP, errp); +} + +static uint64_t virtio_pmem_pci_get_addr(const MemoryDeviceState *md) +{ + return object_property_get_uint(OBJECT(md), VIRTIO_PMEM_ADDR_PROP, + &error_abort); +} + +static MemoryRegion *virtio_pmem_pci_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md); + VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev); + VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem); + + return vpc->get_memory_region(pmem, errp); +} + +static uint64_t virtio_pmem_pci_get_plugged_size(const MemoryDeviceState *md, + Error **errp) +{ + VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md); + VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev); + VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem); + MemoryRegion *mr = vpc->get_memory_region(pmem, errp); + + /* the plugged size corresponds to the region size */ + return mr ? 0 : memory_region_size(mr); +} + +static void virtio_pmem_pci_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + VirtioPMEMDeviceInfo *vi = g_new0(VirtioPMEMDeviceInfo, 1); + VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md); + VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev); + VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem); + DeviceState *dev = DEVICE(md); + + if (dev->id) { + vi->has_id = true; + vi->id = g_strdup(dev->id); + } + + /* let the real device handle everything else */ + vpc->fill_device_info(pmem, vi); + + info->u.virtio_pmem.data = vi; + info->type = MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM; +} + +static void virtio_pmem_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass); + + k->realize = virtio_pmem_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_PMEM; + pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; + pcidev_k->class_id = PCI_CLASS_OTHERS; + + mdc->get_addr = virtio_pmem_pci_get_addr; + mdc->set_addr = virtio_pmem_pci_set_addr; + mdc->get_plugged_size = virtio_pmem_pci_get_plugged_size; + mdc->get_memory_region = virtio_pmem_pci_get_memory_region; + mdc->fill_device_info = virtio_pmem_pci_fill_device_info; +} + +static void virtio_pmem_pci_instance_init(Object *obj) +{ + VirtIOPMEMPCI *dev = VIRTIO_PMEM_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_PMEM); +} + +static const VirtioPCIDeviceTypeInfo virtio_pmem_pci_info = { + .base_name = TYPE_VIRTIO_PMEM_PCI, + .generic_name = "virtio-pmem-pci", + .transitional_name = "virtio-pmem-pci-transitional", + .non_transitional_name = "virtio-pmem-pci-non-transitional", + .instance_size = sizeof(VirtIOPMEMPCI), + .instance_init = virtio_pmem_pci_instance_init, + .class_init = virtio_pmem_pci_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void virtio_pmem_pci_register_types(void) +{ + virtio_pci_types_register(&virtio_pmem_pci_info); +} +type_init(virtio_pmem_pci_register_types) diff --git a/hw/virtio/virtio-pmem-pci.h b/hw/virtio/virtio-pmem-pci.h new file mode 100644 index 0000000..616abef --- /dev/null +++ b/hw/virtio/virtio-pmem-pci.h @@ -0,0 +1,34 @@ +/* + * Virtio PMEM PCI device + * + * Copyright (C) 2018-2019 Red Hat, Inc. + * + * Authors: + * Pankaj Gupta <pagupta@redhat.com> + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_VIRTIO_PMEM_PCI_H +#define QEMU_VIRTIO_PMEM_PCI_H + +#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-pmem.h" + +typedef struct VirtIOPMEMPCI VirtIOPMEMPCI; + +/* + * virtio-pmem-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VIRTIO_PMEM_PCI "virtio-pmem-pci-base" +#define VIRTIO_PMEM_PCI(obj) \ + OBJECT_CHECK(VirtIOPMEMPCI, (obj), TYPE_VIRTIO_PMEM_PCI) + +struct VirtIOPMEMPCI { + VirtIOPCIProxy parent_obj; + VirtIOPMEM vdev; +}; + +#endif /* QEMU_VIRTIO_PMEM_PCI_H */ diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c new file mode 100644 index 0000000..adbfb60 --- /dev/null +++ b/hw/virtio/virtio-pmem.c @@ -0,0 +1,189 @@ +/* + * Virtio PMEM device + * + * Copyright (C) 2018-2019 Red Hat, Inc. + * + * Authors: + * Pankaj Gupta <pagupta@redhat.com> + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "hw/virtio/virtio-pmem.h" +#include "hw/virtio/virtio-access.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_pmem.h" +#include "block/aio.h" +#include "block/thread-pool.h" + +typedef struct VirtIODeviceRequest { + VirtQueueElement elem; + int fd; + VirtIOPMEM *pmem; + VirtIODevice *vdev; + struct virtio_pmem_req req; + struct virtio_pmem_resp resp; +} VirtIODeviceRequest; + +static int worker_cb(void *opaque) +{ + VirtIODeviceRequest *req_data = opaque; + int err = 0; + + /* flush raw backing image */ + err = fsync(req_data->fd); + if (err != 0) { + err = 1; + } + + virtio_stw_p(req_data->vdev, &req_data->resp.ret, err); + + return 0; +} + +static void done_cb(void *opaque, int ret) +{ + VirtIODeviceRequest *req_data = opaque; + int len = iov_from_buf(req_data->elem.in_sg, req_data->elem.in_num, 0, + &req_data->resp, sizeof(struct virtio_pmem_resp)); + + /* Callbacks are serialized, so no need to use atomic ops. */ + virtqueue_push(req_data->pmem->rq_vq, &req_data->elem, len); + virtio_notify((VirtIODevice *)req_data->pmem, req_data->pmem->rq_vq); + g_free(req_data); +} + +static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIODeviceRequest *req_data; + VirtIOPMEM *pmem = VIRTIO_PMEM(vdev); + HostMemoryBackend *backend = MEMORY_BACKEND(pmem->memdev); + ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); + + req_data = virtqueue_pop(vq, sizeof(VirtIODeviceRequest)); + if (!req_data) { + virtio_error(vdev, "virtio-pmem missing request data"); + return; + } + + if (req_data->elem.out_num < 1 || req_data->elem.in_num < 1) { + virtio_error(vdev, "virtio-pmem request not proper"); + g_free(req_data); + return; + } + req_data->fd = memory_region_get_fd(&backend->mr); + req_data->pmem = pmem; + req_data->vdev = vdev; + thread_pool_submit_aio(pool, worker_cb, req_data, done_cb, req_data); +} + +static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIOPMEM *pmem = VIRTIO_PMEM(vdev); + struct virtio_pmem_config *pmemcfg = (struct virtio_pmem_config *) config; + + virtio_stq_p(vdev, &pmemcfg->start, pmem->start); + virtio_stq_p(vdev, &pmemcfg->size, memory_region_size(&pmem->memdev->mr)); +} + +static uint64_t virtio_pmem_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + return features; +} + +static void virtio_pmem_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOPMEM *pmem = VIRTIO_PMEM(dev); + + if (!pmem->memdev) { + error_setg(errp, "virtio-pmem memdev not set"); + return; + } + + if (host_memory_backend_is_mapped(pmem->memdev)) { + char *path = object_get_canonical_path_component(OBJECT(pmem->memdev)); + error_setg(errp, "can't use already busy memdev: %s", path); + g_free(path); + return; + } + + host_memory_backend_set_mapped(pmem->memdev, true); + virtio_init(vdev, TYPE_VIRTIO_PMEM, VIRTIO_ID_PMEM, + sizeof(struct virtio_pmem_config)); + pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush); +} + +static void virtio_pmem_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOPMEM *pmem = VIRTIO_PMEM(dev); + + host_memory_backend_set_mapped(pmem->memdev, false); + virtio_cleanup(vdev); +} + +static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem, + VirtioPMEMDeviceInfo *vi) +{ + vi->memaddr = pmem->start; + vi->size = pmem->memdev ? memory_region_size(&pmem->memdev->mr) : 0; + vi->memdev = object_get_canonical_path(OBJECT(pmem->memdev)); +} + +static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem, + Error **errp) +{ + if (!pmem->memdev) { + error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP); + return NULL; + } + + return &pmem->memdev->mr; +} + +static Property virtio_pmem_properties[] = { + DEFINE_PROP_UINT64(VIRTIO_PMEM_ADDR_PROP, VirtIOPMEM, start, 0), + DEFINE_PROP_LINK(VIRTIO_PMEM_MEMDEV_PROP, VirtIOPMEM, memdev, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_pmem_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + VirtIOPMEMClass *vpc = VIRTIO_PMEM_CLASS(klass); + + dc->props = virtio_pmem_properties; + + vdc->realize = virtio_pmem_realize; + vdc->unrealize = virtio_pmem_unrealize; + vdc->get_config = virtio_pmem_get_config; + vdc->get_features = virtio_pmem_get_features; + + vpc->fill_device_info = virtio_pmem_fill_device_info; + vpc->get_memory_region = virtio_pmem_get_memory_region; +} + +static TypeInfo virtio_pmem_info = { + .name = TYPE_VIRTIO_PMEM, + .parent = TYPE_VIRTIO_DEVICE, + .class_size = sizeof(VirtIOPMEMClass), + .class_init = virtio_pmem_class_init, + .instance_size = sizeof(VirtIOPMEM), +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_pmem_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index e1e90fc..18f9f4c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1162,9 +1162,10 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) } } } - vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; - if (unlikely(vdev->start_on_kick && vdev->started)) { - vdev->start_on_kick = false; + + if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != + (val & VIRTIO_CONFIG_S_DRIVER_OK)) { + virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); } if (k->set_status) { @@ -1214,8 +1215,7 @@ void virtio_reset(void *opaque) k->reset(vdev); } - vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && - !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); + vdev->start_on_kick = false; vdev->started = false; vdev->broken = false; vdev->guest_features = 0; @@ -1536,8 +1536,7 @@ static bool virtio_queue_notify_aio_vq(VirtQueue *vq) ret = vq->handle_aio_output(vdev, vq); if (unlikely(vdev->start_on_kick)) { - vdev->started = true; - vdev->start_on_kick = false; + virtio_set_started(vdev, true); } } @@ -1557,8 +1556,7 @@ static void virtio_queue_notify_vq(VirtQueue *vq) vq->handle_output(vdev, vq); if (unlikely(vdev->start_on_kick)) { - vdev->started = true; - vdev->start_on_kick = false; + virtio_set_started(vdev, true); } } } @@ -1576,11 +1574,10 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) event_notifier_set(&vq->host_notifier); } else if (vq->handle_output) { vq->handle_output(vdev, vq); - } - if (unlikely(vdev->start_on_kick)) { - vdev->started = true; - vdev->start_on_kick = false; + if (unlikely(vdev->start_on_kick)) { + virtio_set_started(vdev, true); + } } } @@ -2069,14 +2066,21 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return -EINVAL; } ret = virtio_set_features_nocheck(vdev, val); - if (!ret && virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { - /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */ - int i; - for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { - if (vdev->vq[i].vring.num != 0) { - virtio_init_region_cache(vdev, i); + if (!ret) { + if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { + /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */ + int i; + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + if (vdev->vq[i].vring.num != 0) { + virtio_init_region_cache(vdev, i); + } } } + + if (!virtio_device_started(vdev, vdev->status) && + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + vdev->start_on_kick = true; + } } return ret; } @@ -2228,6 +2232,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) } } + if (!virtio_device_started(vdev, vdev->status) && + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + vdev->start_on_kick = true; + } + rcu_read_lock(); for (i = 0; i < num; i++) { if (vdev->vq[i].vring.desc) { @@ -2291,7 +2300,7 @@ static void virtio_vmstate_change(void *opaque, int running, RunState state) VirtIODevice *vdev = opaque; BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - bool backend_run = running && vdev->started; + bool backend_run = running && virtio_device_started(vdev, vdev->status); vdev->vm_running = running; if (backend_run) { @@ -2330,8 +2339,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, g_malloc0(sizeof(*vdev->vector_queues) * nvectors); } - vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && - !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); + vdev->start_on_kick = false; vdev->started = false; vdev->device_id = device_id; vdev->status = 0; @@ -2669,6 +2677,7 @@ static void virtio_device_instance_finalize(Object *obj) static Property virtio_properties[] = { DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), + DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c54cc54..853502f 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -134,6 +134,9 @@ typedef struct PCMachineClass { /* use PVH to load kernels that support this feature */ bool pvh_enabled; + + /* Enables contiguous-apic-ID mode */ + bool compat_apic_id_mode; } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index d082707..aaf1b9f 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -85,6 +85,7 @@ extern bool pci_available; #define PCI_DEVICE_ID_VIRTIO_RNG 0x1005 #define PCI_DEVICE_ID_VIRTIO_9P 0x1009 #define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012 +#define PCI_DEVICE_ID_VIRTIO_PMEM 0x1013 #define PCI_VENDOR_ID_REDHAT 0x1b36 #define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001 diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index e30334d..34f2777 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -107,7 +107,9 @@ void pcie_cap_lnkctl_reset(PCIDevice *dev); void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot); void pcie_cap_slot_reset(PCIDevice *dev); +void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slot_ctl, uint16_t *slt_sta); void pcie_cap_slot_write_config(PCIDevice *dev, + uint16_t old_slot_ctl, uint16_t old_slt_sta, uint32_t addr, uint32_t val, int len); int pcie_cap_slot_post_load(void *opaque, int version_id); void pcie_cap_slot_push_attention_button(PCIDevice *dev); diff --git a/include/hw/virtio/virtio-pmem.h b/include/hw/virtio/virtio-pmem.h new file mode 100644 index 0000000..19b6ee6 --- /dev/null +++ b/include/hw/virtio/virtio-pmem.h @@ -0,0 +1,49 @@ +/* + * Virtio PMEM device + * + * Copyright (C) 2018-2019 Red Hat, Inc. + * + * Authors: + * Pankaj Gupta <pagupta@redhat.com> + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_PMEM_H +#define HW_VIRTIO_PMEM_H + +#include "hw/virtio/virtio.h" +#include "sysemu/hostmem.h" + +#define TYPE_VIRTIO_PMEM "virtio-pmem" + +#define VIRTIO_PMEM(obj) \ + OBJECT_CHECK(VirtIOPMEM, (obj), TYPE_VIRTIO_PMEM) +#define VIRTIO_PMEM_CLASS(oc) \ + OBJECT_CLASS_CHECK(VirtIOPMEMClass, (oc), TYPE_VIRTIO_PMEM) +#define VIRTIO_PMEM_GET_CLASS(obj) \ + OBJECT_GET_CLASS(VirtIOPMEMClass, (obj), TYPE_VIRTIO_PMEM) + +#define VIRTIO_PMEM_ADDR_PROP "memaddr" +#define VIRTIO_PMEM_MEMDEV_PROP "memdev" + +typedef struct VirtIOPMEM { + VirtIODevice parent_obj; + + VirtQueue *rq_vq; + uint64_t start; + HostMemoryBackend *memdev; +} VirtIOPMEM; + +typedef struct VirtIOPMEMClass { + /* private */ + VirtIODevice parent; + + /* public */ + void (*fill_device_info)(const VirtIOPMEM *pmem, VirtioPMEMDeviceInfo *vi); + MemoryRegion *(*get_memory_region)(VirtIOPMEM *pmem, Error **errp); +} VirtIOPMEMClass; + +#endif diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 27c0efc..b189788 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -105,8 +105,9 @@ struct VirtIODevice uint16_t device_id; bool vm_running; bool broken; /* device in invalid state, needs reset */ + bool use_started; bool started; - bool start_on_kick; /* virtio 1.0 transitional devices support that */ + bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ VMChangeStateEntry *vmstate; char *bus_name; uint8_t device_endian; @@ -351,4 +352,24 @@ static inline bool virtio_is_big_endian(VirtIODevice *vdev) /* Devices conforming to VIRTIO 1.0 or later are always LE. */ return false; } + +static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status) +{ + if (vdev->use_started) { + return vdev->started; + } + + return status & VIRTIO_CONFIG_S_DRIVER_OK; +} + +static inline void virtio_set_started(VirtIODevice *vdev, bool started) +{ + if (started) { + vdev->start_on_kick = false; + } + + if (vdev->use_started) { + vdev->started = started; + } +} #endif diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index 6d5c3b2..32b2f94 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -43,5 +43,6 @@ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/standard-headers/linux/virtio_pmem.h b/include/standard-headers/linux/virtio_pmem.h new file mode 100644 index 0000000..7e3d43b --- /dev/null +++ b/include/standard-headers/linux/virtio_pmem.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * Definitions for virtio-pmem devices. + * + * Copyright (C) 2019 Red Hat, Inc. + * + * Author(s): Pankaj Gupta <pagupta@redhat.com> + */ + +#ifndef _UAPI_LINUX_VIRTIO_PMEM_H +#define _UAPI_LINUX_VIRTIO_PMEM_H + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +struct virtio_pmem_config { + uint64_t start; + uint64_t size; +}; + +#define VIRTIO_PMEM_REQ_TYPE_FLUSH 0 + +struct virtio_pmem_resp { + /* Host return status corresponding to flush request */ + uint32_t ret; +}; + +struct virtio_pmem_req { + /* command type */ + uint32_t type; +}; + +#endif diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 7cccedb..99ceb08 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -2567,6 +2567,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) Error *err = NULL; MemoryDeviceInfoList *info_list = qmp_query_memory_devices(&err); MemoryDeviceInfoList *info; + VirtioPMEMDeviceInfo *vpi; MemoryDeviceInfo *value; PCDIMMDeviceInfo *di; @@ -2576,19 +2577,9 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) if (value) { switch (value->type) { case MEMORY_DEVICE_INFO_KIND_DIMM: - di = value->u.dimm.data; - break; - case MEMORY_DEVICE_INFO_KIND_NVDIMM: - di = value->u.nvdimm.data; - break; - - default: - di = NULL; - break; - } - - if (di) { + di = value->type == MEMORY_DEVICE_INFO_KIND_DIMM ? + value->u.dimm.data : value->u.nvdimm.data; monitor_printf(mon, "Memory device [%s]: \"%s\"\n", MemoryDeviceInfoKind_str(value->type), di->id ? di->id : ""); @@ -2601,6 +2592,18 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) di->hotplugged ? "true" : "false"); monitor_printf(mon, " hotpluggable: %s\n", di->hotpluggable ? "true" : "false"); + break; + case MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM: + vpi = value->u.virtio_pmem.data; + monitor_printf(mon, "Memory device [%s]: \"%s\"\n", + MemoryDeviceInfoKind_str(value->type), + vpi->id ? vpi->id : ""); + monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", vpi->memaddr); + monitor_printf(mon, " size: %" PRIu64 "\n", vpi->size); + monitor_printf(mon, " memdev: %s\n", vpi->memdev); + break; + default: + g_assert_not_reached(); } } } diff --git a/qapi/misc.json b/qapi/misc.json index 31427d4..a7fba72 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -1551,15 +1551,41 @@ } ## +# @VirtioPMEMDeviceInfo: +# +# VirtioPMEM state information +# +# @id: device's ID +# +# @memaddr: physical address in memory, where device is mapped +# +# @size: size of memory that the device provides +# +# @memdev: memory backend linked with device +# +# Since: 4.1 +## +{ 'struct': 'VirtioPMEMDeviceInfo', + 'data': { '*id': 'str', + 'memaddr': 'size', + 'size': 'size', + 'memdev': 'str' + } +} + +## # @MemoryDeviceInfo: # # Union containing information about a memory device # +# nvdimm is included since 2.12. virtio-pmem is included since 4.1. +# # Since: 2.1 ## { 'union': 'MemoryDeviceInfo', 'data': { 'dimm': 'PCDIMMDeviceInfo', - 'nvdimm': 'PCDIMMDeviceInfo' + 'nvdimm': 'PCDIMMDeviceInfo', + 'virtio-pmem': 'VirtioPMEMDeviceInfo' } } diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c index 0bb03af..c4e350e 100644 --- a/tests/vhost-user-bridge.c +++ b/tests/vhost-user-bridge.c @@ -45,6 +45,10 @@ } \ } while (0) +enum { + VHOST_USER_BRIDGE_MAX_QUEUES = 8, +}; + typedef void (*CallbackFunc)(int sock, void *ctx); typedef struct Event { @@ -512,12 +516,16 @@ vubr_accept_cb(int sock, void *ctx) } DPRINT("Got connection from remote peer on sock %d\n", conn_fd); - vu_init(&dev->vudev, - conn_fd, - vubr_panic, - vubr_set_watch, - vubr_remove_watch, - &vuiface); + if (!vu_init(&dev->vudev, + VHOST_USER_BRIDGE_MAX_QUEUES, + conn_fd, + vubr_panic, + vubr_set_watch, + vubr_remove_watch, + &vuiface)) { + fprintf(stderr, "Failed to initialize libvhost-user\n"); + exit(1); + } dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb); dispatcher_remove(&dev->dispatcher, sock); @@ -560,12 +568,18 @@ vubr_new(const char *path, bool client) if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) { vubr_die("connect"); } - vu_init(&dev->vudev, - dev->sock, - vubr_panic, - vubr_set_watch, - vubr_remove_watch, - &vuiface); + + if (!vu_init(&dev->vudev, + VHOST_USER_BRIDGE_MAX_QUEUES, + dev->sock, + vubr_panic, + vubr_set_watch, + vubr_remove_watch, + &vuiface)) { + fprintf(stderr, "Failed to initialize libvhost-user\n"); + exit(1); + } + cb = vubr_receive_cb; } @@ -584,7 +598,7 @@ static void *notifier_thread(void *arg) int qidx; while (true) { - for (qidx = 0; qidx < VHOST_MAX_NR_VIRTQUEUE; qidx++) { + for (qidx = 0; qidx < VHOST_USER_BRIDGE_MAX_QUEUES; qidx++) { uint16_t *n = vubr->notifier.addr + pagesize * qidx; if (*n == qidx) { @@ -616,7 +630,7 @@ vubr_host_notifier_setup(VubrDev *dev) void *addr; int fd; - length = getpagesize() * VHOST_MAX_NR_VIRTQUEUE; + length = getpagesize() * VHOST_USER_BRIDGE_MAX_QUEUES; fd = mkstemp(template); if (fd < 0) { |