diff options
48 files changed, 1583 insertions, 761 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index dc0aa54..5c4e23e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -866,6 +866,7 @@ VFIO M: Alex Williamson <alex.williamson@redhat.com> S: Supported F: hw/vfio/* +F: include/hw/vfio/ vhost M: Michael S. Tsirkin <mst@redhat.com> @@ -238,7 +238,7 @@ qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-o qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a -qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o +qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap @@ -329,7 +329,7 @@ ifneq ($(EXESUF),) qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI) endif -ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) +ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a $(call LINK, $^) ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a $(call LINK, $^) diff --git a/block/sheepdog.c b/block/sheepdog.c index 8739acc..05677ed 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -615,14 +615,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); if (ret != sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); - ret = -socket_error(); - return ret; + return -errno; } ret = qemu_co_send(sockfd, data, *wlen); if (ret != *wlen) { - ret = -socket_error(); error_report("failed to send a req, %s", strerror(errno)); + return -errno; } return ret; diff --git a/docs/migration.txt b/docs/migration.txt index fda8d61..90209ab 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -333,7 +333,7 @@ doesn't finish in a given time the switch is made to postcopy. To enable postcopy, issue this command on the monitor prior to the start of migration: -migrate_set_capability x-postcopy-ram on +migrate_set_capability postcopy-ram on The normal commands are then used to start a migration, which is still started in precopy mode. Issuing: diff --git a/hmp-commands.hx b/hmp-commands.hx index 664d794..639205b 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1026,7 +1026,7 @@ ETEXI .args_type = "", .params = "", .help = "Followup to a migration command to switch the migration" - " to postcopy mode. The x-postcopy-ram capability must " + " to postcopy mode. The postcopy-ram capability must " "be set before the original migration command.", .mhandler.cmd = hmp_migrate_start_postcopy, }, diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c index 04afeae..49bd212 100644 --- a/hw/arm/sysbus-fdt.c +++ b/hw/arm/sysbus-fdt.c @@ -240,7 +240,7 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque) mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); reg_attr[2 * i] = cpu_to_be32(mmio_base); reg_attr[2 * i + 1] = cpu_to_be32( - memory_region_size(&vdev->regions[i]->mem)); + memory_region_size(vdev->regions[i]->mem)); } qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, vbasedev->num_regions * 2 * sizeof(uint32_t)); @@ -374,7 +374,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque) mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); reg_attr[2 * i] = cpu_to_be32(mmio_base); reg_attr[2 * i + 1] = cpu_to_be32( - memory_region_size(&vdev->regions[i]->mem)); + memory_region_size(vdev->regions[i]->mem)); } qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr, vbasedev->num_regions * 2 * sizeof(uint32_t)); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 89f5d0d..3d8c3c4 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -22,20 +22,7 @@ #include "s390-pci-bus.h" #include "hw/s390x/storage-keys.h" #include "hw/compat.h" - -#define TYPE_S390_CCW_MACHINE "s390-ccw-machine" - -#define S390_CCW_MACHINE(obj) \ - OBJECT_CHECK(S390CcwMachineState, (obj), TYPE_S390_CCW_MACHINE) - -typedef struct S390CcwMachineState { - /*< private >*/ - MachineState parent_obj; - - /*< public >*/ - bool aes_key_wrap; - bool dea_key_wrap; -} S390CcwMachineState; +#include "hw/s390x/s390-virtio-ccw.h" static const char *const reset_dev_types[] = { "virtual-css-bridge", @@ -136,7 +123,7 @@ static void ccw_init(MachineState *machine) virtio_ccw_register_hcalls(); /* init CPUs */ - s390_init_cpus(machine->cpu_model); + s390_init_cpus(machine); if (kvm_enabled()) { kvm_s390_enable_css_support(s390_cpu_addr2state(0)); @@ -156,13 +143,54 @@ static void ccw_init(MachineState *machine) gtod_save, gtod_load, kvm_state); } +static void s390_cpu_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + gchar *name; + S390CPU *cpu = S390_CPU(dev); + CPUState *cs = CPU(dev); + + name = g_strdup_printf("cpu[%i]", cpu->env.cpu_num); + object_property_set_link(OBJECT(hotplug_dev), OBJECT(cs), name, + errp); + g_free(name); +} + +static void s390_machine_device_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + s390_cpu_plug(hotplug_dev, dev, errp); + } +} + +static HotplugHandler *s390_get_hotplug_handler(MachineState *machine, + DeviceState *dev) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +} + +static void s390_hot_add_cpu(const int64_t id, Error **errp) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + Error *err = NULL; + + s390x_new_cpu(machine->cpu_model, id, &err); + error_propagate(errp, err); +} + static void ccw_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); NMIClass *nc = NMI_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); mc->init = ccw_init; mc->reset = s390_machine_reset; + mc->hot_add_cpu = s390_hot_add_cpu; mc->block_default_type = IF_VIRTIO; mc->no_cdrom = 1; mc->no_floppy = 1; @@ -171,6 +199,8 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data) mc->no_sdcard = 1; mc->use_sclp = 1; mc->max_cpus = 255; + mc->get_hotplug_handler = s390_get_hotplug_handler; + hc->plug = s390_machine_device_plug; nc->nmi_monitor_handler = s390_nmi; } @@ -232,10 +262,40 @@ static const TypeInfo ccw_machine_info = { .class_init = ccw_machine_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_NMI }, + { TYPE_HOTPLUG_HANDLER}, { } }, }; +#define DEFINE_CCW_MACHINE(suffix, verstr, latest) \ + static void ccw_machine_##suffix##_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ + mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = 1; \ + } \ + } \ + static void ccw_machine_##suffix##_instance_init(Object *obj) \ + { \ + MachineState *machine = MACHINE(obj); \ + ccw_machine_##suffix##_instance_options(machine); \ + } \ + static const TypeInfo ccw_machine_##suffix##_info = { \ + .name = MACHINE_TYPE_NAME("s390-ccw-virtio-" verstr), \ + .parent = TYPE_S390_CCW_MACHINE, \ + .class_init = ccw_machine_##suffix##_class_init, \ + .instance_init = ccw_machine_##suffix##_instance_init, \ + }; \ + static void ccw_machine_register_##suffix(void) \ + { \ + type_register_static(&ccw_machine_##suffix##_info); \ + } \ + machine_init(ccw_machine_register_##suffix) + #define CCW_COMPAT_2_5 \ HW_COMPAT_2_5 @@ -280,63 +340,39 @@ static const TypeInfo ccw_machine_info = { .value = "0",\ }, -static void ccw_machine_2_4_class_init(ObjectClass *oc, void *data) +static void ccw_machine_2_6_instance_options(MachineState *machine) { - MachineClass *mc = MACHINE_CLASS(oc); - static GlobalProperty compat_props[] = { - CCW_COMPAT_2_4 - { /* end of list */ } - }; - - mc->desc = "VirtIO-ccw based S390 machine v2.4"; - mc->compat_props = compat_props; } -static const TypeInfo ccw_machine_2_4_info = { - .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.4"), - .parent = TYPE_S390_CCW_MACHINE, - .class_init = ccw_machine_2_4_class_init, -}; - -static void ccw_machine_2_5_class_init(ObjectClass *oc, void *data) +static void ccw_machine_2_6_class_options(MachineClass *mc) { - MachineClass *mc = MACHINE_CLASS(oc); - static GlobalProperty compat_props[] = { - CCW_COMPAT_2_5 - { /* end of list */ } - }; - - mc->desc = "VirtIO-ccw based S390 machine v2.5"; - mc->compat_props = compat_props; } +DEFINE_CCW_MACHINE(2_6, "2.6", true); -static const TypeInfo ccw_machine_2_5_info = { - .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.5"), - .parent = TYPE_S390_CCW_MACHINE, - .class_init = ccw_machine_2_5_class_init, -}; +static void ccw_machine_2_5_instance_options(MachineState *machine) +{ +} -static void ccw_machine_2_6_class_init(ObjectClass *oc, void *data) +static void ccw_machine_2_5_class_options(MachineClass *mc) { - MachineClass *mc = MACHINE_CLASS(oc); + SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_5); +} +DEFINE_CCW_MACHINE(2_5, "2.5", false); - mc->alias = "s390-ccw-virtio"; - mc->desc = "VirtIO-ccw based S390 machine v2.6"; - mc->is_default = 1; +static void ccw_machine_2_4_instance_options(MachineState *machine) +{ + ccw_machine_2_5_instance_options(machine); } -static const TypeInfo ccw_machine_2_6_info = { - .name = MACHINE_TYPE_NAME("s390-ccw-virtio-2.6"), - .parent = TYPE_S390_CCW_MACHINE, - .class_init = ccw_machine_2_6_class_init, -}; +static void ccw_machine_2_4_class_options(MachineClass *mc) +{ + SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_4); +} +DEFINE_CCW_MACHINE(2_4, "2.4", false); static void ccw_machine_register_types(void) { type_register_static(&ccw_machine_info); - type_register_static(&ccw_machine_2_4_info); - type_register_static(&ccw_machine_2_5_info); - type_register_static(&ccw_machine_2_6_info); } type_init(ccw_machine_register_types) diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index 8e533ae..7c6e281 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -58,15 +58,16 @@ #define S390_TOD_CLOCK_VALUE_MISSING 0x00 #define S390_TOD_CLOCK_VALUE_PRESENT 0x01 -static S390CPU **ipi_states; +static S390CPU **cpu_states; S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) { - if (cpu_addr >= smp_cpus) { + if (cpu_addr >= max_cpus) { return NULL; } - return ipi_states[cpu_addr]; + /* Fast lookup via CPU ID */ + return cpu_states[cpu_addr]; } void s390_init_ipl_dev(const char *kernel_filename, @@ -93,26 +94,29 @@ void s390_init_ipl_dev(const char *kernel_filename, qdev_init_nofail(dev); } -void s390_init_cpus(const char *cpu_model) +void s390_init_cpus(MachineState *machine) { int i; + gchar *name; - if (cpu_model == NULL) { - cpu_model = "host"; + if (machine->cpu_model == NULL) { + machine->cpu_model = "host"; } - ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus); + cpu_states = g_new0(S390CPU *, max_cpus); - for (i = 0; i < smp_cpus; i++) { - S390CPU *cpu; - CPUState *cs; - - cpu = cpu_s390x_init(cpu_model); - cs = CPU(cpu); + for (i = 0; i < max_cpus; i++) { + name = g_strdup_printf("cpu[%i]", i); + object_property_add_link(OBJECT(machine), name, TYPE_S390_CPU, + (Object **) &cpu_states[i], + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + g_free(name); + } - ipi_states[i] = cpu; - cs->halted = 1; - cs->exception_index = EXCP_HLT; + for (i = 0; i < smp_cpus; i++) { + s390x_new_cpu(machine->cpu_model, i, &error_fatal); } } diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h index eebce8e..ffd014c 100644 --- a/hw/s390x/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -19,7 +19,7 @@ typedef int (*s390_virtio_fn)(const uint64_t *args); void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn); -void s390_init_cpus(const char *cpu_model); +void s390_init_cpus(MachineState *machine); void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 607ec70..96ccb79 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -493,46 +493,162 @@ static void vfio_listener_release(VFIOContainer *container) memory_listener_unregister(&container->listener); } -int vfio_mmap_region(Object *obj, VFIORegion *region, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name) +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name) { - int ret = 0; - VFIODevice *vbasedev = region->vbasedev; + struct vfio_region_info *info; + int ret; + + ret = vfio_get_region_info(vbasedev, index, &info); + if (ret) { + return ret; + } + + region->vbasedev = vbasedev; + region->flags = info->flags; + region->size = info->size; + region->fd_offset = info->offset; + region->nr = index; + + if (region->size) { + region->mem = g_new0(MemoryRegion, 1); + memory_region_init_io(region->mem, obj, &vfio_region_ops, + region, name, region->size); - if (!vbasedev->no_mmap && size && region->flags & - VFIO_REGION_INFO_FLAG_MMAP) { - int prot = 0; + if (!vbasedev->no_mmap && + region->flags & VFIO_REGION_INFO_FLAG_MMAP && + !(region->size & ~qemu_real_host_page_mask)) { - if (region->flags & VFIO_REGION_INFO_FLAG_READ) { - prot |= PROT_READ; + region->nr_mmaps = 1; + region->mmaps = g_new0(VFIOMmap, region->nr_mmaps); + + region->mmaps[0].offset = 0; + region->mmaps[0].size = region->size; } + } + + g_free(info); + + trace_vfio_region_setup(vbasedev->name, index, name, + region->flags, region->fd_offset, region->size); + return 0; +} + +int vfio_region_mmap(VFIORegion *region) +{ + int i, prot = 0; + char *name; - if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { - prot |= PROT_WRITE; + if (!region->mem) { + return 0; + } + + prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; + prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; + + for (i = 0; i < region->nr_mmaps; i++) { + region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, + MAP_SHARED, region->vbasedev->fd, + region->fd_offset + + region->mmaps[i].offset); + if (region->mmaps[i].mmap == MAP_FAILED) { + int ret = -errno; + + trace_vfio_region_mmap_fault(memory_region_name(region->mem), i, + region->fd_offset + + region->mmaps[i].offset, + region->fd_offset + + region->mmaps[i].offset + + region->mmaps[i].size - 1, ret); + + region->mmaps[i].mmap = NULL; + + for (i--; i >= 0; i--) { + memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem); + munmap(region->mmaps[i].mmap, region->mmaps[i].size); + object_unparent(OBJECT(®ion->mmaps[i].mem)); + region->mmaps[i].mmap = NULL; + } + + return ret; } - *map = mmap(NULL, size, prot, MAP_SHARED, - vbasedev->fd, - region->fd_offset + offset); - if (*map == MAP_FAILED) { - *map = NULL; - ret = -errno; - goto empty_region; + name = g_strdup_printf("%s mmaps[%d]", + memory_region_name(region->mem), i); + memory_region_init_ram_ptr(®ion->mmaps[i].mem, + memory_region_owner(region->mem), + name, region->mmaps[i].size, + region->mmaps[i].mmap); + g_free(name); + memory_region_set_skip_dump(®ion->mmaps[i].mem); + memory_region_add_subregion(region->mem, region->mmaps[i].offset, + ®ion->mmaps[i].mem); + + trace_vfio_region_mmap(memory_region_name(®ion->mmaps[i].mem), + region->mmaps[i].offset, + region->mmaps[i].offset + + region->mmaps[i].size - 1); + } + + return 0; +} + +void vfio_region_exit(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem); } + } - memory_region_init_ram_ptr(submem, obj, name, size, *map); - memory_region_set_skip_dump(submem); - } else { -empty_region: - /* Create a zero sized sub-region to make cleanup easy. */ - memory_region_init(submem, obj, name, 0); + trace_vfio_region_exit(region->vbasedev->name, region->nr); +} + +void vfio_region_finalize(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + munmap(region->mmaps[i].mmap, region->mmaps[i].size); + object_unparent(OBJECT(®ion->mmaps[i].mem)); + } } - memory_region_add_subregion(mem, offset, submem); + object_unparent(OBJECT(region->mem)); - return ret; + g_free(region->mem); + g_free(region->mmaps); + + trace_vfio_region_finalize(region->vbasedev->name, region->nr); +} + +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + memory_region_set_enabled(®ion->mmaps[i].mem, enabled); + } + } + + trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem), + enabled); } void vfio_reset_handler(void *opaque) @@ -959,6 +1075,24 @@ void vfio_put_base_device(VFIODevice *vbasedev) close(vbasedev->fd); } +int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info) +{ + size_t argsz = sizeof(struct vfio_region_info); + + *info = g_malloc0(argsz); + + (*info)->index = index; + (*info)->argsz = argsz; + + if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + g_free(*info); + return -errno; + } + + return 0; +} + static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, int req, void *param) { diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 4815527..49ecf11 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -290,10 +290,10 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev, "vfio-ati-3c3-quirk", 1); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 3 /* offset 3 bytes from 0x3c0 */, quirk->mem); - QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, + QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name); @@ -337,14 +337,14 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->addr_mem, OBJECT(vdev), &vfio_generic_window_address_quirk, window, "vfio-ati-bar4-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->address_offset, window->addr_mem, 1); memory_region_init_io(window->data_mem, OBJECT(vdev), &vfio_generic_window_data_quirk, window, "vfio-ati-bar4-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->data_offset, window->data_mem, 1); @@ -378,7 +378,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(mirror->mem, OBJECT(vdev), &vfio_generic_mirror_quirk, mirror, "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -428,7 +428,7 @@ static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque, quirk->state = NONE; - return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x14, size); } @@ -465,7 +465,7 @@ static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr, break; } - vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x14, data, size); } @@ -481,7 +481,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, VFIONvidia3d0Quirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; VFIONvidia3d0State old_state = quirk->state; - uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x10, size); quirk->state = NONE; @@ -523,7 +523,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, } } - vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x10, data, size); } @@ -551,15 +551,15 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk, data, "vfio-nvidia-3d4-quirk", 2); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]); memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk, data, "vfio-nvidia-3d0-quirk", 2); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]); - QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, + QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name); @@ -683,7 +683,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->addr_mem, OBJECT(vdev), &vfio_generic_window_address_quirk, window, "vfio-nvidia-bar5-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->address_offset, window->addr_mem, 1); memory_region_set_enabled(window->addr_mem, false); @@ -691,7 +691,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->data_mem, OBJECT(vdev), &vfio_generic_window_data_quirk, window, "vfio-nvidia-bar5-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->data_offset, window->data_mem, 1); memory_region_set_enabled(window->data_mem, false); @@ -699,13 +699,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem[2], OBJECT(vdev), &vfio_nvidia_bar5_quirk_master, bar5, "vfio-nvidia-bar5-master-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0, &quirk->mem[2], 1); memory_region_init_io(&quirk->mem[3], OBJECT(vdev), &vfio_nvidia_bar5_quirk_enable, bar5, "vfio-nvidia-bar5-enable-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 4, &quirk->mem[3], 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -767,7 +767,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) &vfio_nvidia_mirror_quirk, mirror, "vfio-nvidia-bar0-88000-mirror-quirk", vdev->config_size); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -786,7 +786,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) &vfio_nvidia_mirror_quirk, mirror, "vfio-nvidia-bar0-1800-mirror-quirk", PCI_CONFIG_SPACE_SIZE); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -947,13 +947,13 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_rtl_address_quirk, rtl, "vfio-rtl8168-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0x74, &quirk->mem[0], 1); memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_rtl_data_quirk, rtl, "vfio-rtl8168-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0x70, &quirk->mem[1], 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -970,28 +970,28 @@ void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) vfio_vga_probe_nvidia_3d0_quirk(vdev); } -void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) +void vfio_vga_quirk_exit(VFIOPCIDevice *vdev) { VFIOQuirk *quirk; int i, j; - for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { - QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) { + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) { for (j = 0; j < quirk->nr_mem; j++) { - memory_region_del_subregion(&vdev->vga.region[i].mem, + memory_region_del_subregion(&vdev->vga->region[i].mem, &quirk->mem[j]); } } } } -void vfio_vga_quirk_free(VFIOPCIDevice *vdev) +void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev) { int i, j; - for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { - while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { - VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) { + VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks); QLIST_REMOVE(quirk, next); for (j = 0; j < quirk->nr_mem; j++) { object_unparent(OBJECT(&quirk->mem[j])); @@ -1012,7 +1012,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) vfio_probe_rtl8168_bar2_quirk(vdev, nr); } -void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) +void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; VFIOQuirk *quirk; @@ -1020,12 +1020,12 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) QLIST_FOREACH(quirk, &bar->quirks, next) { for (i = 0; i < quirk->nr_mem; i++) { - memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]); + memory_region_del_subregion(bar->region.mem, &quirk->mem[i]); } } } -void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) +void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; int i; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 20b505f..d091d8c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -783,25 +783,25 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { - struct vfio_region_info reg_info = { - .argsz = sizeof(reg_info), - .index = VFIO_PCI_ROM_REGION_INDEX - }; + struct vfio_region_info *reg_info; uint64_t size; off_t off = 0; ssize_t bytes; - if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { + if (vfio_get_region_info(&vdev->vbasedev, + VFIO_PCI_ROM_REGION_INDEX, ®_info)) { error_report("vfio: Error getting ROM info: %m"); return; } - trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); + + vdev->rom_size = size = reg_info->size; + vdev->rom_offset = reg_info->offset; - vdev->rom_size = size = reg_info.size; - vdev->rom_offset = reg_info.offset; + g_free(reg_info); if (!vdev->rom_size) { vdev->rom_read_failed = true; @@ -832,6 +832,36 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) break; } } + + /* + * Test the ROM signature against our device, if the vendor is correct + * but the device ID doesn't match, store the correct device ID and + * recompute the checksum. Intel IGD devices need this and are known + * to have bogus checksums so we can't simply adjust the checksum. + */ + if (pci_get_word(vdev->rom) == 0xaa55 && + pci_get_word(vdev->rom + 0x18) + 8 < vdev->rom_size && + !memcmp(vdev->rom + pci_get_word(vdev->rom + 0x18), "PCIR", 4)) { + uint16_t vid, did; + + vid = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 4); + did = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6); + + if (vid == vdev->vendor_id && did != vdev->device_id) { + int i; + uint8_t csum, *data = vdev->rom; + + pci_set_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6, + vdev->device_id); + data[6] = 0; + + for (csum = 0, i = 0; i < vdev->rom_size; i++) { + csum += data[i]; + } + + data[6] = -csum; + } + } } static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) @@ -889,18 +919,14 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; DeviceState *dev = DEVICE(vdev); - char name[32]; + char *name; int fd = vdev->vbasedev.fd; if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) { - error_printf("Warning : Device at %04x:%02x:%02x.%x " - "is known to cause system instability issues during " - "option rom execution. " - "Proceeding anyway since user specified romfile\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n", + vdev->vbasedev.name); } return; } @@ -913,9 +939,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) pwrite(fd, &size, 4, offset) != 4 || pread(fd, &size, 4, offset) != 4 || pwrite(fd, &orig, 4, offset) != 4) { - error_report("%s(%04x:%02x:%02x.%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); return; } @@ -927,32 +951,22 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) if (vfio_blacklist_opt_rom(vdev)) { if (dev->opts && qemu_opt_get(dev->opts, "rombar")) { - error_printf("Warning : Device at %04x:%02x:%02x.%x " - "is known to cause system instability issues during " - "option rom execution. " - "Proceeding anyway since user specified non zero value for " - "rombar\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n", + vdev->vbasedev.name); } else { - error_printf("Warning : Rom loading for device at " - "%04x:%02x:%02x.%x has been disabled due to " - "system instability issues. " - "Specify rombar=1 or romfile to force\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n", + vdev->vbasedev.name); return; } } trace_vfio_pci_size_rom(vdev->vbasedev.name, size); - snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name); memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev), &vfio_rom_ops, vdev, name, size); + g_free(name); pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); @@ -1063,9 +1077,8 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) ret = pread(vdev->vbasedev.fd, &phys_val, len, vdev->config_offset + addr); if (ret != len) { - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, addr, len); + error_report("%s(%s, 0x%x, 0x%x) failed: %m", + __func__, vdev->vbasedev.name, addr, len); return -errno; } phys_val = le32_to_cpu(phys_val); @@ -1089,9 +1102,8 @@ void vfio_pci_write_config(PCIDevice *pdev, /* Write everything to VFIO, let it filter out what we can't write */ if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) != len) { - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, addr, val, len); + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m", + __func__, vdev->vbasedev.name, addr, val, len); } /* MSI/MSI-X Enabling/Disabling */ @@ -1185,6 +1197,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) return 0; } +static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev) +{ + off_t start, end; + VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region; + + /* + * We expect to find a single mmap covering the whole BAR, anything else + * means it's either unsupported or already setup. + */ + if (region->nr_mmaps != 1 || region->mmaps[0].offset || + region->size != region->mmaps[0].size) { + return; + } + + /* MSI-X table start and end aligned to host page size */ + start = vdev->msix->table_offset & qemu_real_host_page_mask; + end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + + /* + * Does the MSI-X table cover the beginning of the BAR? The whole BAR? + * NB - Host page size is necessarily a power of two and so is the PCI + * BAR (not counting EA yet), therefore if we have host page aligned + * @start and @end, then any remainder of the BAR before or after those + * must be at least host page sized and therefore mmap'able. + */ + if (!start) { + if (end >= region->size) { + region->nr_mmaps = 0; + g_free(region->mmaps); + region->mmaps = NULL; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, 0, 0); + } else { + region->mmaps[0].offset = end; + region->mmaps[0].size = region->size - end; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + } + + /* Maybe it's aligned at the end of the BAR */ + } else if (end >= region->size) { + region->mmaps[0].size = start; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + + /* Otherwise it must split the BAR */ + } else { + region->nr_mmaps = 2; + region->mmaps = g_renew(VFIOMmap, region->mmaps, 2); + + memcpy(®ion->mmaps[1], ®ion->mmaps[0], sizeof(VFIOMmap)); + + region->mmaps[0].size = start; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + + region->mmaps[1].offset = end; + region->mmaps[1].size = region->size - end; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[1].offset, + region->mmaps[1].offset + region->mmaps[1].size); + } +} + /* * We don't have any control over how pci_add_capability() inserts * capabilities into the chain. In order to setup MSI-X we need a @@ -1259,6 +1339,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) msix->table_offset, msix->entries); vdev->msix = msix; + vfio_pci_fixup_msix_region(vdev); + return 0; } @@ -1269,9 +1351,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long)); ret = msix_init(&vdev->pdev, vdev->msix->entries, - &vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->table_bar].region.mem, vdev->msix->table_bar, vdev->msix->table_offset, - &vdev->bars[vdev->msix->pba_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].region.mem, vdev->msix->pba_bar, vdev->msix->pba_offset, pos); if (ret < 0) { if (ret == -ENOTSUP) { @@ -1308,8 +1390,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) if (vdev->msix) { msix_uninit(&vdev->pdev, - &vdev->bars[vdev->msix->table_bar].region.mem, - &vdev->bars[vdev->msix->pba_bar].region.mem); + vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].region.mem); g_free(vdev->msix->pending); } } @@ -1322,71 +1404,23 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - VFIOBAR *bar = &vdev->bars[i]; - - if (!bar->region.size) { - continue; - } - - memory_region_set_enabled(&bar->region.mmap_mem, enabled); - if (vdev->msix && vdev->msix->table_bar == i) { - memory_region_set_enabled(&vdev->msix->mmap_mem, enabled); - } - } -} - -static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - - if (!bar->region.size) { - return; - } - - vfio_bar_quirk_teardown(vdev, nr); - - memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem); - - if (vdev->msix && vdev->msix->table_bar == nr) { - memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem); + vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled); } } -static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) +static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - if (!bar->region.size) { - return; - } - - vfio_bar_quirk_free(vdev, nr); - - munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); - - if (vdev->msix && vdev->msix->table_bar == nr) { - munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); - } -} - -static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - uint64_t size = bar->region.size; - char name[64]; uint32_t pci_bar; uint8_t type; int ret; /* Skip both unimplemented BARs and the upper half of 64bit BARS. */ - if (!size) { + if (!bar->region.size) { return; } - snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, nr); - /* Determine what type of BAR this is for registration */ ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); @@ -1401,102 +1435,78 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); - /* A "slow" read/write mapping underlies all BARs */ - memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops, - bar, name, size); - pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem); - - /* - * We can't mmap areas overlapping the MSIX vector table, so we - * potentially insert a direct-mapped subregion before and after it. - */ - if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & qemu_real_host_page_mask; - } - - strncat(name, " mmap", sizeof(name) - strlen(name) - 1); - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, - &bar->region.mmap_mem, &bar->region.mmap, - size, 0, name)) { - error_report("%s unsupported. Performance may be slow", name); - } - - if (vdev->msix && vdev->msix->table_bar == nr) { - uint64_t start; - - start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + - (vdev->msix->entries * - PCI_MSIX_ENTRY_SIZE)); - - size = start < bar->region.size ? bar->region.size - start : 0; - strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); - /* VFIOMSIXInfo contains another MemoryRegion for this mapping */ - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, - &vdev->msix->mmap_mem, - &vdev->msix->mmap, size, start, name)) { - error_report("%s unsupported. Performance may be slow", name); - } + if (vfio_region_mmap(&bar->region)) { + error_report("Failed to mmap %s BAR %d. Performance may be slow", + vdev->vbasedev.name, nr); } vfio_bar_quirk_setup(vdev, nr); + + pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); } -static void vfio_map_bars(VFIOPCIDevice *vdev) +static void vfio_bars_setup(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_map_bar(vdev, i); + vfio_bar_setup(vdev, i); } - if (vdev->has_vga) { - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem, + if (vdev->vga) { + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_MEM], + &vdev->vga->region[QEMU_PCI_VGA_MEM], "vfio-vga-mmio@0xa0000", QEMU_PCI_VGA_MEM_SIZE); - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem, + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_IO_LO], + &vdev->vga->region[QEMU_PCI_VGA_IO_LO], "vfio-vga-io@0x3b0", QEMU_PCI_VGA_IO_LO_SIZE); - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_IO_HI], + &vdev->vga->region[QEMU_PCI_VGA_IO_HI], "vfio-vga-io@0x3c0", QEMU_PCI_VGA_IO_HI_SIZE); - pci_register_vga(&vdev->pdev, &vdev->vga.region[QEMU_PCI_VGA_MEM].mem, - &vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem, - &vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem); + pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); vfio_vga_quirk_setup(vdev); } } -static void vfio_unregister_bars(VFIOPCIDevice *vdev) +static void vfio_bars_exit(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_unregister_bar(vdev, i); + vfio_bar_quirk_exit(vdev, i); + vfio_region_exit(&vdev->bars[i].region); } - if (vdev->has_vga) { - vfio_vga_quirk_teardown(vdev); + if (vdev->vga) { pci_unregister_vga(&vdev->pdev); + vfio_vga_quirk_exit(vdev); } } -static void vfio_unmap_bars(VFIOPCIDevice *vdev) +static void vfio_bars_finalize(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_unmap_bar(vdev, i); + vfio_bar_quirk_finalize(vdev, i); + vfio_region_finalize(&vdev->bars[i].region); } - if (vdev->has_vga) { - vfio_vga_quirk_free(vdev); + if (vdev->vga) { + vfio_vga_quirk_finalize(vdev); + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + object_unparent(OBJECT(&vdev->vga->region[i].mem)); + } + g_free(vdev->vga); } } @@ -1756,9 +1766,8 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) } if (ret < 0) { - error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability " - "0x%x[0x%x]@0x%x: %d", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, + error_report("vfio: %s Error adding PCI capability " + "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name, cap_id, size, pos, ret); return ret; } @@ -1820,11 +1829,14 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) vfio_intx_enable(vdev); } -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, - PCIHostDeviceAddress *host2) +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) { - return (host1->domain == host2->domain && host1->bus == host2->bus && - host1->slot == host2->slot && host1->function == host2->function); + char tmp[13]; + + sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain, + addr->bus, addr->slot, addr->function); + + return (strcmp(tmp, name) == 0); } static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) @@ -1849,9 +1861,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) if (ret && errno != ENOSPC) { ret = -errno; if (!vdev->has_pm_reset) { - error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, " - "no available reset mechanism.", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + error_report("vfio: Cannot reset device %s, " + "no available reset mechanism.", vdev->vbasedev.name); } goto out_single; } @@ -1884,7 +1895,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) trace_vfio_pci_hot_reset_dep_devices(host.domain, host.bus, host.slot, host.function, devices[i].group_id); - if (vfio_pci_host_match(&host, &vdev->host)) { + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { continue; } @@ -1910,7 +1921,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, &tmp->host)) { + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { if (single) { ret = -EINVAL; goto out_single; @@ -1972,7 +1983,7 @@ out: host.slot = PCI_SLOT(devices[i].devfn); host.function = PCI_FUNC(devices[i].devfn); - if (vfio_pci_host_match(&host, &vdev->host)) { + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { continue; } @@ -1991,7 +2002,7 @@ out: continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, &tmp->host)) { + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { vfio_pci_post_reset(tmp); break; } @@ -2044,10 +2055,56 @@ static VFIODeviceOps vfio_pci_ops = { .vfio_eoi = vfio_intx_eoi, }; +int vfio_populate_vga(VFIOPCIDevice *vdev) +{ + VFIODevice *vbasedev = &vdev->vbasedev; + struct vfio_region_info *reg_info; + int ret; + + if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_VGA_REGION_INDEX, ®_info); + if (ret) { + return ret; + } + + if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || + !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || + reg_info->size < 0xbffff + 1) { + error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", + (unsigned long)reg_info->flags, + (unsigned long)reg_info->size); + g_free(reg_info); + return -EINVAL; + } + + vdev->vga = g_new0(VFIOVGA, 1); + + vdev->vga->fd_offset = reg_info->offset; + vdev->vga->fd = vdev->vbasedev.fd; + + g_free(reg_info); + + vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; + vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); + + vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); + + vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); + } + + return 0; +} + static int vfio_populate_device(VFIOPCIDevice *vdev) { VFIODevice *vbasedev = &vdev->vbasedev; - struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; + struct vfio_region_info *reg_info; struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; int i, ret = -1; @@ -2069,85 +2126,47 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) } for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) { - reg_info.index = i; + char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i); + + ret = vfio_region_setup(OBJECT(vdev), vbasedev, + &vdev->bars[i].region, i, name); + g_free(name); - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto error; } - trace_vfio_populate_device_region(vbasedev->name, i, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); - - vdev->bars[i].region.vbasedev = vbasedev; - vdev->bars[i].region.flags = reg_info.flags; - vdev->bars[i].region.size = reg_info.size; - vdev->bars[i].region.fd_offset = reg_info.offset; - vdev->bars[i].region.nr = i; QLIST_INIT(&vdev->bars[i].quirks); } - reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, ®_info); if (ret) { error_report("vfio: Error getting config info: %m"); goto error; } trace_vfio_populate_device_config(vdev->vbasedev.name, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); - vdev->config_size = reg_info.size; + vdev->config_size = reg_info->size; if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) { vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS; } - vdev->config_offset = reg_info.offset; + vdev->config_offset = reg_info->offset; - if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) && - vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { - struct vfio_region_info vga_info = { - .argsz = sizeof(vga_info), - .index = VFIO_PCI_VGA_REGION_INDEX, - }; + g_free(reg_info); - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info); + if (vdev->features & VFIO_FEATURE_ENABLE_VGA) { + ret = vfio_populate_vga(vdev); if (ret) { error_report( "vfio: Device does not support requested feature x-vga"); goto error; } - - if (!(vga_info.flags & VFIO_REGION_INFO_FLAG_READ) || - !(vga_info.flags & VFIO_REGION_INFO_FLAG_WRITE) || - vga_info.size < 0xbffff + 1) { - error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", - (unsigned long)vga_info.flags, - (unsigned long)vga_info.size); - goto error; - } - - vdev->vga.fd_offset = vga_info.offset; - vdev->vga.fd = vdev->vbasedev.fd; - - vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; - vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks); - - vdev->vga.region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; - vdev->vga.region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].quirks); - - vdev->vga.region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; - vdev->vga.region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks); - - vdev->has_vga = true; } irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; @@ -2172,11 +2191,8 @@ error: static void vfio_put_device(VFIOPCIDevice *vdev) { g_free(vdev->vbasedev.name); - if (vdev->msix) { - object_unparent(OBJECT(&vdev->msix->mmap_mem)); - g_free(vdev->msix); - vdev->msix = NULL; - } + g_free(vdev->msix); + vfio_put_base_device(&vdev->vbasedev); } @@ -2197,10 +2213,7 @@ static void vfio_err_notifier_handler(void *opaque) * guest to contain the error. */ - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. " - "Please collect any data possible and then kill the guest", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name); vm_stop(RUN_STATE_INTERNAL_ERROR); } @@ -2381,42 +2394,43 @@ static int vfio_initfn(PCIDevice *pdev) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIODevice *vbasedev_iter; VFIOGroup *group; - char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; + char *tmp, group_path[PATH_MAX], *group_name; ssize_t len; struct stat st; int groupid; int ret; - /* Check that the host device exists */ - snprintf(path, sizeof(path), - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); - if (stat(path, &st) < 0) { - error_report("vfio: error: no such host device: %s", path); + if (!vdev->vbasedev.sysfsdev) { + vdev->vbasedev.sysfsdev = + g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x", + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); + } + + if (stat(vdev->vbasedev.sysfsdev, &st) < 0) { + error_report("vfio: error: no such host device: %s", + vdev->vbasedev.sysfsdev); return -errno; } + vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev)); vdev->vbasedev.ops = &vfio_pci_ops; - vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI; - vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); - strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); + tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); - len = readlink(path, iommu_group_path, sizeof(path)); - if (len <= 0 || len >= sizeof(path)) { + if (len <= 0 || len >= sizeof(group_path)) { error_report("vfio: error no iommu_group for device"); return len < 0 ? -errno : -ENAMETOOLONG; } - iommu_group_path[len] = 0; - group_name = basename(iommu_group_path); + group_path[len] = 0; + group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", path); + error_report("vfio: error reading %s: %m", group_path); return -errno; } @@ -2428,21 +2442,18 @@ static int vfio_initfn(PCIDevice *pdev) return -ENOENT; } - snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) { - error_report("vfio: error: device %s is already attached", path); + error_report("vfio: error: device %s is already attached", + vdev->vbasedev.name); vfio_put_group(group); return -EBUSY; } } - ret = vfio_get_device(group, path, &vdev->vbasedev); + ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev); if (ret) { - error_report("vfio: failed to get device %s", path); + error_report("vfio: failed to get device %s", vdev->vbasedev.name); vfio_put_group(group); return ret; } @@ -2542,7 +2553,7 @@ static int vfio_initfn(PCIDevice *pdev) return ret; } - vfio_map_bars(vdev); + vfio_bars_setup(vdev); ret = vfio_add_capabilities(vdev); if (ret) { @@ -2579,7 +2590,7 @@ static int vfio_initfn(PCIDevice *pdev) out_teardown: pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); vfio_teardown_msi(vdev); - vfio_unregister_bars(vdev); + vfio_bars_exit(vdev); return ret; } @@ -2589,7 +2600,7 @@ static void vfio_instance_finalize(Object *obj) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev); VFIOGroup *group = vdev->vbasedev.group; - vfio_unmap_bars(vdev); + vfio_bars_finalize(vdev); g_free(vdev->emulated_config_bits); g_free(vdev->rom); vfio_put_device(vdev); @@ -2608,7 +2619,7 @@ static void vfio_exitfn(PCIDevice *pdev) timer_free(vdev->intx.mmap_timer); } vfio_teardown_msi(vdev); - vfio_unregister_bars(vdev); + vfio_bars_exit(vdev); } static void vfio_pci_reset(DeviceState *dev) @@ -2659,6 +2670,7 @@ static void vfio_instance_init(Object *obj) static Property vfio_pci_dev_properties[] = { DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice, intx.mmap_timeout, 1100), DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 6256587..3976f68 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -114,7 +114,7 @@ typedef struct VFIOPCIDevice { int nr_vectors; /* Number of MSI/MSIX vectors currently in use */ int interrupt; /* Current interrupt type */ VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ - VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ + VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ PCIHostDeviceAddress host; EventNotifier err_notifier; EventNotifier req_notifier; @@ -150,11 +150,13 @@ void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev); void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); -void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev); -void vfio_vga_quirk_free(VFIOPCIDevice *vdev); +void vfio_vga_quirk_exit(VFIOPCIDevice *vdev); +void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev); void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr); -void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr); -void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr); +void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr); +void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr); void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); +int vfio_populate_vga(VFIOPCIDevice *vdev); + #endif /* HW_VFIO_VFIO_PCI_H */ diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index ebc9dcb..a2ab75d 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -143,12 +143,8 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) { int i; - trace_vfio_platform_mmap_set_enabled(enabled); - for (i = 0; i < vdev->vbasedev.num_regions; i++) { - VFIORegion *region = vdev->regions[i]; - - memory_region_set_enabled(®ion->mmap_mem, enabled); + vfio_region_mmaps_set_enabled(vdev->regions[i], enabled); } } @@ -476,28 +472,16 @@ static int vfio_populate_device(VFIODevice *vbasedev) vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions); for (i = 0; i < vbasedev->num_regions; i++) { - struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; - VFIORegion *ptr; + char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i); vdev->regions[i] = g_new0(VFIORegion, 1); - ptr = vdev->regions[i]; - reg_info.index = i; - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = vfio_region_setup(OBJECT(vdev), vbasedev, + vdev->regions[i], i, name); + g_free(name); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto reg_error; } - ptr->flags = reg_info.flags; - ptr->size = reg_info.size; - ptr->fd_offset = reg_info.offset; - ptr->nr = i; - ptr->vbasedev = vbasedev; - - trace_vfio_platform_populate_regions(ptr->nr, - (unsigned long)ptr->flags, - (unsigned long)ptr->size, - ptr->vbasedev->fd, - (unsigned long)ptr->fd_offset); } vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, @@ -534,6 +518,9 @@ irq_err: } reg_error: for (i = 0; i < vbasedev->num_regions; i++) { + if (vdev->regions[i]) { + vfio_region_finalize(vdev->regions[i]); + } g_free(vdev->regions[i]); } g_free(vdev->regions); @@ -560,38 +547,45 @@ static int vfio_base_device_init(VFIODevice *vbasedev) { VFIOGroup *group; VFIODevice *vbasedev_iter; - char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; + char *tmp, group_path[PATH_MAX], *group_name; ssize_t len; struct stat st; int groupid; int ret; - /* name must be set prior to the call */ - if (!vbasedev->name || strchr(vbasedev->name, '/')) { - return -EINVAL; - } + /* @sysfsdev takes precedence over @host */ + if (vbasedev->sysfsdev) { + g_free(vbasedev->name); + vbasedev->name = g_strdup(basename(vbasedev->sysfsdev)); + } else { + if (!vbasedev->name || strchr(vbasedev->name, '/')) { + return -EINVAL; + } - /* Check that the host device exists */ - g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/", - vbasedev->name); + vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s", + vbasedev->name); + } - if (stat(path, &st) < 0) { - error_report("vfio: error: no such host device: %s", path); + if (stat(vbasedev->sysfsdev, &st) < 0) { + error_report("vfio: error: no such host device: %s", + vbasedev->sysfsdev); return -errno; } - g_strlcat(path, "iommu_group", sizeof(path)); - len = readlink(path, iommu_group_path, sizeof(iommu_group_path)); - if (len < 0 || len >= sizeof(iommu_group_path)) { + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); + + if (len < 0 || len >= sizeof(group_path)) { error_report("vfio: error no iommu_group for device"); return len < 0 ? -errno : -ENAMETOOLONG; } - iommu_group_path[len] = 0; - group_name = basename(iommu_group_path); + group_path[len] = 0; + group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", path); + error_report("vfio: error reading %s: %m", group_path); return -errno; } @@ -603,25 +597,24 @@ static int vfio_base_device_init(VFIODevice *vbasedev) return -ENOENT; } - g_snprintf(path, sizeof(path), "%s", vbasedev->name); - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { - error_report("vfio: error: device %s is already attached", path); + error_report("vfio: error: device %s is already attached", + vbasedev->name); vfio_put_group(group); return -EBUSY; } } - ret = vfio_get_device(group, path, vbasedev); + ret = vfio_get_device(group, vbasedev->name, vbasedev); if (ret) { - error_report("vfio: failed to get device %s", path); + error_report("vfio: failed to get device %s", vbasedev->name); vfio_put_group(group); return ret; } ret = vfio_populate_device(vbasedev); if (ret) { - error_report("vfio: failed to populate device %s", path); + error_report("vfio: failed to populate device %s", vbasedev->name); vfio_put_group(group); } @@ -629,41 +622,6 @@ static int vfio_base_device_init(VFIODevice *vbasedev) } /** - * vfio_map_region - initialize the 2 memory regions for a given - * MMIO region index - * @vdev: the VFIO platform device handle - * @nr: the index of the region - * - * Init the top memory region and the mmapped memory region beneath - * VFIOPlatformDevice is used since VFIODevice is not a QOM Object - * and could not be passed to memory region functions -*/ -static void vfio_map_region(VFIOPlatformDevice *vdev, int nr) -{ - VFIORegion *region = vdev->regions[nr]; - uint64_t size = region->size; - char name[64]; - - if (!size) { - return; - } - - g_snprintf(name, sizeof(name), "VFIO %s region %d", - vdev->vbasedev.name, nr); - - /* A "slow" read/write mapping underlies all regions */ - memory_region_init_io(®ion->mem, OBJECT(vdev), &vfio_region_ops, - region, name, size); - - g_strlcat(name, " mmap", sizeof(name)); - - if (vfio_mmap_region(OBJECT(vdev), region, ®ion->mem, - ®ion->mmap_mem, ®ion->mmap, size, 0, name)) { - error_report("%s unsupported. Performance may be slow", name); - } -} - -/** * vfio_platform_realize - the device realize function * @dev: device state pointer * @errp: error @@ -681,7 +639,9 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; vbasedev->ops = &vfio_platform_ops; - trace_vfio_platform_realize(vbasedev->name, vdev->compat); + trace_vfio_platform_realize(vbasedev->sysfsdev ? + vbasedev->sysfsdev : vbasedev->name, + vdev->compat); ret = vfio_base_device_init(vbasedev); if (ret) { @@ -691,8 +651,11 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) } for (i = 0; i < vbasedev->num_regions; i++) { - vfio_map_region(vdev, i); - sysbus_init_mmio(sbdev, &vdev->regions[i]->mem); + if (vfio_region_mmap(vdev->regions[i])) { + error_report("%s mmap unsupported. Performance may be slow", + memory_region_name(vdev->regions[i]->mem)); + } + sysbus_init_mmio(sbdev, vdev->regions[i]->mem); } } @@ -703,6 +666,7 @@ static const VMStateDescription vfio_platform_vmstate = { static Property vfio_platform_dev_properties[] = { DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name), + DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev), DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false), DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, mmap_timeout, 1100), diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h new file mode 100644 index 0000000..ab08332 --- /dev/null +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -0,0 +1,40 @@ +/* + * virtio ccw machine definitions + * + * Copyright 2012, 2016 IBM Corp. + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ +#ifndef HW_S390X_S390_VIRTIO_CCW_H +#define HW_S390X_S390_VIRTIO_CCW_H + +#include "hw/boards.h" + +#define TYPE_S390_CCW_MACHINE "s390-ccw-machine" + +#define S390_CCW_MACHINE(obj) \ + OBJECT_CHECK(S390CcwMachineState, (obj), TYPE_S390_CCW_MACHINE) + +#define S390_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(S390CcwMachineClass, (klass), TYPE_S390_CCW_MACHINE) + +typedef struct S390CcwMachineState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + bool aes_key_wrap; + bool dea_key_wrap; +} S390CcwMachineState; + +typedef struct S390CcwMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ +} S390CcwMachineClass; + +#endif diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index f037f3c..eb0e1b0 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -25,6 +25,9 @@ #include "exec/memory.h" #include "qemu/queue.h" #include "qemu/notify.h" +#ifdef CONFIG_LINUX +#include <linux/vfio.h> +#endif /*#define DEBUG_VFIO*/ #ifdef DEBUG_VFIO @@ -40,14 +43,21 @@ enum { VFIO_DEVICE_TYPE_PLATFORM = 1, }; +typedef struct VFIOMmap { + MemoryRegion mem; + void *mmap; + off_t offset; + size_t size; +} VFIOMmap; + typedef struct VFIORegion { struct VFIODevice *vbasedev; off_t fd_offset; /* offset of region within device fd */ - MemoryRegion mem; /* slow, read/write access */ - MemoryRegion mmap_mem; /* direct mapped access */ - void *mmap; + MemoryRegion *mem; /* slow, read/write access */ size_t size; uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ + uint32_t nr_mmaps; + VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ } VFIORegion; @@ -89,6 +99,7 @@ typedef struct VFIODeviceOps VFIODeviceOps; typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; struct VFIOGroup *group; + char *sysfsdev; char *name; int fd; int type; @@ -124,10 +135,12 @@ void vfio_region_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size); -int vfio_mmap_region(Object *vdev, VFIORegion *region, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name); +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name); +int vfio_region_mmap(VFIORegion *region); +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); +void vfio_region_exit(VFIORegion *region); +void vfio_region_finalize(VFIORegion *region); void vfio_reset_handler(void *opaque); VFIOGroup *vfio_get_group(int groupid, AddressSpace *as); void vfio_put_group(VFIOGroup *group); @@ -138,4 +151,8 @@ extern const MemoryRegionOps vfio_region_ops; extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list; extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces; +#ifdef CONFIG_LINUX +int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info); +#endif #endif /* !HW_VFIO_VFIO_COMMON_H */ diff --git a/include/io/channel-watch.h b/include/io/channel-watch.h index 656358a..76d7642 100644 --- a/include/io/channel-watch.h +++ b/include/io/channel-watch.h @@ -39,7 +39,7 @@ * monitor the file descriptor @fd for the * I/O conditions in @condition. This is able * monitor block devices, character devices, - * sockets, pipes but not plain files. + * pipes but not plain files or, on Win32, sockets. * * Returns: the new main loop source */ @@ -48,6 +48,24 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc, GIOCondition condition); /** + * qio_channel_create_socket_watch: + * @ioc: the channel object + * @fd: the file descriptor + * @condition: the I/O condition + * + * Create a new main loop source that is able to + * monitor the file descriptor @fd for the + * I/O conditions in @condition. This is equivalent + * to qio_channel_create_fd_watch on POSIX systems + * but not on Windows. + * + * Returns: the new main loop source + */ +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int fd, + GIOCondition condition); + +/** * qio_channel_create_fd_pair_watch: * @ioc: the channel object * @fdread: the file descriptor for reading diff --git a/include/io/channel.h b/include/io/channel.h index 0a1f1ce..d37acd2 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -78,6 +78,9 @@ typedef gboolean (*QIOChannelFunc)(QIOChannel *ioc, struct QIOChannel { Object parent; unsigned int features; /* bitmask of QIOChannelFeatures */ +#ifdef _WIN32 + HANDLE event; /* For use with GSource on Win32 */ +#endif }; /** diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 0be68de..1bd9218 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -3,26 +3,9 @@ #define QEMU_SOCKET_H #ifdef _WIN32 -#include <windows.h> -#include <winsock2.h> -#include <ws2tcpip.h> - -#define socket_error() WSAGetLastError() int inet_aton(const char *cp, struct in_addr *ia); -#else - -#include <sys/socket.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <arpa/inet.h> -#include <netdb.h> -#include <sys/un.h> - -#define socket_error() errno -#define closesocket(s) close(s) - #endif /* !_WIN32 */ #include "qapi-types.h" diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 5b9c4d6..07e3e5a 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -26,6 +26,12 @@ #ifndef QEMU_OS_POSIX_H #define QEMU_OS_POSIX_H +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <sys/un.h> void os_set_line_buffering(void); void os_set_proc_name(const char *s); @@ -34,6 +40,9 @@ void os_daemonize(void); void os_setup_post(void); int os_mlock(void); +#define closesocket(s) close(s) +#define ioctlsocket(s, r, v) ioctl(s, r, v) + typedef struct timeval qemu_timeval; #define qemu_gettimeofday(tp) gettimeofday(tp, NULL) diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index fbed346..17aad3b 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -28,32 +28,7 @@ #include <winsock2.h> #include <windows.h> - -/* Workaround for older versions of MinGW. */ -#ifndef ECONNREFUSED -# define ECONNREFUSED WSAECONNREFUSED -#endif -#ifndef EINPROGRESS -# define EINPROGRESS WSAEINPROGRESS -#endif -#ifndef EHOSTUNREACH -# define EHOSTUNREACH WSAEHOSTUNREACH -#endif -#ifndef EINTR -# define EINTR WSAEINTR -#endif -#ifndef EINPROGRESS -# define EINPROGRESS WSAEINPROGRESS -#endif -#ifndef ENETUNREACH -# define ENETUNREACH WSAENETUNREACH -#endif -#ifndef ENOTCONN -# define ENOTCONN WSAENOTCONN -#endif -#ifndef EWOULDBLOCK -# define EWOULDBLOCK WSAEWOULDBLOCK -#endif +#include <ws2tcpip.h> #if defined(_WIN64) /* On w64, setjmp is implemented by _setjmp which needs a second parameter. @@ -80,7 +55,6 @@ struct tm *gmtime_r(const time_t *timep, struct tm *result); struct tm *localtime_r(const time_t *timep, struct tm *result); #endif /* CONFIG_LOCALTIME_R */ - static inline void os_setup_signal_handling(void) {} static inline void os_daemonize(void) {} static inline void os_setup_post(void) {} @@ -129,4 +103,82 @@ static inline char *realpath(const char *path, char *resolved_path) return resolved_path; } + +/* We wrap all the sockets functions so that we can + * set errno based on WSAGetLastError() + */ + +#undef connect +#define connect qemu_connect_wrap +int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef listen +#define listen qemu_listen_wrap +int qemu_listen_wrap(int sockfd, int backlog); + +#undef bind +#define bind qemu_bind_wrap +int qemu_bind_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + +#undef socket +#define socket qemu_socket_wrap +int qemu_socket_wrap(int domain, int type, int protocol); + +#undef accept +#define accept qemu_accept_wrap +int qemu_accept_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef shutdown +#define shutdown qemu_shutdown_wrap +int qemu_shutdown_wrap(int sockfd, int how); + +#undef ioctlsocket +#define ioctlsocket qemu_ioctlsocket_wrap +int qemu_ioctlsocket_wrap(int fd, int req, void *val); + +#undef closesocket +#define closesocket qemu_closesocket_wrap +int qemu_closesocket_wrap(int fd); + +#undef getsockopt +#define getsockopt qemu_getsockopt_wrap +int qemu_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, socklen_t *optlen); + +#undef setsockopt +#define setsockopt qemu_setsockopt_wrap +int qemu_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, socklen_t optlen); + +#undef getpeername +#define getpeername qemu_getpeername_wrap +int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef getsockname +#define getsockname qemu_getsockname_wrap +int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + +#undef send +#define send qemu_send_wrap +ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags); + +#undef sendto +#define sendto qemu_sendto_wrap +ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t addrlen); + +#undef recv +#define recv qemu_recv_wrap +ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags); + +#undef recvfrom +#define recvfrom qemu_recvfrom_wrap +ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *addrlen); + #endif diff --git a/io/channel-command.c b/io/channel-command.c index f53ce0f..604514a 100644 --- a/io/channel-command.c +++ b/io/channel-command.c @@ -236,8 +236,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, retry: ret = readv(cioc->readfd, iov, niov); if (ret < 0) { - if (errno == EAGAIN || - errno == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } if (errno == EINTR) { @@ -265,8 +264,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, retry: ret = writev(cioc->writefd, iov, niov); if (ret <= 0) { - if (errno == EAGAIN || - errno == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } if (errno == EINTR) { diff --git a/io/channel-file.c b/io/channel-file.c index 19a4325..f28e2b0 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -96,8 +96,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, retry: ret = readv(fioc->fd, iov, niov); if (ret < 0) { - if (errno == EAGAIN || - errno == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } if (errno == EINTR) { @@ -125,8 +124,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, retry: ret = writev(fioc->fd, iov, niov); if (ret <= 0) { - if (errno == EAGAIN || - errno == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } if (errno == EINTR) { diff --git a/io/channel-socket.c b/io/channel-socket.c index bf66a78..d005070 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -55,6 +55,10 @@ qio_channel_socket_new(void) ioc = QIO_CHANNEL(sioc); ioc->features |= (1 << QIO_CHANNEL_FEATURE_SHUTDOWN); +#ifdef WIN32 + ioc->event = CreateEvent(NULL, FALSE, FALSE, NULL); +#endif + trace_qio_channel_socket_new(sioc); return sioc; @@ -78,11 +82,11 @@ qio_channel_socket_set_fd(QIOChannelSocket *sioc, if (getpeername(fd, (struct sockaddr *)&sioc->remoteAddr, &sioc->remoteAddrLen) < 0) { - if (socket_error() == ENOTCONN) { + if (errno == ENOTCONN) { memset(&sioc->remoteAddr, 0, sizeof(sioc->remoteAddr)); sioc->remoteAddrLen = sizeof(sioc->remoteAddr); } else { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to query remote socket address"); goto error; } @@ -90,7 +94,7 @@ qio_channel_socket_set_fd(QIOChannelSocket *sioc, if (getsockname(fd, (struct sockaddr *)&sioc->localAddr, &sioc->localAddrLen) < 0) { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to query local socket address"); goto error; } @@ -341,13 +345,18 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, cioc->remoteAddrLen = sizeof(ioc->remoteAddr); cioc->localAddrLen = sizeof(ioc->localAddr); +#ifdef WIN32 + QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL); +#endif + + retry: trace_qio_channel_socket_accept(ioc); - cioc->fd = accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr, - &cioc->remoteAddrLen); + cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr, + &cioc->remoteAddrLen); if (cioc->fd < 0) { trace_qio_channel_socket_accept_fail(ioc); - if (socket_error() == EINTR) { + if (errno == EINTR) { goto retry; } goto error; @@ -355,7 +364,7 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, if (getsockname(cioc->fd, (struct sockaddr *)&cioc->localAddr, &cioc->localAddrLen) < 0) { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to query local socket address"); goto error; } @@ -384,7 +393,10 @@ static void qio_channel_socket_finalize(Object *obj) { QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj); if (ioc->fd != -1) { - close(ioc->fd); +#ifdef WIN32 + WSAEventSelect(ioc->fd, NULL, 0); +#endif + closesocket(ioc->fd); ioc->fd = -1; } } @@ -466,15 +478,14 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, retry: ret = recvmsg(sioc->fd, &msg, sflags); if (ret < 0) { - if (socket_error() == EAGAIN || - socket_error() == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } - if (socket_error() == EINTR) { + if (errno == EINTR) { goto retry; } - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to read from socket"); return -1; } @@ -526,14 +537,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, retry: ret = sendmsg(sioc->fd, &msg, 0); if (ret <= 0) { - if (socket_error() == EAGAIN || - socket_error() == EWOULDBLOCK) { + if (errno == EAGAIN) { return QIO_CHANNEL_ERR_BLOCK; } - if (socket_error() == EINTR) { + if (errno == EINTR) { goto retry; } - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to write to socket"); return -1; } @@ -559,17 +569,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, iov[i].iov_len, 0); if (ret < 0) { - if (socket_error() == EAGAIN) { + if (errno == EAGAIN) { if (done) { return done; } else { return QIO_CHANNEL_ERR_BLOCK; } - } else if (socket_error() == EINTR) { + } else if (errno == EINTR) { goto retry; } else { - error_setg_errno(errp, socket_error(), - "Unable to write to socket"); + error_setg_errno(errp, errno, + "Unable to read from socket"); return -1; } } @@ -601,16 +611,16 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, iov[i].iov_len, 0); if (ret < 0) { - if (socket_error() == EAGAIN) { + if (errno == EAGAIN) { if (done) { return done; } else { return QIO_CHANNEL_ERR_BLOCK; } - } else if (socket_error() == EINTR) { + } else if (errno == EINTR) { goto retry; } else { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to write to socket"); return -1; } @@ -636,6 +646,11 @@ qio_channel_socket_set_blocking(QIOChannel *ioc, qemu_set_block(sioc->fd); } else { qemu_set_nonblock(sioc->fd); +#ifdef WIN32 + WSAEventSelect(sioc->fd, ioc->event, + FD_READ | FD_ACCEPT | FD_CLOSE | + FD_CONNECT | FD_WRITE | FD_OOB); +#endif } return 0; } @@ -671,13 +686,18 @@ qio_channel_socket_close(QIOChannel *ioc, { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - if (closesocket(sioc->fd) < 0) { + if (sioc->fd != -1) { +#ifdef WIN32 + WSAEventSelect(sioc->fd, NULL, 0); +#endif + if (closesocket(sioc->fd) < 0) { + sioc->fd = -1; + error_setg_errno(errp, errno, + "Unable to close socket"); + return -1; + } sioc->fd = -1; - error_setg_errno(errp, socket_error(), - "Unable to close socket"); - return -1; } - sioc->fd = -1; return 0; } @@ -703,7 +723,7 @@ qio_channel_socket_shutdown(QIOChannel *ioc, } if (shutdown(sioc->fd, sockhow) < 0) { - error_setg_errno(errp, socket_error(), + error_setg_errno(errp, errno, "Unable to shutdown socket"); return -1; } @@ -714,9 +734,9 @@ static GSource *qio_channel_socket_create_watch(QIOChannel *ioc, GIOCondition condition) { QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - return qio_channel_create_fd_watch(ioc, - sioc->fd, - condition); + return qio_channel_create_socket_watch(ioc, + sioc->fd, + condition); } static void qio_channel_socket_class_init(ObjectClass *klass, diff --git a/io/channel-watch.c b/io/channel-watch.c index 931fa4d..cf1cdff 100644 --- a/io/channel-watch.c +++ b/io/channel-watch.c @@ -30,6 +30,20 @@ struct QIOChannelFDSource { }; +#ifdef CONFIG_WIN32 +typedef struct QIOChannelSocketSource QIOChannelSocketSource; +struct QIOChannelSocketSource { + GSource parent; + GPollFD fd; + QIOChannel *ioc; + SOCKET socket; + int revents; + GIOCondition condition; +}; + +#endif + + typedef struct QIOChannelFDPairSource QIOChannelFDPairSource; struct QIOChannelFDPairSource { GSource parent; @@ -82,6 +96,97 @@ qio_channel_fd_source_finalize(GSource *source) } +#ifdef CONFIG_WIN32 +static gboolean +qio_channel_socket_source_prepare(GSource *source G_GNUC_UNUSED, + gint *timeout) +{ + *timeout = -1; + + return FALSE; +} + + +/* + * NB, this impl only works when the socket is in non-blocking + * mode on Win32 + */ +static gboolean +qio_channel_socket_source_check(GSource *source) +{ + static struct timeval tv0; + + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + WSANETWORKEVENTS ev; + fd_set rfds, wfds, xfds; + + if (!ssource->condition) { + return 0; + } + + WSAEnumNetworkEvents(ssource->socket, ssource->ioc->event, &ev); + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&xfds); + if (ssource->condition & G_IO_IN) { + FD_SET((SOCKET)ssource->socket, &rfds); + } + if (ssource->condition & G_IO_OUT) { + FD_SET((SOCKET)ssource->socket, &wfds); + } + if (ssource->condition & G_IO_PRI) { + FD_SET((SOCKET)ssource->socket, &xfds); + } + ssource->revents = 0; + if (select(0, &rfds, &wfds, &xfds, &tv0) == 0) { + return 0; + } + + if (FD_ISSET(ssource->socket, &rfds)) { + ssource->revents |= G_IO_IN; + } + if (FD_ISSET(ssource->socket, &wfds)) { + ssource->revents |= G_IO_OUT; + } + if (FD_ISSET(ssource->socket, &xfds)) { + ssource->revents |= G_IO_PRI; + } + + return ssource->revents; +} + + +static gboolean +qio_channel_socket_source_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + QIOChannelFunc func = (QIOChannelFunc)callback; + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + + return (*func)(ssource->ioc, ssource->revents, user_data); +} + + +static void +qio_channel_socket_source_finalize(GSource *source) +{ + QIOChannelSocketSource *ssource = (QIOChannelSocketSource *)source; + + object_unref(OBJECT(ssource->ioc)); +} + + +GSourceFuncs qio_channel_socket_source_funcs = { + qio_channel_socket_source_prepare, + qio_channel_socket_source_check, + qio_channel_socket_source_dispatch, + qio_channel_socket_source_finalize +}; +#endif + + static gboolean qio_channel_fd_pair_source_prepare(GSource *source G_GNUC_UNUSED, gint *timeout) @@ -160,7 +265,11 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc, ssource->condition = condition; +#ifdef CONFIG_WIN32 + ssource->fd.fd = (gint64)_get_osfhandle(fd); +#else ssource->fd.fd = fd; +#endif ssource->fd.events = condition; g_source_add_poll(source, &ssource->fd); @@ -168,6 +277,40 @@ GSource *qio_channel_create_fd_watch(QIOChannel *ioc, return source; } +#ifdef CONFIG_WIN32 +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int socket, + GIOCondition condition) +{ + GSource *source; + QIOChannelSocketSource *ssource; + + source = g_source_new(&qio_channel_socket_source_funcs, + sizeof(QIOChannelSocketSource)); + ssource = (QIOChannelSocketSource *)source; + + ssource->ioc = ioc; + object_ref(OBJECT(ioc)); + + ssource->condition = condition; + ssource->socket = socket; + ssource->revents = 0; + + ssource->fd.fd = (gintptr)ioc->event; + ssource->fd.events = G_IO_IN; + + g_source_add_poll(source, &ssource->fd); + + return source; +} +#else +GSource *qio_channel_create_socket_watch(QIOChannel *ioc, + int socket, + GIOCondition condition) +{ + return qio_channel_create_fd_watch(ioc, socket, condition); +} +#endif GSource *qio_channel_create_fd_pair_watch(QIOChannel *ioc, int fdread, @@ -186,10 +329,15 @@ GSource *qio_channel_create_fd_pair_watch(QIOChannel *ioc, ssource->condition = condition; +#ifdef CONFIG_WIN32 + ssource->fdread.fd = (gint64)_get_osfhandle(fdread); + ssource->fdwrite.fd = (gint64)_get_osfhandle(fdwrite); +#else ssource->fdread.fd = fdread; - ssource->fdread.events = condition & G_IO_IN; - ssource->fdwrite.fd = fdwrite; +#endif + + ssource->fdread.events = condition & G_IO_IN; ssource->fdwrite.events = condition & G_IO_OUT; g_source_add_poll(source, &ssource->fdread); diff --git a/io/channel.c b/io/channel.c index 3fc09f8..dd6fc0e 100644 --- a/io/channel.c +++ b/io/channel.c @@ -274,10 +274,24 @@ void qio_channel_wait(QIOChannel *ioc, } +#ifdef _WIN32 +static void qio_channel_finalize(Object *obj) +{ + QIOChannel *ioc = QIO_CHANNEL(obj); + + if (ioc->event) { + CloseHandle(ioc->event); + } +} +#endif + static const TypeInfo qio_channel_info = { .parent = TYPE_OBJECT, .name = TYPE_QIO_CHANNEL, .instance_size = sizeof(QIOChannel), +#ifdef _WIN32 + .instance_finalize = qio_channel_finalize, +#endif .abstract = true, .class_size = sizeof(QIOChannelClass), }; diff --git a/linux-user/flatload.c b/linux-user/flatload.c index a25c797..f9139c3 100644 --- a/linux-user/flatload.c +++ b/linux-user/flatload.c @@ -38,7 +38,6 @@ #include "qemu.h" #include "flat.h" -#define ntohl(x) be32_to_cpu(x) #include <target_flat.h> //#define DEBUG diff --git a/migration/migration.c b/migration/migration.c index 7d13377..034a918 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -706,7 +706,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, */ error_report("Postcopy is not currently compatible with " "compression"); - s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] = + s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] = false; } } @@ -1125,7 +1125,7 @@ bool migrate_postcopy_ram(void) s = migrate_get_current(); - return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM]; + return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; } bool migrate_auto_converge(void) @@ -1269,8 +1269,7 @@ static void *source_return_path_thread(void *opaque) MigrationState *ms = opaque; QEMUFile *rp = ms->rp_state.from_dst_file; uint16_t header_len, header_type; - const int max_len = 512; - uint8_t buf[max_len]; + uint8_t buf[512]; uint32_t tmp32, sibling_error; ram_addr_t start = 0; /* =0 to silence warning */ size_t len = 0, expected_len; @@ -1293,7 +1292,7 @@ static void *source_return_path_thread(void *opaque) if ((rp_cmd_args[header_type].len != -1 && header_len != rp_cmd_args[header_type].len) || - header_len > max_len) { + header_len > sizeof(buf)) { error_report("RP: Received '%s' message (0x%04x) with" "incorrect length %d expecting %zu", rp_cmd_args[header_type].name, header_type, header_len, diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c index 61b059b..4474e18 100644 --- a/migration/qemu-file-unix.c +++ b/migration/qemu-file-unix.c @@ -53,18 +53,16 @@ static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, } if (size > 0) { - err = socket_error(); - - if (err != EAGAIN && err != EWOULDBLOCK) { + if (errno != EAGAIN && errno != EWOULDBLOCK) { error_report("socket_writev_buffer: Got err=%d for (%zu/%zu)", - err, (size_t)size, (size_t)len); + errno, (size_t)size, (size_t)len); /* * If I've already sent some but only just got the error, I * could return the amount validly sent so far and wait for the * next call to report the error, but I'd rather flag the error * immediately. */ - return -err; + return -errno; } /* Emulate blocking */ @@ -99,15 +97,15 @@ static ssize_t socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, if (len != -1) { break; } - if (socket_error() == EAGAIN) { + if (errno == EAGAIN) { yield_until_fd_readable(s->fd); - } else if (socket_error() != EINTR) { + } else if (errno != EINTR) { break; } } if (len == -1) { - len = -socket_error(); + len = -errno; } return len; } diff --git a/migration/savevm.c b/migration/savevm.c index 96e7db5..0a33c22 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1494,17 +1494,22 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) qemu_sem_init(&mis->listen_thread_sem, 0); qemu_thread_create(&mis->listen_thread, "postcopy/listen", postcopy_ram_listen_thread, mis->from_src_file, - QEMU_THREAD_JOINABLE); + QEMU_THREAD_DETACHED); qemu_sem_wait(&mis->listen_thread_sem); qemu_sem_destroy(&mis->listen_thread_sem); return 0; } + +typedef struct { + QEMUBH *bh; +} HandleRunBhData; + static void loadvm_postcopy_handle_run_bh(void *opaque) { Error *local_err = NULL; - MigrationIncomingState *mis = opaque; + HandleRunBhData *data = opaque; /* TODO we should move all of this lot into postcopy_ram.c or a shared code * in migration.c @@ -1532,13 +1537,15 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) runstate_set(RUN_STATE_PAUSED); } - qemu_bh_delete(mis->bh); + qemu_bh_delete(data->bh); + g_free(data); } /* After all discards we can start running and asking for pages */ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); + HandleRunBhData *data; trace_loadvm_postcopy_handle_run(); if (ps != POSTCOPY_INCOMING_LISTENING) { @@ -1546,8 +1553,9 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) return -1; } - mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, NULL); - qemu_bh_schedule(mis->bh); + data = g_new(HandleRunBhData, 1); + data->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, data); + qemu_bh_schedule(data->bh); /* We need to finish reading the stream from the package * and also stop reading anything more from the stream that loaded the diff --git a/migration/tcp.c b/migration/tcp.c index e888a4e..e1fa7f8 100644 --- a/migration/tcp.c +++ b/migration/tcp.c @@ -59,12 +59,11 @@ static void tcp_accept_incoming_migration(void *opaque) socklen_t addrlen = sizeof(addr); int s = (intptr_t)opaque; QEMUFile *f; - int c, err; + int c; do { c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen); - err = socket_error(); - } while (c < 0 && err == EINTR); + } while (c < 0 && errno == EINTR); qemu_set_fd_handler(s, NULL, NULL, NULL); closesocket(s); @@ -72,7 +71,7 @@ static void tcp_accept_incoming_migration(void *opaque) if (c < 0) { error_report("could not accept migration connection (%s)", - strerror(err)); + strerror(errno)); return; } diff --git a/net/socket.c b/net/socket.c index e32e3cb..73dc49a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -145,15 +145,14 @@ static void net_socket_send_completed(NetClientState *nc, ssize_t len) static void net_socket_send(void *opaque) { NetSocketState *s = opaque; - int size, err; + int size; unsigned l; uint8_t buf1[NET_BUFSIZE]; const uint8_t *buf; size = qemu_recv(s->fd, buf1, sizeof(buf1), 0); if (size < 0) { - err = socket_error(); - if (err != EWOULDBLOCK) + if (errno != EWOULDBLOCK) goto eoc; } else if (size == 0) { /* end of connection */ @@ -566,7 +565,7 @@ static int net_socket_connect_init(NetClientState *peer, const char *host_str) { NetSocketState *s; - int fd, connected, ret, err; + int fd, connected, ret; struct sockaddr_in saddr; if (parse_host_port(&saddr, host_str) < 0) @@ -583,14 +582,12 @@ static int net_socket_connect_init(NetClientState *peer, for(;;) { ret = connect(fd, (struct sockaddr *)&saddr, sizeof(saddr)); if (ret < 0) { - err = socket_error(); - if (err == EINTR || err == EWOULDBLOCK) { - } else if (err == EINPROGRESS) { - break; -#ifdef _WIN32 - } else if (err == WSAEALREADY || err == WSAEINVAL) { + if (errno == EINTR || errno == EWOULDBLOCK) { + /* continue */ + } else if (errno == EINPROGRESS || + errno == EALREADY || + errno == EINVAL) { break; -#endif } else { perror("connect"); closesocket(fd); diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index 415508b..4925302 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ b/pc-bios/s390-ccw/bootmap.c @@ -424,7 +424,7 @@ static void ipl_scsi(void) IPL_assert(magic_match(sec, ZIPL_MAGIC), "No zIPL magic"); ns_end = sec + virtio_get_block_size(); - for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns++) { + for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns += pte_len) { prog_table_entry = (ScsiBlockPtr *)ns; if (!prog_table_entry->blockno) { break; diff --git a/qapi-schema.json b/qapi-schema.json index 362c9d8..6269c37 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -540,15 +540,15 @@ # @auto-converge: If enabled, QEMU will automatically throttle down the guest # to speed up convergence of RAM migration. (since 1.6) # -# @x-postcopy-ram: Start executing on the migration target before all of RAM has +# @postcopy-ram: Start executing on the migration target before all of RAM has # been migrated, pulling the remaining pages along as needed. NOTE: If -# the migration fails during postcopy the VM will fail. (since 2.5) +# the migration fails during postcopy the VM will fail. (since 2.6) # # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', - 'compress', 'events', 'x-postcopy-ram'] } + 'compress', 'events', 'postcopy-ram'] } ## # @MigrationCapabilityStatus @@ -705,7 +705,7 @@ # @migrate-start-postcopy # # Followup to a migration command to switch the migration to postcopy mode. -# The x-postcopy-ram capability must be set before the original migration +# The postcopy-ram capability must be set before the original migration # command. # # Since: 2.5 diff --git a/qemu-char.c b/qemu-char.c index e0147f3..3bf30b5 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -3091,20 +3091,6 @@ static void tcp_chr_close(CharDriverState *chr) qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } -static void qemu_chr_finish_socket_connection(CharDriverState *chr, - QIOChannelSocket *sioc) -{ - TCPCharDriver *s = chr->opaque; - - if (s->is_listen) { - s->listen_ioc = sioc; - s->listen_tag = qio_channel_add_watch( - QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); - } else { - tcp_chr_new_client(chr, sioc); - object_unref(OBJECT(sioc)); - } -} static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque) { @@ -3119,37 +3105,11 @@ static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque) } s->connect_err_reported = false; - qemu_chr_finish_socket_connection(chr, sioc); -} - -static bool qemu_chr_open_socket_fd(CharDriverState *chr, Error **errp) -{ - TCPCharDriver *s = chr->opaque; - QIOChannelSocket *sioc = qio_channel_socket_new(); - - if (s->is_listen) { - if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { - goto fail; - } - qemu_chr_finish_socket_connection(chr, sioc); - } else if (s->reconnect_time) { - qio_channel_socket_connect_async(sioc, s->addr, - qemu_chr_socket_connected, - chr, NULL); - } else { - if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { - goto fail; - } - qemu_chr_finish_socket_connection(chr, sioc); - } - - return true; - - fail: + tcp_chr_new_client(chr, sioc); object_unref(OBJECT(sioc)); - return false; } + /*********************************************************/ /* Ring buffer chardev */ @@ -4258,19 +4218,11 @@ static CharDriverState *qmp_chardev_open_parallel(const char *id, #endif /* WIN32 */ -static void socket_try_connect(CharDriverState *chr) -{ - Error *err = NULL; - - if (!qemu_chr_open_socket_fd(chr, &err)) { - check_report_connect_error(chr, err); - } -} - static gboolean socket_reconnect_timeout(gpointer opaque) { CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; + QIOChannelSocket *sioc; s->reconnect_timer = 0; @@ -4278,7 +4230,10 @@ static gboolean socket_reconnect_timeout(gpointer opaque) return false; } - socket_try_connect(chr); + sioc = qio_channel_socket_new(); + qio_channel_socket_connect_async(sioc, s->addr, + qemu_chr_socket_connected, + chr, NULL); return false; } @@ -4298,6 +4253,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, bool is_waitconnect = sock->has_wait ? sock->wait : false; int64_t reconnect = sock->has_reconnect ? sock->reconnect : 0; ChardevCommon *common = qapi_ChardevSocket_base(sock); + QIOChannelSocket *sioc = NULL; chr = qemu_chr_alloc(common, errp); if (!chr) { @@ -4367,22 +4323,40 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, s->reconnect_time = reconnect; } + sioc = qio_channel_socket_new(); if (s->reconnect_time) { - socket_try_connect(chr); - } else if (!qemu_chr_open_socket_fd(chr, errp)) { - goto error; - } - - if (is_listen && is_waitconnect) { - fprintf(stderr, "QEMU waiting for connection on: %s\n", - chr->filename); - tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); + qio_channel_socket_connect_async(sioc, s->addr, + qemu_chr_socket_connected, + chr, NULL); + } else if (s->is_listen) { + if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { + goto error; + } + s->listen_ioc = sioc; + if (is_waitconnect) { + fprintf(stderr, "QEMU waiting for connection on: %s\n", + chr->filename); + tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); + } qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL); + if (!s->ioc) { + s->listen_tag = qio_channel_add_watch( + QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); + } + } else { + if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { + goto error; + } + tcp_chr_new_client(chr, sioc); + object_unref(OBJECT(sioc)); } return chr; error: + if (sioc) { + object_unref(OBJECT(sioc)); + } if (s->tls_creds) { object_unref(OBJECT(s->tls_creds)); } diff --git a/qmp-commands.hx b/qmp-commands.hx index b629673..9e05365 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -3683,7 +3683,7 @@ Enable/Disable migration capabilities - "zero-blocks": compress zero blocks during block migration - "compress": use multiple compression threads to accelerate live migration - "events": generate events for each migration state change -- "x-postcopy-ram": postcopy mode for live migration +- "postcopy-ram": postcopy mode for live migration Arguments: @@ -3713,7 +3713,7 @@ Query current migration capabilities - "zero-blocks" : Zero Blocks state (json-bool) - "compress": Multiple compression threads state (json-bool) - "events": Migration state change event state (json-bool) - - "x-postcopy-ram": postcopy ram state (json-bool) + - "postcopy-ram": postcopy ram state (json-bool) Arguments: @@ -3727,7 +3727,7 @@ Example: {"state": false, "capability": "zero-blocks"}, {"state": false, "capability": "compress"}, {"state": true, "capability": "events"}, - {"state": false, "capability": "x-postcopy-ram"} + {"state": false, "capability": "postcopy-ram"} ]} EQMP diff --git a/slirp/slirp.h b/slirp/slirp.h index 07c13b4..a6741e7 100644 --- a/slirp/slirp.h +++ b/slirp/slirp.h @@ -14,8 +14,6 @@ typedef char *caddr_t; # include <iphlpapi.h> #else -# define ioctlsocket ioctl -# define closesocket(s) close(s) # if !defined(__HAIKU__) # define O_BINARY 0 # endif diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c index 2027a75..03be56e 100644 --- a/slirp/tcp_input.c +++ b/slirp/tcp_input.c @@ -586,11 +586,7 @@ findso: } if ((tcp_fconnect(so, so->so_ffamily) == -1) && -#if defined(_WIN32) - socket_error() != WSAEWOULDBLOCK -#else (errno != EINPROGRESS) && (errno != EWOULDBLOCK) -#endif ) { u_char code=ICMP_UNREACH_NET; DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n", diff --git a/target-s390x/cpu-qom.h b/target-s390x/cpu-qom.h index 029a44a..1c90933 100644 --- a/target-s390x/cpu-qom.h +++ b/target-s390x/cpu-qom.h @@ -47,6 +47,8 @@ typedef struct S390CPUClass { CPUClass parent_class; /*< public >*/ + int64_t next_cpu_id; + DeviceRealize parent_realize; void (*parent_reset)(CPUState *cpu); void (*load_normal)(CPUState *cpu); @@ -66,6 +68,7 @@ typedef struct S390CPU { /*< public >*/ CPUS390XState env; + int64_t id; /* needed for live migration */ void *irqstate; uint32_t irqstate_saved_size; diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c index 73a910d..1cbf703 100644 --- a/target-s390x/cpu.c +++ b/target-s390x/cpu.c @@ -30,8 +30,11 @@ #include "qemu/error-report.h" #include "hw/hw.h" #include "trace.h" +#include "qapi/visitor.h" #ifndef CONFIG_USER_ONLY #include "sysemu/arch_init.h" +#include "sysemu/sysemu.h" +#include "hw/s390x/sclp.h" #endif #define CR0_RESET 0xE0UL @@ -195,7 +198,39 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); S390CPUClass *scc = S390_CPU_GET_CLASS(dev); + S390CPU *cpu = S390_CPU(dev); + CPUS390XState *env = &cpu->env; + Error *err = NULL; + +#if !defined(CONFIG_USER_ONLY) + if (cpu->id >= max_cpus) { + error_setg(&err, "Unable to add CPU: %" PRIi64 + ", max allowed: %d", cpu->id, max_cpus - 1); + goto out; + } +#endif + if (cpu_exists(cpu->id)) { + error_setg(&err, "Unable to add CPU: %" PRIi64 + ", it already exists", cpu->id); + goto out; + } + if (cpu->id != scc->next_cpu_id) { + error_setg(&err, "Unable to add CPU: %" PRIi64 + ", The next available id is %" PRIi64, cpu->id, + scc->next_cpu_id); + goto out; + } + + cpu_exec_init(cs, &err); + if (err != NULL) { + goto out; + } + scc->next_cpu_id++; +#if !defined(CONFIG_USER_ONLY) + qemu_register_reset(s390_cpu_machine_reset_cb, cpu); +#endif + env->cpu_num = cpu->id; s390_cpu_gdb_init(cs); qemu_init_vcpu(cs); #if !defined(CONFIG_USER_ONLY) @@ -204,7 +239,55 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp) cpu_reset(cs); #endif - scc->parent_realize(dev, errp); + scc->parent_realize(dev, &err); + +#if !defined(CONFIG_USER_ONLY) + if (dev->hotplugged) { + raise_irq_cpu_hotplug(); + } +#endif + +out: + error_propagate(errp, err); +} + +static void s390x_cpu_get_id(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + S390CPU *cpu = S390_CPU(obj); + int64_t value = cpu->id; + + visit_type_int(v, name, &value, errp); +} + +static void s390x_cpu_set_id(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + S390CPU *cpu = S390_CPU(obj); + DeviceState *dev = DEVICE(obj); + const int64_t min = 0; + const int64_t max = UINT32_MAX; + Error *err = NULL; + int64_t value; + + if (dev->realized) { + error_setg(errp, "Attempt to set property '%s' on '%s' after " + "it was realized", name, object_get_typename(obj)); + return; + } + + visit_type_int(v, name, &value, &err); + if (err) { + error_propagate(errp, err); + return; + } + if (value < min || value > max) { + error_setg(errp, "Property %s.%s doesn't take value %" PRId64 + " (minimum: %" PRId64 ", maximum: %" PRId64 ")" , + object_get_typename(obj), name, value, min, max); + return; + } + cpu->id = value; } static void s390_cpu_initfn(Object *obj) @@ -213,15 +296,16 @@ static void s390_cpu_initfn(Object *obj) S390CPU *cpu = S390_CPU(obj); CPUS390XState *env = &cpu->env; static bool inited; - static int cpu_num = 0; #if !defined(CONFIG_USER_ONLY) struct tm tm; #endif cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); + cs->halted = 1; + cs->exception_index = EXCP_HLT; + object_property_add(OBJECT(cpu), "id", "int64_t", s390x_cpu_get_id, + s390x_cpu_set_id, NULL, NULL, NULL); #if !defined(CONFIG_USER_ONLY) - qemu_register_reset(s390_cpu_machine_reset_cb, cpu); qemu_get_timedate(&tm, 0); env->tod_offset = TOD_UNIX_EPOCH + (time2tod(mktimegm(&tm)) * 1000000000ULL); @@ -230,7 +314,6 @@ static void s390_cpu_initfn(Object *obj) env->cpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, s390x_cpu_timer, cpu); s390_cpu_set_state(CPU_STATE_STOPPED, cpu); #endif - env->cpu_num = cpu_num++; if (tcg_enabled() && !inited) { inited = true; @@ -337,6 +420,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) CPUClass *cc = CPU_CLASS(scc); DeviceClass *dc = DEVICE_CLASS(oc); + scc->next_cpu_id = 0; scc->parent_realize = dc->realize; dc->realize = s390_cpu_realizefn; @@ -369,7 +453,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_arch_name = s390_gdb_arch_name; /* - * Reason: s390_cpu_initfn() calls cpu_exec_init(), which saves + * Reason: s390_cpu_realizefn() calls cpu_exec_init(), which saves * the object in cpus -> dangling pointer after final * object_unref(). */ diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index 49c8415..6d97c08 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -413,6 +413,8 @@ void trigger_pgm_exception(CPUS390XState *env, uint32_t code, uint32_t ilen); #endif S390CPU *cpu_s390x_init(const char *cpu_model); +S390CPU *s390x_new_cpu(const char *cpu_model, int64_t id, Error **errp); +S390CPU *cpu_s390x_create(const char *cpu_model, Error **errp); void s390x_translate_init(void); int cpu_s390x_exec(CPUState *cpu); diff --git a/target-s390x/helper.c b/target-s390x/helper.c index 838bdd9..76d5fbe 100644 --- a/target-s390x/helper.c +++ b/target-s390x/helper.c @@ -65,14 +65,51 @@ void s390x_cpu_timer(void *opaque) } #endif -S390CPU *cpu_s390x_init(const char *cpu_model) +S390CPU *cpu_s390x_create(const char *cpu_model, Error **errp) { S390CPU *cpu; cpu = S390_CPU(object_new(TYPE_S390_CPU)); - object_property_set_bool(OBJECT(cpu), true, "realized", NULL); + return cpu; +} + +S390CPU *s390x_new_cpu(const char *cpu_model, int64_t id, Error **errp) +{ + S390CPU *cpu; + Error *err = NULL; + + cpu = cpu_s390x_create(cpu_model, &err); + if (err != NULL) { + goto out; + } + + object_property_set_int(OBJECT(cpu), id, "id", &err); + if (err != NULL) { + goto out; + } + object_property_set_bool(OBJECT(cpu), true, "realized", &err); +out: + if (err) { + error_propagate(errp, err); + object_unref(OBJECT(cpu)); + cpu = NULL; + } + return cpu; +} + +S390CPU *cpu_s390x_init(const char *cpu_model) +{ + Error *err = NULL; + S390CPU *cpu; + /* Use to track CPU ID for linux-user only */ + static int64_t next_cpu_id; + + cpu = s390x_new_cpu(cpu_model, next_cpu_id++, &err); + if (err) { + error_report_err(err); + } return cpu; } diff --git a/tests/io-channel-helpers.c b/tests/io-channel-helpers.c index 8440669..a4dedbe 100644 --- a/tests/io-channel-helpers.c +++ b/tests/io-channel-helpers.c @@ -132,7 +132,7 @@ static gpointer test_io_thread_reader(gpointer opaque) if (ret == QIO_CHANNEL_ERR_BLOCK) { if (data->blocking) { - error_setg(&data->writeerr, + error_setg(&data->readerr, "Unexpected I/O blocking"); break; } else { @@ -233,11 +233,11 @@ void qio_channel_test_run_reader(QIOChannelTest *test, void qio_channel_test_validate(QIOChannelTest *test) { + g_assert(test->readerr == NULL); + g_assert(test->writeerr == NULL); g_assert_cmpint(memcmp(test->input, test->output, test->len), ==, 0); - g_assert(test->readerr == NULL); - g_assert(test->writeerr == NULL); g_free(test->inputv); g_free(test->outputv); diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c index 8a34056..ae665f5 100644 --- a/tests/test-io-channel-socket.c +++ b/tests/test-io-channel-socket.c @@ -22,66 +22,49 @@ #include "io/channel-socket.h" #include "io/channel-util.h" #include "io-channel-helpers.h" -#ifdef HAVE_IFADDRS_H -#include <ifaddrs.h> -#endif -static int check_protocol_support(bool *has_ipv4, bool *has_ipv6) +static int check_bind(struct sockaddr *sa, socklen_t salen, bool *has_proto) { -#ifdef HAVE_IFADDRS_H - struct ifaddrs *ifaddr = NULL, *ifa; - struct addrinfo hints = { 0 }; - struct addrinfo *ai = NULL; - int gaierr; - - *has_ipv4 = *has_ipv6 = false; + int fd; - if (getifaddrs(&ifaddr) < 0) { - g_printerr("Failed to lookup interface addresses: %s\n", - strerror(errno)); + fd = socket(sa->sa_family, SOCK_STREAM, 0); + if (fd < 0) { return -1; } - for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { - if (!ifa->ifa_addr) { - continue; - } - - if (ifa->ifa_addr->sa_family == AF_INET) { - *has_ipv4 = true; - } - if (ifa->ifa_addr->sa_family == AF_INET6) { - *has_ipv6 = true; + if (bind(fd, sa, salen) < 0) { + close(fd); + if (errno == EADDRNOTAVAIL) { + *has_proto = false; + return 0; } + return -1; } - freeifaddrs(ifaddr); - - hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; - hints.ai_family = AF_INET6; - hints.ai_socktype = SOCK_STREAM; - - gaierr = getaddrinfo("::1", NULL, &hints, &ai); - if (gaierr != 0) { - if (gaierr == EAI_ADDRFAMILY || - gaierr == EAI_FAMILY || - gaierr == EAI_NONAME) { - *has_ipv6 = false; - } else { - g_printerr("Failed to resolve ::1 address: %s\n", - gai_strerror(gaierr)); - return -1; - } - } + close(fd); + *has_proto = true; + return 0; +} - freeaddrinfo(ai); +static int check_protocol_support(bool *has_ipv4, bool *has_ipv6) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr = { .s_addr = htonl(INADDR_LOOPBACK) }, + }; + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + }; - return 0; -#else - *has_ipv4 = *has_ipv6 = false; + if (check_bind((struct sockaddr *)&sin, sizeof(sin), has_ipv4) < 0) { + return -1; + } + if (check_bind((struct sockaddr *)&sin6, sizeof(sin6), has_ipv6) < 0) { + return -1; + } - return -1; -#endif + return 0; } @@ -131,6 +114,7 @@ static void test_io_channel_setup_sync(SocketAddress *listen_addr, QIO_CHANNEL_SOCKET(*src), connect_addr, &error_abort); qio_channel_set_delay(*src, false); + qio_channel_wait(QIO_CHANNEL(lioc), G_IO_IN); *dst = QIO_CHANNEL(qio_channel_socket_accept(lioc, &error_abort)); g_assert(*dst); @@ -198,6 +182,7 @@ static void test_io_channel_setup_async(SocketAddress *listen_addr, g_assert(!data.err); + qio_channel_wait(QIO_CHANNEL(lioc), G_IO_IN); *dst = QIO_CHANNEL(qio_channel_socket_accept(lioc, &error_abort)); g_assert(*dst); @@ -487,10 +472,20 @@ static void test_io_channel_ipv4_fd(void) { QIOChannel *ioc; int fd = -1; + struct sockaddr_in sa = { + .sin_family = AF_INET, + .sin_addr = { + .s_addr = htonl(INADDR_LOOPBACK), + } + /* Leave port unset for auto-assign */ + }; + socklen_t salen = sizeof(sa); fd = socket(AF_INET, SOCK_STREAM, 0); g_assert_cmpint(fd, >, -1); + g_assert_cmpint(bind(fd, (struct sockaddr *)&sa, salen), ==, 0); + ioc = qio_channel_new_fd(fd, &error_abort); g_assert_cmpstr(object_get_typename(OBJECT(ioc)), @@ -506,6 +501,7 @@ int main(int argc, char **argv) bool has_ipv4, has_ipv6; module_call_init(MODULE_INIT_QOM); + socket_init(); g_test_init(&argc, &argv, NULL); diff --git a/trace-events b/trace-events index 5d0d483..0ad8a1c 100644 --- a/trace-events +++ b/trace-events @@ -1654,6 +1654,7 @@ vfio_msix_enable(const char *name) " (%s)" vfio_msix_pba_disable(const char *name) " (%s)" vfio_msix_pba_enable(const char *name) " (%s)" vfio_msix_disable(const char *name) " (%s)" +vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]" vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" vfio_msi_disable(const char *name) " (%s)" vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" @@ -1672,7 +1673,6 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s" vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:" vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" -vfio_populate_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" vfio_initfn(const char *name, int group_id) " (%s) group %d" @@ -1728,13 +1728,17 @@ vfio_disconnect_container(int fd) "close container->fd=%d" vfio_put_group(int fd) "close group->fd=%d" vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" vfio_put_base_device(int fd) "close vdev->fd=%d" +vfio_region_setup(const char *dev, int index, const char *name, unsigned long flags, unsigned long offset, unsigned long size) "Device %s, region %d \"%s\", flags: %lx, offset: %lx, size: %lx" +vfio_region_mmap_fault(const char *name, int index, unsigned long offset, unsigned long size, int fault) "Region %s mmaps[%d], [%lx - %lx], fault: %d" +vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Region %s [%lx - %lx]" +vfio_region_exit(const char *name, int index) "Device %s, region %d" +vfio_region_finalize(const char *name, int index) "Device %s, region %d" +vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d" # hw/vfio/platform.c -vfio_platform_populate_regions(int region_index, unsigned long flag, unsigned long size, int fd, unsigned long offset) "- region %d flags = 0x%lx, size = 0x%lx, fd= %d, offset = 0x%lx" vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s" vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)" -vfio_platform_mmap_set_enabled(bool enabled) "fast path = %d" vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path" vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)" diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 438cfa4..a3f0664 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -2,7 +2,7 @@ * os-win32.c * * Copyright (c) 2003-2008 Fabrice Bellard - * Copyright (c) 2010 Red Hat, Inc. + * Copyright (c) 2010-2016 Red Hat, Inc. * * QEMU library functions for win32 which are shared between QEMU and * the QEMU tools. @@ -144,6 +144,83 @@ int socket_set_fast_reuse(int fd) return 0; } + +static int socket_error(void) +{ + switch (WSAGetLastError()) { + case 0: + return 0; + case WSAEINTR: + return EINTR; + case WSAEINVAL: + return EINVAL; + case WSA_INVALID_HANDLE: + return EBADF; + case WSA_NOT_ENOUGH_MEMORY: + return ENOMEM; + case WSA_INVALID_PARAMETER: + return EINVAL; + case WSAENAMETOOLONG: + return ENAMETOOLONG; + case WSAENOTEMPTY: + return ENOTEMPTY; + case WSAEWOULDBLOCK: + /* not using EWOULDBLOCK as we don't want code to have + * to check both EWOULDBLOCK and EAGAIN */ + return EAGAIN; + case WSAEINPROGRESS: + return EINPROGRESS; + case WSAEALREADY: + return EALREADY; + case WSAENOTSOCK: + return ENOTSOCK; + case WSAEDESTADDRREQ: + return EDESTADDRREQ; + case WSAEMSGSIZE: + return EMSGSIZE; + case WSAEPROTOTYPE: + return EPROTOTYPE; + case WSAENOPROTOOPT: + return ENOPROTOOPT; + case WSAEPROTONOSUPPORT: + return EPROTONOSUPPORT; + case WSAEOPNOTSUPP: + return EOPNOTSUPP; + case WSAEAFNOSUPPORT: + return EAFNOSUPPORT; + case WSAEADDRINUSE: + return EADDRINUSE; + case WSAEADDRNOTAVAIL: + return EADDRNOTAVAIL; + case WSAENETDOWN: + return ENETDOWN; + case WSAENETUNREACH: + return ENETUNREACH; + case WSAENETRESET: + return ENETRESET; + case WSAECONNABORTED: + return ECONNABORTED; + case WSAECONNRESET: + return ECONNRESET; + case WSAENOBUFS: + return ENOBUFS; + case WSAEISCONN: + return EISCONN; + case WSAENOTCONN: + return ENOTCONN; + case WSAETIMEDOUT: + return ETIMEDOUT; + case WSAECONNREFUSED: + return ECONNREFUSED; + case WSAELOOP: + return ELOOP; + case WSAEHOSTUNREACH: + return EHOSTUNREACH; + default: + return EIO; + } +} + int inet_aton(const char *cp, struct in_addr *ia) { uint32_t addr = inet_addr(cp); @@ -504,3 +581,204 @@ pid_t qemu_fork(Error **errp) "cannot fork child process"); return -1; } + + +#undef connect +int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen) +{ + int ret; + ret = connect(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef listen +int qemu_listen_wrap(int sockfd, int backlog) +{ + int ret; + ret = listen(sockfd, backlog); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef bind +int qemu_bind_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen) +{ + int ret; + ret = bind(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef socket +int qemu_socket_wrap(int domain, int type, int protocol) +{ + int ret; + ret = socket(domain, type, protocol); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef accept +int qemu_accept_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = accept(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef shutdown +int qemu_shutdown_wrap(int sockfd, int how) +{ + int ret; + ret = shutdown(sockfd, how); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef ioctlsocket +int qemu_ioctlsocket_wrap(int fd, int req, void *val) +{ + int ret; + ret = ioctlsocket(fd, req, val); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef closesocket +int qemu_closesocket_wrap(int fd) +{ + int ret; + ret = closesocket(fd); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getsockopt +int qemu_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, socklen_t *optlen) +{ + int ret; + ret = getsockopt(sockfd, level, optname, optval, optlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef setsockopt +int qemu_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, socklen_t optlen) +{ + int ret; + ret = setsockopt(sockfd, level, optname, optval, optlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getpeername +int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = getpeername(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getsockname +int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = getsockname(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef send +ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags) +{ + int ret; + ret = send(sockfd, buf, len, flags); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef sendto +ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t addrlen) +{ + int ret; + ret = sendto(sockfd, buf, len, flags, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef recv +ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags) +{ + int ret; + ret = recv(sockfd, buf, len, flags); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef recvfrom +ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *addrlen) +{ + int ret; + ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c index 0d5041c..91b9357 100644 --- a/util/qemu-coroutine-io.c +++ b/util/qemu-coroutine-io.c @@ -35,18 +35,16 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, { size_t done = 0; ssize_t ret; - int err; while (done < bytes) { ret = iov_send_recv(sockfd, iov, iov_cnt, offset + done, bytes - done, do_send); if (ret > 0) { done += ret; } else if (ret < 0) { - err = socket_error(); - if (err == EAGAIN || err == EWOULDBLOCK) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { qemu_coroutine_yield(); } else if (done == 0) { - return -err; + return -errno; } else { break; } diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index ad7c00c..fd37ac2 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -268,7 +268,7 @@ static void wait_for_connect(void *opaque) do { rc = qemu_getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize); - } while (rc == -1 && socket_error() == EINTR); + } while (rc == -1 && errno == EINTR); /* update rc to contain error */ if (!rc && val) { @@ -330,7 +330,7 @@ static int inet_connect_addr(struct addrinfo *addr, bool *in_progress, do { rc = 0; if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) { - rc = -socket_error(); + rc = -errno; } } while (rc == -EINTR); @@ -787,7 +787,7 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp, do { rc = 0; if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) { - rc = -socket_error(); + rc = -errno; } } while (rc == -EINTR); @@ -1082,7 +1082,7 @@ SocketAddress *socket_local_address(int fd, Error **errp) socklen_t sslen = sizeof(ss); if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) { - error_setg_errno(errp, socket_error(), "%s", + error_setg_errno(errp, errno, "%s", "Unable to query local socket address"); return NULL; } @@ -1097,7 +1097,7 @@ SocketAddress *socket_remote_address(int fd, Error **errp) socklen_t sslen = sizeof(ss); if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) { - error_setg_errno(errp, socket_error(), "%s", + error_setg_errno(errp, errno, "%s", "Unable to query remote socket address"); return NULL; } |