diff options
Diffstat (limited to 'hw')
37 files changed, 770 insertions, 450 deletions
diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c index b67b4a9..b7bc6e4 100644 --- a/hw/acpi/acpi-pci-hotplug-stub.c +++ b/hw/acpi/acpi-pci-hotplug-stub.c @@ -34,7 +34,7 @@ void acpi_pcihp_reset(AcpiPciHpState *s) { } -bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus) +bool acpi_pcihp_is_hotpluggable_bus(AcpiPciHpState *s, BusState *bus) { return true; } diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index d8adfea..7a62f8d 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -458,11 +458,11 @@ static void acpi_ged_initfn(Object *obj) * container for memory hotplug IO and expose it as GED sysbus * MMIO so that boards can map it separately. */ - memory_region_init(&s->container_memhp, OBJECT(dev), "memhp container", - MEMORY_HOTPLUG_IO_LEN); - sysbus_init_mmio(sbd, &s->container_memhp); - acpi_memory_hotplug_init(&s->container_memhp, OBJECT(dev), - &s->memhp_state, 0); + memory_region_init(&s->container_memhp, OBJECT(dev), "memhp container", + MEMORY_HOTPLUG_IO_LEN); + sysbus_init_mmio(sbd, &s->container_memhp); + acpi_memory_hotplug_init(&s->container_memhp, OBJECT(dev), + &s->memhp_state, 0); memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st, TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index c7a735b..967b674 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -41,15 +41,6 @@ #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" -//#define DEBUG - -#ifdef DEBUG -#define ICH9_DEBUG(fmt, ...) \ -do { printf("%s "fmt, __func__, ## __VA_ARGS__); } while (0) -#else -#define ICH9_DEBUG(fmt, ...) do { } while (0) -#endif - static void ich9_pm_update_sci_fn(ACPIREGS *regs) { ICH9LPCPMRegs *pm = container_of(regs, ICH9LPCPMRegs, acpi_regs); @@ -135,8 +126,6 @@ static const MemoryRegionOps ich9_smi_ops = { void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base) { - ICH9_DEBUG("to 0x%x\n", pm_io_base); - assert((pm_io_base & ICH9_PMIO_MASK) == 0); pm->pm_io_base = pm_io_base; @@ -570,7 +559,7 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); - return acpi_pcihp_is_hotpluggbale_bus(&lpc->pm.acpi_pci_hotplug, bus); + return acpi_pcihp_is_hotpluggable_bus(&lpc->pm.acpi_pci_hotplug, bus); } void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 5f79c90..aac9001 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -371,7 +371,7 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } -bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus) +bool acpi_pcihp_is_hotpluggable_bus(AcpiPciHpState *s, BusState *bus) { Object *o = OBJECT(bus->parent); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index b16d45f..d98b80d 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -406,7 +406,7 @@ static bool piix4_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus) { PIIX4PMState *s = PIIX4_PM(hotplug_dev); - return acpi_pcihp_is_hotpluggbale_bus(&s->acpi_pci_hotplug, bus); + return acpi_pcihp_is_hotpluggable_bus(&s->acpi_pci_hotplug, bus); } static void piix4_pm_machine_ready(Notifier *n, void *opaque) diff --git a/hw/core/machine.c b/hw/core/machine.c index ed01798..b8ae155 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -41,7 +41,7 @@ GlobalProperty hw_compat_10_0[] = {}; const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0); GlobalProperty hw_compat_9_2[] = { - {"arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, + { "arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, { "virtio-balloon-pci", "vectors", "0" }, { "virtio-balloon-pci-transitional", "vectors", "0" }, { "virtio-balloon-pci-non-transitional", "vectors", "0" }, @@ -58,12 +58,12 @@ GlobalProperty hw_compat_9_1[] = { const size_t hw_compat_9_1_len = G_N_ELEMENTS(hw_compat_9_1); GlobalProperty hw_compat_9_0[] = { - {"arm-cpu", "backcompat-cntfrq", "true" }, + { "arm-cpu", "backcompat-cntfrq", "true" }, { "scsi-hd", "migrate-emulated-scsi-request", "false" }, { "scsi-cd", "migrate-emulated-scsi-request", "false" }, - {"vfio-pci", "skip-vsc-check", "false" }, + { "vfio-pci", "skip-vsc-check", "false" }, { "virtio-pci", "x-pcie-pm-no-soft-reset", "off" }, - {"sd-card", "spec_version", "2" }, + { "sd-card", "spec_version", "2" }, }; const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0); diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m index 2ff1c90..8dde1f1 100644 --- a/hw/display/apple-gfx.m +++ b/hw/display/apple-gfx.m @@ -69,7 +69,7 @@ struct PGTask_s { mach_vm_address_t address; uint64_t len; /* - * All unique MemoryRegions for which a mapping has been created in in this + * All unique MemoryRegions for which a mapping has been created in this * task, and on which we have thus called memory_region_ref(). There are * usually very few regions of system RAM in total, so we expect this array * to be very short. Therefore, no need for sorting or fancy search diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 06c4e7e..43d4c08 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -18,6 +18,7 @@ #include "chardev/char-fe.h" #include "qapi/error.h" #include "migration/blocker.h" +#include "standard-headers/drm/drm_fourcc.h" typedef enum VhostUserGpuRequest { VHOST_USER_GPU_NONE = 0, @@ -249,7 +250,9 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg) case VHOST_USER_GPU_DMABUF_SCANOUT: { VhostUserGpuDMABUFScanout *m = &msg->payload.dmabuf_scanout; int fd = qemu_chr_fe_get_msgfd(&g->vhost_chr); - uint64_t modifier = 0; + uint32_t offset = 0; + uint32_t stride = m->fd_stride; + uint64_t modifier = DRM_FORMAT_MOD_INVALID; QemuDmaBuf *dmabuf; if (m->scanout_id >= g->parent_obj.conf.max_outputs) { @@ -282,10 +285,10 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg) } dmabuf = qemu_dmabuf_new(m->width, m->height, - m->fd_stride, 0, 0, + &offset, &stride, 0, 0, m->fd_width, m->fd_height, m->fd_drm_fourcc, modifier, - fd, false, m->fd_flags & + &fd, 1, false, m->fd_flags & VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP); dpy_gl_scanout_dmabuf(con, dmabuf); diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c index 0510577..d804f32 100644 --- a/hw/display/virtio-gpu-udmabuf.c +++ b/hw/display/virtio-gpu-udmabuf.c @@ -25,6 +25,7 @@ #include <linux/memfd.h> #include "qemu/memfd.h" #include "standard-headers/linux/udmabuf.h" +#include "standard-headers/drm/drm_fourcc.h" static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res) { @@ -176,16 +177,19 @@ static VGPUDMABuf struct virtio_gpu_rect *r) { VGPUDMABuf *dmabuf; + uint32_t offset = 0; if (res->dmabuf_fd < 0) { return NULL; } dmabuf = g_new0(VGPUDMABuf, 1); - dmabuf->buf = qemu_dmabuf_new(r->width, r->height, fb->stride, + dmabuf->buf = qemu_dmabuf_new(r->width, r->height, + &offset, &fb->stride, r->x, r->y, fb->width, fb->height, qemu_pixman_to_drm_format(fb->format), - 0, res->dmabuf_fd, true, false); + DRM_FORMAT_MOD_INVALID, &res->dmabuf_fd, + 1, true, false); dmabuf->scanout_id = scanout_id; QTAILQ_INSERT_HEAD(&g->dmabuf.bufs, dmabuf, next); diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 0271cfd..e4d0688 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -13,6 +13,8 @@ #include "qapi/error.h" #include "system/address-spaces.h" #include "system/memory.h" +#include "exec/target_page.h" +#include "linux/kvm.h" #include "system/kvm.h" #include "qemu/bitops.h" #include "qemu/error-report.h" @@ -23,7 +25,6 @@ #include "hw/hyperv/hyperv.h" #include "qom/object.h" #include "target/i386/kvm/hyperv-proto.h" -#include "target/i386/cpu.h" #include "exec/target_page.h" struct SynICState { diff --git a/hw/hyperv/meson.build b/hw/hyperv/meson.build index d3d2668..d1cf781 100644 --- a/hw/hyperv/meson.build +++ b/hw/hyperv/meson.build @@ -1,5 +1,6 @@ -specific_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c')) -specific_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c')) -specific_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c')) -specific_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c')) -specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c'), if_false: files('hv-balloon-stub.c')) +system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c')) +system_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c')) +system_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c')) +system_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c')) +system_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c')) +system_ss.add(when: 'CONFIG_HV_BALLOON', if_false: files('hv-balloon-stub.c')) diff --git a/hw/hyperv/syndbg.c b/hw/hyperv/syndbg.c index ca29182..8b8a147 100644 --- a/hw/hyperv/syndbg.c +++ b/hw/hyperv/syndbg.c @@ -10,11 +10,11 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/sockets.h" +#include "qemu/units.h" #include "qapi/error.h" #include "migration/vmstate.h" #include "hw/qdev-properties.h" #include "hw/loader.h" -#include "cpu.h" #include "exec/target_page.h" #include "hw/hyperv/hyperv.h" #include "hw/hyperv/vmbus-bridge.h" @@ -184,12 +184,15 @@ static bool create_udp_pkt(HvSynDbg *syndbg, void *pkt, uint32_t pkt_len, return true; } +#define MSG_BUFSZ (4 * KiB) + static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa, uint32_t count, bool is_raw, uint32_t options, uint64_t timeout, uint32_t *retrieved_count) { uint16_t ret; - uint8_t data_buf[TARGET_PAGE_SIZE - UDP_PKT_HEADER_SIZE]; + g_assert(MSG_BUFSZ >= qemu_target_page_size()); + uint8_t data_buf[MSG_BUFSZ]; hwaddr out_len; void *out_data; ssize_t recv_byte_count; @@ -202,7 +205,7 @@ static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa, recv_byte_count = 0; } else { recv_byte_count = recv(syndbg->socket, data_buf, - MIN(sizeof(data_buf), count), MSG_WAITALL); + MIN(MSG_BUFSZ, count), MSG_WAITALL); if (recv_byte_count == -1) { return HV_STATUS_INVALID_PARAMETER; } diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c index b147ea0..961406c 100644 --- a/hw/hyperv/vmbus.c +++ b/hw/hyperv/vmbus.c @@ -19,7 +19,7 @@ #include "hw/hyperv/vmbus.h" #include "hw/hyperv/vmbus-bridge.h" #include "hw/sysbus.h" -#include "cpu.h" +#include "exec/target_page.h" #include "trace.h" enum { diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c index 91f84c2..d26177c 100644 --- a/hw/i2c/imx_i2c.c +++ b/hw/i2c/imx_i2c.c @@ -79,13 +79,12 @@ static void imx_i2c_reset(DeviceState *dev) static inline void imx_i2c_raise_interrupt(IMXI2CState *s) { - /* - * raise an interrupt if the device is enabled and it is configured - * to generate some interrupts. - */ - if (imx_i2c_is_enabled(s) && imx_i2c_interrupt_is_enabled(s)) { + if (imx_i2c_is_enabled(s)) { s->i2sr |= I2SR_IIF; - qemu_irq_raise(s->irq); + + if (imx_i2c_interrupt_is_enabled(s)) { + qemu_irq_raise(s->irq); + } } } diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 3fffa4a..f40ad06 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -139,7 +139,7 @@ static void init_common_fadt_data(MachineState *ms, Object *o, /* * "ICH9-LPC" or "PIIX4_PM" has "smm-compat" property to keep the old * behavior for compatibility irrelevant to smm_enabled, which doesn't - * comforms to ACPI spec. + * conform to the ACPI spec. */ bool smm_enabled = object_property_get_bool(o, "smm-compat", NULL) ? true : x86_machine_is_smm_enabled(x86ms); @@ -589,8 +589,8 @@ void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus) } } -static bool build_append_notfication_callback(Aml *parent_scope, - const PCIBus *bus) +static bool build_append_notification_callback(Aml *parent_scope, + const PCIBus *bus) { Aml *method; PCIBus *sec; @@ -604,7 +604,7 @@ static bool build_append_notfication_callback(Aml *parent_scope, continue; } nr_notifiers = nr_notifiers + - build_append_notfication_callback(br_scope, sec); + build_append_notification_callback(br_scope, sec); /* * add new child scope to parent * and keep track of bus that have PCNT, @@ -1773,7 +1773,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, PCIBus *b = PCI_HOST_BRIDGE(pci_host)->bus; scope = aml_scope("\\_SB.PCI0"); - has_pcnt = build_append_notfication_callback(scope, b); + has_pcnt = build_append_notification_callback(scope, b); if (has_pcnt) { aml_append(dsdt, scope); } diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h index 0dce155..275ec05 100644 --- a/hw/i386/acpi-build.h +++ b/hw/i386/acpi-build.h @@ -5,7 +5,7 @@ extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio; -/* PCI Hot-plug registers bases. See docs/spec/acpi_pci_hotplug.txt */ +/* PCI Hot-plug registers' base. See docs/specs/acpi_pci_hotplug.rst */ #define ACPI_PCIHP_SEJ_BASE 0x8 #define ACPI_PCIHP_BNMR_BASE 0x10 diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 71afb45..304dffa 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -182,7 +182,6 @@ static uint64_t ich9_cc_read(void *opaque, hwaddr addr, } /* IRQ routing */ -/* */ static void ich9_lpc_rout(uint8_t pirq_rout, int *pic_irq, int *pic_dis) { *pic_irq = pirq_rout & ICH9_LPC_PIRQ_ROUT_MASK; diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index cd896fc..e9a74de 100644 --- a/hw/net/e1000x_regs.h +++ b/hw/net/e1000x_regs.h @@ -900,7 +900,7 @@ struct e1000_context_desc { uint16_t tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; - uint32_t cmd_and_length; /* */ + uint32_t cmd_and_length; union { uint32_t data; struct { diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index e8b4c64..0aba47c 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -182,7 +182,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) /* * Resources defined for PXBs are composed of the following parts: - * 1. The resources the pci-brige/pcie-root-port need. + * 1. The resources the pci-bridge/pcie-root-port need. * 2. The resources the devices behind pxb need. */ crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), &crs_range_set, diff --git a/hw/pci/pci.c b/hw/pci/pci.c index fe38c4c..352b3d1 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -54,13 +54,6 @@ #include "hw/xen/xen.h" #include "hw/i386/kvm/xen_evtchn.h" -//#define DEBUG_PCI -#ifdef DEBUG_PCI -# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) -#else -# define PCI_DPRINTF(format, ...) do { } while (0) -#endif - bool pci_available = true; static char *pcibus_get_dev_path(DeviceState *dev); @@ -2439,12 +2432,12 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Only a valid rom will be patched. */ rom_magic = pci_get_word(ptr); if (rom_magic != 0xaa55) { - PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic); + trace_pci_bad_rom_magic(rom_magic, 0xaa55); return; } pcir_offset = pci_get_word(ptr + 0x18); if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) { - PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset); + trace_pci_bad_pcir_offset(pcir_offset); return; } @@ -2453,8 +2446,8 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) rom_vendor_id = pci_get_word(ptr + pcir_offset + 4); rom_device_id = pci_get_word(ptr + pcir_offset + 6); - PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile, - vendor_id, device_id, rom_vendor_id, rom_device_id); + trace_pci_rom_and_pci_ids(pdev->romfile, vendor_id, device_id, + rom_vendor_id, rom_device_id); checksum = ptr[6]; @@ -2462,7 +2455,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Patch vendor id and checksum (at offset 6 for etherboot roms). */ checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8); checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8); - PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + trace_pci_rom_checksum_change(ptr[6], checksum); ptr[6] = checksum; pci_set_word(ptr + pcir_offset + 4, vendor_id); } @@ -2471,7 +2464,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Patch device id and checksum (at offset 6 for etherboot roms). */ checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8); checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8); - PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + trace_pci_rom_checksum_change(ptr[6], checksum); ptr[6] = checksum; pci_set_word(ptr + pcir_offset + 6, device_id); } diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index 54f639e..f3841a2 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -188,7 +188,7 @@ int pcie_count_ds_ports(PCIBus *bus) return dsp_count; } -static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler, +static bool pcie_slot_is_hotpluggable_bus(HotplugHandler *plug_handler, BusState *bus) { PCIESlot *s = PCIE_SLOT(bus->parent); @@ -221,7 +221,7 @@ static void pcie_slot_class_init(ObjectClass *oc, const void *data) hc->plug = pcie_cap_slot_plug_cb; hc->unplug = pcie_cap_slot_unplug_cb; hc->unplug_request = pcie_cap_slot_unplug_request_cb; - hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus; + hc->is_hotpluggable_bus = pcie_slot_is_hotpluggable_bus; } static const TypeInfo pcie_slot_type_info = { diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 6a99689..02c80d3 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -6,6 +6,10 @@ pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, u pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" +pci_bad_rom_magic(uint16_t bad_rom_magic, uint16_t good_rom_magic) "Bad ROM magic number: %04"PRIX16". Should be: %04"PRIX16 +pci_bad_pcir_offset(uint16_t pcir_offset) "Bad PCIR offset 0x%"PRIx16" or signature" +pci_rom_and_pci_ids(char *romfile, uint16_t vendor_id, uint16_t device_id, uint16_t rom_vendor_id, uint16_t rom_device_id) "%s: ROM ID %04"PRIx16":%04"PRIx16" | PCI ID %04"PRIx16":%04"PRIx16 +pci_rom_checksum_change(uint8_t old_checksum, uint8_t new_checksum) "ROM checksum changed from %02"PRIx8" to %02"PRIx8 # pci_host.c pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x" diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 1207c08..785c0a0 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -74,10 +74,10 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, unsigned int irq, Error **errp) { int fd; - size_t argsz; + int ret; IOHandler *fd_read; EventNotifier *notifier; - g_autofree struct vfio_irq_info *irq_info = NULL; + struct vfio_irq_info irq_info; VFIODevice *vdev = &vapdev->vdev; switch (irq) { @@ -96,14 +96,15 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, return false; } - argsz = sizeof(*irq_info); - irq_info = g_malloc0(argsz); - irq_info->index = irq; - irq_info->argsz = argsz; + ret = vfio_device_get_irq_info(vdev, irq, &irq_info); + + if (ret < 0) { + error_setg_errno(errp, -ret, "vfio: Error getting irq info"); + return false; + } - if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, - irq_info) < 0 || irq_info->count < 1) { - error_setg_errno(errp, errno, "vfio: Error getting irq info"); + if (irq_info.count < 1) { + error_setg(errp, "vfio: Error getting irq info, count=0"); return false; } diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index fde0c3f..cea9d6e 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -376,8 +376,8 @@ static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, Error **errp) { VFIODevice *vdev = &vcdev->vdev; - g_autofree struct vfio_irq_info *irq_info = NULL; - size_t argsz; + struct vfio_irq_info irq_info; + int ret; int fd; EventNotifier *notifier; IOHandler *fd_read; @@ -406,13 +406,15 @@ static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, return false; } - argsz = sizeof(*irq_info); - irq_info = g_malloc0(argsz); - irq_info->index = irq; - irq_info->argsz = argsz; - if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, - irq_info) < 0 || irq_info->count < 1) { - error_setg_errno(errp, errno, "vfio: Error getting irq info"); + ret = vfio_device_get_irq_info(vdev, irq, &irq_info); + + if (ret < 0) { + error_setg_errno(errp, -ret, "vfio: Error getting irq info"); + return false; + } + + if (irq_info.count < 1) { + error_setg(errp, "vfio: Error getting irq info, count=0"); return false; } @@ -502,7 +504,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) vcdev->io_region_offset = info->offset; vcdev->io_region = g_malloc0(info->size); - g_free(info); /* check for the optional async command region */ ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -515,7 +516,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->async_cmd_region_offset = info->offset; vcdev->async_cmd_region = g_malloc0(info->size); - g_free(info); } ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -528,7 +528,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->schib_region_offset = info->offset; vcdev->schib_region = g_malloc(info->size); - g_free(info); } ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -542,7 +541,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->crw_region_offset = info->offset; vcdev->crw_region = g_malloc(info->size); - g_free(info); } return true; @@ -552,7 +550,6 @@ out_err: g_free(vcdev->schib_region); g_free(vcdev->async_cmd_region); g_free(vcdev->io_region); - g_free(info); return false; } diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 09340fd..1c6ca94 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -85,12 +85,12 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) + IOMMUTLBEntry *iotlb, bool unmap_all) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); g_assert(vioc->dma_unmap); - return vioc->dma_unmap(bcontainer, iova, size, iotlb); + return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all); } bool vfio_container_add_section_window(VFIOContainerBase *bcontainer, @@ -198,11 +198,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT; - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { - return -errno; - } - - return 0; + return vbasedev->io_ops->device_feature(vbasedev, feature); } static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer, diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 77ff56b..a9f0dba 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -119,12 +119,9 @@ unmap_exit: return ret; } -/* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ -static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) +static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) { const VFIOContainer *container = container_of(bcontainer, VFIOContainer, bcontainer); @@ -181,6 +178,34 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, return 0; } +/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb, bool unmap_all) +{ + int ret; + + if (unmap_all) { + /* The unmap ioctl doesn't accept a full 64-bit span. */ + Int128 llsize = int128_rshift(int128_2_64(), 1); + + ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize), + iotlb); + + if (ret == 0) { + ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize), + int128_get64(llsize), iotlb); + } + + } else { + ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb); + } + + return ret; +} + static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly) { @@ -205,7 +230,7 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, */ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || (errno == EBUSY && - vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && + vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 && ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { return 0; } @@ -511,16 +536,10 @@ static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) return true; } -static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, - Error **errp) +static bool vfio_container_attach_discard_disable(VFIOContainer *container, + VFIOGroup *group, Error **errp) { - VFIOContainer *container; - VFIOContainerBase *bcontainer; - int ret, fd; - VFIOAddressSpace *space; - VFIOIOMMUClass *vioc; - - space = vfio_address_space_get(as); + int ret; /* * VFIO is currently incompatible with discarding of RAM insofar as the @@ -553,97 +572,118 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, * details once we know which type of IOMMU we are using. */ + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { + error_report("vfio: error disconnecting group %d from" + " container", group->groupid); + } + } + return !ret; +} + +static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group, + Error **errp) +{ + if (!vfio_container_attach_discard_disable(container, group, errp)) { + return false; + } + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + vfio_group_add_kvm_device(group); + return true; +} + +static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group) +{ + QLIST_REMOVE(group, container_next); + group->container = NULL; + vfio_group_del_kvm_device(group); + vfio_ram_block_discard_disable(container, false); +} + +static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, + Error **errp) +{ + VFIOContainer *container; + VFIOContainerBase *bcontainer; + int ret, fd = -1; + VFIOAddressSpace *space; + VFIOIOMMUClass *vioc = NULL; + bool new_container = false; + bool group_was_added = false; + + space = vfio_address_space_get(as); + QLIST_FOREACH(bcontainer, &space->containers, next) { container = container_of(bcontainer, VFIOContainer, bcontainer); if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, - "Cannot set discarding of RAM broken"); - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, - &container->fd)) { - error_report("vfio: error disconnecting group %d from" - " container", group->groupid); - } - return false; - } - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - vfio_group_add_kvm_device(group); - return true; + return vfio_container_group_add(container, group, errp); } } fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); if (fd < 0) { - goto put_space_exit; + goto fail; } ret = ioctl(fd, VFIO_GET_API_VERSION); if (ret != VFIO_API_VERSION) { error_setg(errp, "supported vfio version: %d, " "reported version: %d", VFIO_API_VERSION, ret); - goto close_fd_exit; + goto fail; } container = vfio_create_container(fd, group, errp); if (!container) { - goto close_fd_exit; + goto fail; } + new_container = true; bcontainer = &container->bcontainer; if (!vfio_cpr_register_container(bcontainer, errp)) { - goto free_container_exit; - } - - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto unregister_container_exit; + goto fail; } vioc = VFIO_IOMMU_GET_CLASS(bcontainer); assert(vioc->setup); if (!vioc->setup(bcontainer, errp)) { - goto enable_discards_exit; + goto fail; } - vfio_group_add_kvm_device(group); - vfio_address_space_insert(space, bcontainer); - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); + if (!vfio_container_group_add(container, group, errp)) { + goto fail; + } + group_was_added = true; if (!vfio_listener_register(bcontainer, errp)) { - goto listener_release_exit; + goto fail; } bcontainer->initialized = true; return true; -listener_release_exit: - QLIST_REMOVE(group, container_next); - vfio_group_del_kvm_device(group); + +fail: vfio_listener_unregister(bcontainer); - if (vioc->release) { + + if (group_was_added) { + vfio_container_group_del(container, group); + } + if (vioc && vioc->release) { vioc->release(bcontainer); } - -enable_discards_exit: - vfio_ram_block_discard_disable(container, false); - -unregister_container_exit: - vfio_cpr_unregister_container(bcontainer); - -free_container_exit: - object_unref(container); - -close_fd_exit: - close(fd); - -put_space_exit: + if (new_container) { + vfio_cpr_unregister_container(bcontainer); + object_unref(container); + } + if (fd >= 0) { + close(fd); + } vfio_address_space_put(space); return false; @@ -811,18 +851,14 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, } } + vfio_device_prepare(vbasedev, &group->container->bcontainer, info); + vbasedev->fd = fd; vbasedev->group = group; QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - vbasedev->num_irqs = info->num_irqs; - vbasedev->num_regions = info->num_regions; - vbasedev->flags = info->flags; - trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs); - vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); - return true; } @@ -875,7 +911,6 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, int groupid = vfio_device_get_groupid(vbasedev, errp); VFIODevice *vbasedev_iter; VFIOGroup *group; - VFIOContainerBase *bcontainer; if (groupid < 0) { return false; @@ -904,11 +939,6 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, goto device_put_exit; } - bcontainer = &group->container->bcontainer; - vbasedev->bcontainer = bcontainer; - QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); - return true; device_put_exit: @@ -922,10 +952,10 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) { VFIOGroup *group = vbasedev->group; - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; trace_vfio_device_detach(vbasedev->name, group->groupid); + + vfio_device_unprepare(vbasedev); + object_unref(vbasedev->hiod); vfio_device_put(vbasedev); vfio_group_put(group); diff --git a/hw/vfio/device.c b/hw/vfio/device.c index d625a7c..9fba2c7 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -82,7 +82,7 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index) .count = 0, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } void vfio_device_irq_unmask(VFIODevice *vbasedev, int index) @@ -95,7 +95,7 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } void vfio_device_irq_mask(VFIODevice *vbasedev, int index) @@ -108,7 +108,7 @@ void vfio_device_irq_mask(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } static inline const char *action_to_str(int action) @@ -167,7 +167,7 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex pfd = (int32_t *)&irq_set->data; *pfd = fd; - if (!ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + if (!vbasedev->io_ops->set_irqs(vbasedev, irq_set)) { return true; } @@ -185,10 +185,28 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex return false; } +int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, + struct vfio_irq_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->argsz = sizeof(*info); + info->index = index; + + return vbasedev->io_ops->get_irq_info(vbasedev, info); +} + int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); + int ret; + + /* check cache */ + if (vbasedev->reginfo[index] != NULL) { + *info = vbasedev->reginfo[index]; + return 0; + } *info = g_malloc0(argsz); @@ -196,10 +214,11 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + ret = vbasedev->io_ops->get_region_info(vbasedev, *info); + if (ret != 0) { g_free(*info); *info = NULL; - return -errno; + return ret; } if ((*info)->argsz > argsz) { @@ -209,6 +228,9 @@ retry: goto retry; } + /* fill cache */ + vbasedev->reginfo[index] = *info; + return 0; } @@ -227,7 +249,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE); if (!hdr) { - g_free(*info); continue; } @@ -239,8 +260,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, if (cap_type->type == type && cap_type->subtype == subtype) { return 0; } - - g_free(*info); } *info = NULL; @@ -249,7 +268,7 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) { - g_autofree struct vfio_region_info *info = NULL; + struct vfio_region_info *info = NULL; bool ret = false; if (!vfio_device_get_region_info(vbasedev, region, &info)) { @@ -305,11 +324,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) vbasedev->fd = fd; } +static VFIODeviceIOOps vfio_device_io_ops_ioctl; + void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard) { vbasedev->type = type; vbasedev->ops = ops; + vbasedev->io_ops = &vfio_device_io_ops_ioctl; vbasedev->dev = dev; vbasedev->fd = -1; @@ -370,27 +392,35 @@ bool vfio_device_hiod_create_and_realize(VFIODevice *vbasedev, VFIODevice *vfio_get_vfio_device(Object *obj) { if (object_dynamic_cast(obj, TYPE_VFIO_PCI)) { - return &VFIO_PCI(obj)->vbasedev; + return &VFIO_PCI_BASE(obj)->vbasedev; } else { return NULL; } } -bool vfio_device_attach(char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) +bool vfio_device_attach_by_iommu_type(const char *iommu_type, char *name, + VFIODevice *vbasedev, AddressSpace *as, + Error **errp) { const VFIOIOMMUClass *ops = - VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); - - if (vbasedev->iommufd) { - ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - } + VFIO_IOMMU_CLASS(object_class_by_name(iommu_type)); assert(ops); return ops->attach_device(name, vbasedev, as, errp); } +bool vfio_device_attach(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) +{ + const char *iommu_type = vbasedev->iommufd ? + TYPE_VFIO_IOMMU_IOMMUFD : + TYPE_VFIO_IOMMU_LEGACY; + + return vfio_device_attach_by_iommu_type(iommu_type, name, vbasedev, + as, errp); +} + void vfio_device_detach(VFIODevice *vbasedev) { if (!vbasedev->bcontainer) { @@ -398,3 +428,120 @@ void vfio_device_detach(VFIODevice *vbasedev) } VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev); } + +void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, + struct vfio_device_info *info) +{ + vbasedev->num_irqs = info->num_irqs; + vbasedev->num_regions = info->num_regions; + vbasedev->flags = info->flags; + vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); + + vbasedev->bcontainer = bcontainer; + QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); + + QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + + vbasedev->reginfo = g_new0(struct vfio_region_info *, + vbasedev->num_regions); +} + +void vfio_device_unprepare(VFIODevice *vbasedev) +{ + int i; + + for (i = 0; i < vbasedev->num_regions; i++) { + g_free(vbasedev->reginfo[i]); + } + g_free(vbasedev->reginfo); + vbasedev->reginfo = NULL; + + QLIST_REMOVE(vbasedev, container_next); + QLIST_REMOVE(vbasedev, global_next); + vbasedev->bcontainer = NULL; +} + +/* + * Traditional ioctl() based io + */ + +static int vfio_device_io_device_feature(VFIODevice *vbasedev, + struct vfio_device_feature *feature) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_get_irq_info(VFIODevice *vbasedev, + struct vfio_irq_info *info) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_set_irqs(VFIODevice *vbasedev, + struct vfio_irq_set *irqs) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irqs); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_region_read(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data) +{ + struct vfio_region_info *info; + int ret; + + ret = vfio_device_get_region_info(vbasedev, index, &info); + if (ret != 0) { + return ret; + } + + ret = pread(vbasedev->fd, data, size, info->offset + off); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_region_write(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data) +{ + struct vfio_region_info *info; + int ret; + + ret = vfio_device_get_region_info(vbasedev, index, &info); + if (ret != 0) { + return ret; + } + + ret = pwrite(vbasedev->fd, data, size, info->offset + off); + + return ret < 0 ? -errno : ret; +} + +static VFIODeviceIOOps vfio_device_io_ops_ioctl = { + .device_feature = vfio_device_io_device_feature, + .get_region_info = vfio_device_io_get_region_info, + .get_irq_info = vfio_device_io_get_irq_info, + .set_irqs = vfio_device_io_set_irqs, + .region_read = vfio_device_io_region_read, + .region_write = vfio_device_io_region_write, +}; diff --git a/hw/vfio/display.c b/hw/vfio/display.c index f3e6581..9c6f5aa 100644 --- a/hw/vfio/display.c +++ b/hw/vfio/display.c @@ -213,6 +213,7 @@ static VFIODMABuf *vfio_display_get_dmabuf(VFIOPCIDevice *vdev, struct vfio_device_gfx_plane_info plane; VFIODMABuf *dmabuf; int fd, ret; + uint32_t offset = 0; memset(&plane, 0, sizeof(plane)); plane.argsz = sizeof(plane); @@ -245,10 +246,10 @@ static VFIODMABuf *vfio_display_get_dmabuf(VFIOPCIDevice *vdev, dmabuf = g_new0(VFIODMABuf, 1); dmabuf->dmabuf_id = plane.dmabuf_id; - dmabuf->buf = qemu_dmabuf_new(plane.width, plane.height, - plane.stride, 0, 0, plane.width, + dmabuf->buf = qemu_dmabuf_new(plane.width, plane.height, &offset, + &plane.stride, 0, 0, plane.width, plane.height, plane.drm_format, - plane.drm_format_mod, fd, false, false); + plane.drm_format_mod, &fd, 1, false, false); if (plane_type == DRM_PLANE_TYPE_CURSOR) { vfio_display_update_cursor(dmabuf, &plane); diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index d7e4728..e7952d1 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -103,6 +103,7 @@ static int igd_gen(VFIOPCIDevice *vdev) /* * Unfortunately, Intel changes it's specification quite often. This makes * it impossible to use a suitable default value for unknown devices. + * Return -1 for not applying any generation-specific quirks. */ return -1; } @@ -182,16 +183,13 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); - return true; } -static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) +static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev, + struct vfio_region_info **opregion, + Error **errp) { - g_autofree struct vfio_region_info *opregion = NULL; int ret; /* Hotplugging is not supported for opregion access */ @@ -202,17 +200,13 @@ static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) ret = vfio_device_get_region_info_type(&vdev->vbasedev, VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion); if (ret) { error_setg_errno(errp, -ret, "Device does not supports IGD OpRegion feature"); return false; } - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { - return false; - } - return true; } @@ -355,8 +349,8 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) { - g_autofree struct vfio_region_info *host = NULL; - g_autofree struct vfio_region_info *lpc = NULL; + struct vfio_region_info *host = NULL; + struct vfio_region_info *lpc = NULL; PCIDevice *lpc_bridge; int ret; @@ -419,6 +413,44 @@ static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) return true; } +static bool vfio_pci_igd_override_gms(int gen, uint32_t gms, uint32_t *gmch) +{ + bool ret = false; + + if (gen == -1) { + error_report("x-igd-gms is not supported on this device"); + } else if (gen < 8) { + if (gms <= 0x10) { + *gmch &= ~(IGD_GMCH_GEN6_GMS_MASK << IGD_GMCH_GEN6_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN6_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, "x-igd-gms", "0~0x10"); + } + } else if (gen == 8) { + if (gms <= 0x40) { + *gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN8_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, "x-igd-gms", "0~0x40"); + } + } else { + /* 0x0 to 0x40: 32MB increments starting at 0MB */ + /* 0xf0 to 0xfe: 4MB increments starting at 4MB */ + if ((gms <= 0x40) || (gms >= 0xf0 && gms <= 0xfe)) { + *gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN8_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, + "x-igd-gms", "0~0x40 or 0xf0~0xfe"); + } + } + + return ret; +} + #define IGD_GGC_MMIO_OFFSET 0x108040 #define IGD_BDSM_MMIO_OFFSET 0x1080C0 @@ -428,41 +460,35 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) VFIOConfigMirrorQuirk *ggc_mirror, *bdsm_mirror; int gen; - /* - * This must be an Intel VGA device at address 00:02.0 for us to even - * consider enabling legacy mode. Some driver have dependencies on the PCI - * bus address. - */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || !vfio_is_vga(vdev) || nr != 0) { return; } - /* - * Only on IGD devices of gen 11 and above, the BDSM register is mirrored - * into MMIO space and read from MMIO space by the Windows driver. - */ + /* Only on IGD Gen6-12 device needs quirks in BAR 0 */ gen = igd_gen(vdev); if (gen < 6) { return; } - ggc_quirk = vfio_quirk_alloc(1); - ggc_mirror = ggc_quirk->data = g_malloc0(sizeof(*ggc_mirror)); - ggc_mirror->mem = ggc_quirk->mem; - ggc_mirror->vdev = vdev; - ggc_mirror->bar = nr; - ggc_mirror->offset = IGD_GGC_MMIO_OFFSET; - ggc_mirror->config_offset = IGD_GMCH; - - memory_region_init_io(ggc_mirror->mem, OBJECT(vdev), - &vfio_generic_mirror_quirk, ggc_mirror, - "vfio-igd-ggc-quirk", 2); - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, - ggc_mirror->offset, ggc_mirror->mem, - 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, ggc_quirk, next); + if (vdev->igd_gms) { + ggc_quirk = vfio_quirk_alloc(1); + ggc_mirror = ggc_quirk->data = g_malloc0(sizeof(*ggc_mirror)); + ggc_mirror->mem = ggc_quirk->mem; + ggc_mirror->vdev = vdev; + ggc_mirror->bar = nr; + ggc_mirror->offset = IGD_GGC_MMIO_OFFSET; + ggc_mirror->config_offset = IGD_GMCH; + + memory_region_init_io(ggc_mirror->mem, OBJECT(vdev), + &vfio_generic_mirror_quirk, ggc_mirror, + "vfio-igd-ggc-quirk", 2); + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, + ggc_mirror->offset, ggc_mirror->mem, + 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, ggc_quirk, next); + } bdsm_quirk = vfio_quirk_alloc(1); bdsm_mirror = bdsm_quirk->data = g_malloc0(sizeof(*bdsm_mirror)); @@ -484,44 +510,37 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) { + struct vfio_region_info *opregion = NULL; int ret, gen; - uint64_t gms_size; + uint64_t gms_size = 0; uint64_t *bdsm_size; uint32_t gmch; bool legacy_mode_enabled = false; Error *err = NULL; - /* - * This must be an Intel VGA device at address 00:02.0 for us to even - * consider enabling legacy mode. The vBIOS has dependencies on the - * PCI bus address. - */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || !vfio_is_vga(vdev)) { return true; } - /* - * IGD is not a standard, they like to change their specs often. We - * only attempt to support back to SandBridge and we hope that newer - * devices maintain compatibility with generation 8. - */ - gen = igd_gen(vdev); - if (gen == -1) { - error_report("IGD device %s is unsupported in legacy mode, " - "try SandyBridge or newer", vdev->vbasedev.name); + /* IGD device always comes with OpRegion */ + if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) { return true; } + info_report("OpRegion detected on Intel display %x.", vdev->device_id); + gen = igd_gen(vdev); gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); /* * For backward compatibility, enable legacy mode when + * - Device geneation is 6 to 9 (including both) * - Machine type is i440fx (pc_piix) * - IGD device is at guest BDF 00:02.0 * - Not manually disabled by x-igd-legacy-mode=off */ if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) && + (gen >= 6 && gen <= 9) && !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") && (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev), 0, PCI_DEVFN(0x2, 0)))) { @@ -532,7 +551,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) * - OpRegion * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host */ - g_autofree struct vfio_region_info *rom = NULL; + struct vfio_region_info *rom = NULL; legacy_mode_enabled = true; info_report("IGD legacy mode enabled, " @@ -566,13 +585,15 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC; } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) { error_setg(&err, - "Machine is not i440fx or assigned BDF is not 00:02.0"); + "Machine is not i440fx, assigned BDF is not 00:02.0, " + "or device %04x (gen %d) doesn't support legacy mode", + vdev->device_id, gen); goto error; } /* Setup OpRegion access */ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && - !vfio_pci_igd_setup_opregion(vdev, errp)) { + !vfio_pci_igd_opregion_init(vdev, opregion, errp)) { goto error; } @@ -580,7 +601,15 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) && !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) { goto error; - } + } + + /* + * ASLS (OpRegion address) is read-only, emulated + * It contains HPA, guest firmware need to reprogram it with GPA. + */ + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); /* * Allow user to override dsm size using x-igd-gms option, in multiples of @@ -588,56 +617,44 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) * set from DVMT Pre-Allocated option in host BIOS. */ if (vdev->igd_gms) { - if (gen < 8) { - if (vdev->igd_gms <= 0x10) { - gmch &= ~(IGD_GMCH_GEN6_GMS_MASK << IGD_GMCH_GEN6_GMS_SHIFT); - gmch |= vdev->igd_gms << IGD_GMCH_GEN6_GMS_SHIFT; - } else { - error_report(QERR_INVALID_PARAMETER_VALUE, - "x-igd-gms", "0~0x10"); - } - } else { - if (vdev->igd_gms <= 0x40) { - gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); - gmch |= vdev->igd_gms << IGD_GMCH_GEN8_GMS_SHIFT; - } else { - error_report(QERR_INVALID_PARAMETER_VALUE, - "x-igd-gms", "0~0x40"); - } + if (!vfio_pci_igd_override_gms(gen, vdev->igd_gms, &gmch)) { + return false; } + + /* GMCH is read-only, emulated */ + pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); + pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); + pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); } - gms_size = igd_stolen_memory_size(gen, gmch); + if (gen > 0) { + gms_size = igd_stolen_memory_size(gen, gmch); + + /* BDSM is read-write, emulated. BIOS needs to be able to write it */ + if (gen < 11) { + pci_set_long(vdev->pdev.config + IGD_BDSM, 0); + pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); + } else { + pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0); + pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0); + pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); + } + } /* * Request reserved memory for stolen memory via fw_cfg. VM firmware * must allocate a 1MB aligned reserved memory region below 4GB with - * the requested size (in bytes) for use by the Intel PCI class VGA - * device at VM address 00:02.0. The base address of this reserved - * memory region must be written to the device BDSM register at PCI - * config offset 0x5C. + * the requested size (in bytes) for use by the IGD device. The base + * address of this reserved memory region must be written to the + * device BDSM register. + * For newer device without BDSM register, this fw_cfg item is 0. */ bdsm_size = g_malloc(sizeof(*bdsm_size)); *bdsm_size = cpu_to_le64(gms_size); fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size", bdsm_size, sizeof(*bdsm_size)); - /* GMCH is read-only, emulated */ - pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); - pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); - pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); - - /* BDSM is read-write, emulated. The BIOS needs to be able to write it */ - if (gen < 11) { - pci_set_long(vdev->pdev.config + IGD_BDSM, 0); - pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); - } else { - pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0); - pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0); - pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); - } - trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); return true; @@ -664,8 +681,27 @@ error: */ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp) { + struct vfio_region_info *opregion = NULL; + int gen; + + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || + !vfio_is_vga(vdev)) { + return true; + } + + /* FIXME: Cherryview is Gen8, but don't support GVT-g */ + gen = igd_gen(vdev); + if (gen != 8 && gen != 9) { + return true; + } + + if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) { + /* Should never reach here, KVMGT always emulates OpRegion */ + return false; + } + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && - !vfio_pci_igd_setup_opregion(vdev, errp)) { + !vfio_pci_igd_opregion_init(vdev, opregion, errp)) { return false; } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 232c06d..af1c7ab 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -46,11 +46,28 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) + IOMMUTLBEntry *iotlb, bool unmap_all) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + /* unmap in halves */ + if (unmap_all) { + Int128 llsize = int128_rshift(int128_2_64(), 1); + int ret; + + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + 0, int128_get64(llsize)); + + if (ret == 0) { + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + int128_get64(llsize), + int128_get64(llsize)); + } + + return ret; + } + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ return iommufd_backend_unmap_dma(container->be, container->ioas_id, iova, size); @@ -588,14 +605,7 @@ found_container: iommufd_cdev_ram_block_discard_disable(false); } - vbasedev->group = 0; - vbasedev->num_irqs = dev_info.num_irqs; - vbasedev->num_regions = dev_info.num_regions; - vbasedev->flags = dev_info.flags; - vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - vbasedev->bcontainer = bcontainer; - QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + vfio_device_prepare(vbasedev, bcontainer, &dev_info); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -622,9 +632,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; + vfio_device_unprepare(vbasedev); if (!vbasedev->ram_block_discard_allowed) { iommufd_cdev_ram_block_discard_disable(false); diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index 6f77e18..bfacb3d 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -172,7 +172,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) } } else { ret = vfio_container_dma_unmap(bcontainer, iova, - iotlb->addr_mask + 1, iotlb); + iotlb->addr_mask + 1, iotlb, false); if (ret) { error_setg(&local_err, "vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " @@ -201,7 +201,7 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, int ret; /* Unmap with a single call. */ - ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL); + ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL, false); if (ret) { error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, strerror(-ret)); @@ -411,6 +411,32 @@ static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, return true; } +static void vfio_listener_begin(MemoryListener *listener) +{ + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); + void (*listener_begin)(VFIOContainerBase *bcontainer); + + listener_begin = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin; + + if (listener_begin) { + listener_begin(bcontainer); + } +} + +static void vfio_listener_commit(MemoryListener *listener) +{ + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); + void (*listener_commit)(VFIOContainerBase *bcontainer); + + listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin; + + if (listener_commit) { + listener_commit(bcontainer); + } +} + static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp) { /* @@ -634,21 +660,14 @@ static void vfio_listener_region_del(MemoryListener *listener, } if (try_unmap) { + bool unmap_all = false; + if (int128_eq(llsize, int128_2_64())) { - /* The unmap ioctl doesn't accept a full 64-bit span. */ - llsize = int128_rshift(llsize, 1); - ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); - if (ret) { - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", - bcontainer, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - iova += int128_get64(llsize); + unmap_all = true; + llsize = int128_zero(); } - ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); + ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize), + NULL, unmap_all); if (ret) { error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx") = %d (%s)", @@ -801,13 +820,17 @@ static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + int ret; + if (!vbasedev->dirty_tracking) { continue; } - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + ret = vbasedev->io_ops->device_feature(vbasedev, feature); + + if (ret != 0) { warn_report("%s: Failed to stop DMA logging, err %d (%s)", - vbasedev->name, -errno, strerror(errno)); + vbasedev->name, -ret, strerror(-ret)); } vbasedev->dirty_tracking = false; } @@ -908,10 +931,9 @@ static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, continue; } - ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + ret = vbasedev->io_ops->device_feature(vbasedev, feature); if (ret) { - ret = -errno; - error_setg_errno(errp, errno, "%s: Failed to start DMA logging", + error_setg_errno(errp, -ret, "%s: Failed to start DMA logging", vbasedev->name); goto out; } @@ -1165,6 +1187,8 @@ static void vfio_listener_log_sync(MemoryListener *listener, static const MemoryListener vfio_memory_listener = { .name = "vfio", + .begin = vfio_listener_begin, + .commit = vfio_listener_commit, .region_add = vfio_listener_region_add, .region_del = vfio_listener_region_del, .log_global_start = vfio_listener_log_global_start, diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 6908bcc..a1bfdfe 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -241,7 +241,7 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route) static void vfio_intx_routing_notifier(PCIDevice *pdev) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); PCIINTxRoute route; if (vdev->interrupt != VFIO_INT_INTx) { @@ -381,7 +381,7 @@ static void vfio_msi_interrupt(void *opaque) static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) { g_autofree struct vfio_irq_set *irq_set = NULL; - int ret = 0, argsz; + int argsz; int32_t *fd; argsz = sizeof(*irq_set) + sizeof(*fd); @@ -396,9 +396,7 @@ static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) fd = (int32_t *)&irq_set->data; *fd = -1; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); - - return ret; + return vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); } static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) @@ -455,7 +453,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) fds[i] = fd; } - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); g_free(irq_set); @@ -516,7 +514,7 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIOMSIVector *vector; int ret; bool resizing = !!(vdev->nr_vectors < nr + 1); @@ -581,7 +579,8 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); ret = vfio_enable_vectors(vdev, true); if (ret) { - error_report("vfio: failed to enable vectors, %d", ret); + error_report("vfio: failed to enable vectors, %s", + strerror(-ret)); } } else { Error *err = NULL; @@ -621,7 +620,7 @@ static int vfio_msix_vector_use(PCIDevice *pdev, static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIOMSIVector *vector = &vdev->msi_vectors[nr]; trace_vfio_msix_vector_release(vdev->vbasedev.name, nr); @@ -695,7 +694,8 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) if (vdev->nr_vectors) { ret = vfio_enable_vectors(vdev, true); if (ret) { - error_report("vfio: failed to enable vectors, %d", ret); + error_report("vfio: failed to enable vectors, %s", + strerror(-ret)); } } else { /* @@ -712,7 +712,8 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) */ ret = vfio_enable_msix_no_vec(vdev); if (ret) { - error_report("vfio: failed to enable MSI-X, %d", ret); + error_report("vfio: failed to enable MSI-X, %s", + strerror(-ret)); } } @@ -765,7 +766,8 @@ retry: ret = vfio_enable_vectors(vdev, false); if (ret) { if (ret < 0) { - error_report("vfio: Error: Failed to setup MSI fds: %m"); + error_report("vfio: Error: Failed to setup MSI fds: %s", + strerror(-ret)); } else { error_report("vfio: Error: Failed to enable %d " "MSI vectors, retry with %d", vdev->nr_vectors, ret); @@ -881,18 +883,22 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { - g_autofree struct vfio_region_info *reg_info = NULL; + VFIODevice *vbasedev = &vdev->vbasedev; + struct vfio_region_info *reg_info = NULL; uint64_t size; off_t off = 0; ssize_t bytes; + int ret; - if (vfio_device_get_region_info(&vdev->vbasedev, - VFIO_PCI_ROM_REGION_INDEX, ®_info)) { - error_report("vfio: Error getting ROM info: %m"); + ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_ROM_REGION_INDEX, + ®_info); + + if (ret != 0) { + error_report("vfio: Error getting ROM info: %s", strerror(-ret)); return; } - trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size, + trace_vfio_pci_load_rom(vbasedev->name, (unsigned long)reg_info->size, (unsigned long)reg_info->offset, (unsigned long)reg_info->flags); @@ -901,8 +907,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) if (!vdev->rom_size) { vdev->rom_read_failed = true; - error_report("vfio-pci: Cannot read device rom at " - "%s", vdev->vbasedev.name); + error_report("vfio-pci: Cannot read device rom at %s", vbasedev->name); error_printf("Device option ROM contents are probably invalid " "(check dmesg).\nSkip option ROM probe with rombar=0, " "or load from file with romfile=\n"); @@ -913,18 +918,22 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) memset(vdev->rom, 0xff, size); while (size) { - bytes = pread(vdev->vbasedev.fd, vdev->rom + off, - size, vdev->rom_offset + off); + bytes = vbasedev->io_ops->region_read(vbasedev, + VFIO_PCI_ROM_REGION_INDEX, + off, size, vdev->rom + off); + if (bytes == 0) { break; } else if (bytes > 0) { off += bytes; size -= bytes; } else { - if (errno == EINTR || errno == EAGAIN) { + if (bytes == -EINTR || bytes == -EAGAIN) { continue; } - error_report("vfio: Error reading device ROM: %m"); + error_report("vfio: Error reading device ROM: %s", + strreaderror(bytes)); + break; } } @@ -960,6 +969,24 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) } } +/* "Raw" read of underlying config space. */ +static int vfio_pci_config_space_read(VFIOPCIDevice *vdev, off_t offset, + uint32_t size, void *data) +{ + return vdev->vbasedev.io_ops->region_read(&vdev->vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, + offset, size, data); +} + +/* "Raw" write of underlying config space. */ +static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset, + uint32_t size, void *data) +{ + return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, + offset, size, data); +} + static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) { VFIOPCIDevice *vdev = opaque; @@ -1012,10 +1039,9 @@ static const MemoryRegionOps vfio_rom_ops = { static void vfio_pci_size_rom(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); - off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; char *name; - int fd = vdev->vbasedev.fd; if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ @@ -1032,11 +1058,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) * Use the same size ROM BAR as the physical device. The contents * will get filled in later when the guest tries to read it. */ - if (pread(fd, &orig, 4, offset) != 4 || - pwrite(fd, &size, 4, offset) != 4 || - pread(fd, &size, 4, offset) != 4 || - pwrite(fd, &orig, 4, offset) != 4) { - error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); + if (vfio_pci_config_space_read(vdev, PCI_ROM_ADDRESS, 4, &orig) != 4 || + vfio_pci_config_space_write(vdev, PCI_ROM_ADDRESS, 4, &size) != 4 || + vfio_pci_config_space_read(vdev, PCI_ROM_ADDRESS, 4, &size) != 4 || + vfio_pci_config_space_write(vdev, PCI_ROM_ADDRESS, 4, &orig) != 4) { + + error_report("%s(%s) ROM access failed", __func__, vbasedev->name); return; } @@ -1169,7 +1196,7 @@ static const MemoryRegionOps vfio_vga_ops = { */ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIORegion *region = &vdev->bars[bar].region; MemoryRegion *mmap_mr, *region_mr, *base_mr; PCIIORegion *r; @@ -1215,7 +1242,8 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) */ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); @@ -1228,12 +1256,12 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { ssize_t ret; - ret = pread(vdev->vbasedev.fd, &phys_val, len, - vdev->config_offset + addr); + ret = vfio_pci_config_space_read(vdev, addr, len, &phys_val); if (ret != len) { - error_report("%s(%s, 0x%x, 0x%x) failed: %m", - __func__, vdev->vbasedev.name, addr, len); - return -errno; + error_report("%s(%s, 0x%x, 0x%x) failed: %s", + __func__, vbasedev->name, addr, len, + strreaderror(ret)); + return -1; } phys_val = le32_to_cpu(phys_val); } @@ -1248,16 +1276,19 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t val_le = cpu_to_le32(val); + int ret; trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len); /* Write everything to VFIO, let it filter out what we can't write */ - if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) - != len) { - error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->vbasedev.name, addr, val, len); + ret = vfio_pci_config_space_write(vdev, addr, len, &val_le); + if (ret != len) { + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %s", + __func__, vbasedev->name, addr, val, len, + strwriteerror(ret)); } /* MSI/MSI-X Enabling/Disabling */ @@ -1345,9 +1376,11 @@ static bool vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) int ret, entries; Error *err = NULL; - if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { - error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_CAP_FLAGS, + sizeof(ctrl), &ctrl); + if (ret != sizeof(ctrl)) { + error_setg(errp, "failed reading MSI PCI_CAP_FLAGS: %s", + strreaderror(ret)); return false; } ctrl = le16_to_cpu(ctrl); @@ -1554,31 +1587,35 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) uint8_t pos; uint16_t ctrl; uint32_t table, pba; - int ret, fd = vdev->vbasedev.fd; - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info), - .index = VFIO_PCI_MSIX_IRQ_INDEX }; + struct vfio_irq_info irq_info; VFIOMSIXInfo *msix; + int ret; pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); if (!pos) { return true; } - if (pread(fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_FLAGS, + sizeof(ctrl), &ctrl); + if (ret != sizeof(ctrl)) { + error_setg(errp, "failed to read PCI MSIX FLAGS: %s", + strreaderror(ret)); return false; } - if (pread(fd, &table, sizeof(table), - vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_TABLE, + sizeof(table), &table); + if (ret != sizeof(table)) { + error_setg(errp, "failed to read PCI MSIX TABLE: %s", + strreaderror(ret)); return false; } - if (pread(fd, &pba, sizeof(pba), - vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX PBA"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_PBA, + sizeof(pba), &pba); + if (ret != sizeof(pba)) { + error_setg(errp, "failed to read PCI MSIX PBA: %s", strreaderror(ret)); return false; } @@ -1593,7 +1630,8 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, + &irq_info); if (ret < 0) { error_setg_errno(errp, -ret, "failed to get MSI-X irq info"); g_free(msix); @@ -1737,10 +1775,10 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) } /* Determine what type of BAR this is for registration */ - ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), - vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); + ret = vfio_pci_config_space_read(vdev, PCI_BASE_ADDRESS_0 + (4 * nr), + sizeof(pci_bar), &pci_bar); if (ret != sizeof(pci_bar)) { - error_report("vfio: Failed to read BAR %d (%m)", nr); + error_report("vfio: Failed to read BAR %d: %s", nr, strreaderror(ret)); return; } @@ -2443,21 +2481,23 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) void vfio_pci_post_reset(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev = &vdev->vbasedev; Error *err = NULL; - int nr; + int ret, nr; if (!vfio_intx_enable(vdev, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) { - off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr); + off_t addr = PCI_BASE_ADDRESS_0 + (4 * nr); uint32_t val = 0; uint32_t len = sizeof(val); - if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) { - error_report("%s(%s) reset bar %d failed: %m", __func__, - vdev->vbasedev.name, nr); + ret = vfio_pci_config_space_write(vdev, addr, len, &val); + if (ret != len) { + error_report("%s(%s) reset bar %d failed: %s", __func__, + vbasedev->name, nr, strwriteerror(ret)); } } @@ -2670,7 +2710,7 @@ static VFIODeviceOps vfio_pci_ops = { bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; - g_autofree struct vfio_region_info *reg_info = NULL; + struct vfio_region_info *reg_info = NULL; int ret; ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, ®_info); @@ -2735,8 +2775,8 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; - g_autofree struct vfio_region_info *reg_info = NULL; - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; + struct vfio_region_info *reg_info = NULL; + struct vfio_irq_info irq_info; int i, ret = -1; /* Sanity check device */ @@ -2797,12 +2837,10 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) } } - irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); + trace_vfio_populate_device_get_irq_info_failure(strerror(-ret)); } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { @@ -2911,17 +2949,18 @@ static void vfio_req_notifier_handler(void *opaque) static void vfio_register_req_notifier(VFIOPCIDevice *vdev) { - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info), - .index = VFIO_PCI_REQ_IRQ_INDEX }; + struct vfio_irq_info irq_info; Error *err = NULL; int32_t fd; + int ret; if (!(vdev->features & VFIO_FEATURE_ENABLE_REQ)) { return; } - if (ioctl(vdev->vbasedev.fd, - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count < 1) { + ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, + &irq_info); + if (ret < 0 || irq_info.count < 1) { return; } @@ -3090,11 +3129,12 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) static void vfio_realize(PCIDevice *pdev, Error **errp) { ERRP_GUARD(); - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; int i, ret; char uuid[UUID_STR_LEN]; g_autofree char *name = NULL; + uint32_t config_space_size; if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -3149,13 +3189,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } + config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); + /* Get a copy of config space */ - ret = pread(vbasedev->fd, vdev->pdev.config, - MIN(pci_config_size(&vdev->pdev), vdev->config_size), - vdev->config_offset); - if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { - ret = ret < 0 ? -errno : -EFAULT; - error_setg_errno(errp, -ret, "failed to read device config space"); + ret = vfio_pci_config_space_read(vdev, 0, config_space_size, + vdev->pdev.config); + if (ret < (int)config_space_size) { + ret = ret < 0 ? -ret : EFAULT; + error_setg_errno(errp, ret, "failed to read device config space"); goto error; } @@ -3259,7 +3300,7 @@ error: static void vfio_instance_finalize(Object *obj) { - VFIOPCIDevice *vdev = VFIO_PCI(obj); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); vfio_display_finalize(vdev); vfio_bars_finalize(vdev); @@ -3277,7 +3318,7 @@ static void vfio_instance_finalize(Object *obj) static void vfio_exitfn(PCIDevice *pdev) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; vfio_unregister_req_notifier(vdev); @@ -3301,7 +3342,7 @@ static void vfio_exitfn(PCIDevice *pdev) static void vfio_pci_reset(DeviceState *dev) { - VFIOPCIDevice *vdev = VFIO_PCI(dev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); trace_vfio_pci_reset(vdev->vbasedev.name); @@ -3341,7 +3382,7 @@ post_reset: static void vfio_instance_init(Object *obj) { PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIOPCIDevice *vdev = VFIO_PCI(obj); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); VFIODevice *vbasedev = &vdev->vbasedev; device_add_bootindex_property(obj, &vdev->bootindex, @@ -3362,6 +3403,31 @@ static void vfio_instance_init(Object *obj) pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } +static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); + + dc->desc = "VFIO PCI base device"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pdc->exit = vfio_exitfn; + pdc->config_read = vfio_pci_read_config; + pdc->config_write = vfio_pci_write_config; +} + +static const TypeInfo vfio_pci_base_dev_info = { + .name = TYPE_VFIO_PCI_BASE, + .parent = TYPE_PCI_DEVICE, + .instance_size = 0, + .abstract = true, + .class_init = vfio_pci_base_dev_class_init, + .interfaces = (const InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, +}; + static PropertyInfo vfio_pci_migration_multifd_transfer_prop; static const Property vfio_pci_dev_properties[] = { @@ -3385,7 +3451,7 @@ static const Property vfio_pci_dev_properties[] = { DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_REQ_BIT, true), DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, - VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, true), DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false), DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice, @@ -3432,7 +3498,8 @@ static const Property vfio_pci_dev_properties[] = { #ifdef CONFIG_IOMMUFD static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) { - vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); + vfio_device_set_fd(&vdev->vbasedev, str, errp); } #endif @@ -3447,11 +3514,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); #endif dc->desc = "VFIO-based PCI device assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); pdc->realize = vfio_realize; - pdc->exit = vfio_exitfn; - pdc->config_read = vfio_pci_read_config; - pdc->config_write = vfio_pci_write_config; object_class_property_set_description(klass, /* 1.3 */ "host", @@ -3576,16 +3639,11 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) static const TypeInfo vfio_pci_dev_info = { .name = TYPE_VFIO_PCI, - .parent = TYPE_PCI_DEVICE, + .parent = TYPE_VFIO_PCI_BASE, .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_dev_class_init, .instance_init = vfio_instance_init, .instance_finalize = vfio_instance_finalize, - .interfaces = (const InterfaceInfo[]) { - { INTERFACE_PCIE_DEVICE }, - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { } - }, }; static const Property vfio_pci_dev_nohotplug_properties[] = { @@ -3632,6 +3690,7 @@ static void register_vfio_pci_dev_type(void) vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto; vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true; + type_register_static(&vfio_pci_base_dev_info); type_register_static(&vfio_pci_dev_info); type_register_static(&vfio_pci_nohotplug_dev_info); } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index f835b1d..5ce0fb9 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -118,8 +118,16 @@ typedef struct VFIOMSIXInfo { bool noresize; } VFIOMSIXInfo; +/* + * TYPE_VFIO_PCI_BASE is an abstract type used to share code + * between VFIO implementations that use a kernel driver + * with those that use user sockets. + */ +#define TYPE_VFIO_PCI_BASE "vfio-pci-base" +OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_BASE) + #define TYPE_VFIO_PCI "vfio-pci" -OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI) +/* TYPE_VFIO_PCI shares struct VFIOPCIDevice. */ struct VFIOPCIDevice { PCIDevice pdev; diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index ffb3681..9a21f2e 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -474,10 +474,10 @@ static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp) QSIMPLEQ_INIT(&vdev->pending_intp_queue); for (i = 0; i < vbasedev->num_irqs; i++) { - struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + struct vfio_irq_info irq; + + ret = vfio_device_get_irq_info(vbasedev, i, &irq); - irq.index = i; - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); if (ret) { error_setg_errno(errp, -ret, "failed to get device irq info"); goto irq_err; diff --git a/hw/vfio/region.c b/hw/vfio/region.c index 04bf9eb..34752c3 100644 --- a/hw/vfio/region.c +++ b/hw/vfio/region.c @@ -45,6 +45,7 @@ void vfio_region_write(void *opaque, hwaddr addr, uint32_t dword; uint64_t qword; } buf; + int ret; switch (size) { case 1: @@ -64,11 +65,13 @@ void vfio_region_write(void *opaque, hwaddr addr, break; } - if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + ret = vbasedev->io_ops->region_write(vbasedev, region->nr, + addr, size, &buf); + if (ret != size) { error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ",%d) failed: %m", + ",%d) failed: %s", __func__, vbasedev->name, region->nr, - addr, data, size); + addr, data, size, strwriteerror(ret)); } trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); @@ -96,11 +99,13 @@ uint64_t vfio_region_read(void *opaque, uint64_t qword; } buf; uint64_t data = 0; + int ret; - if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", + ret = vbasedev->io_ops->region_read(vbasedev, region->nr, addr, size, &buf); + if (ret != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s", __func__, vbasedev->name, region->nr, - addr, size); + addr, size, strreaderror(ret)); return (uint64_t)-1; } switch (size) { @@ -182,7 +187,7 @@ static int vfio_setup_region_sparse_mmaps(VFIORegion *region, int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, int index, const char *name) { - g_autofree struct vfio_region_info *info = NULL; + struct vfio_region_info *info = NULL; int ret; ret = vfio_device_get_region_info(vbasedev, index, &info); diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c index 9a677e8..78e0bc8 100644 --- a/hw/xen/xen-hvm-common.c +++ b/hw/xen/xen-hvm-common.c @@ -711,7 +711,7 @@ static int xen_map_ioreq_server(XenIOState *state) /* * If we fail to map the shared page with xenforeignmemory_map_resource() * or if we're using buffered ioreqs, we need xen_get_ioreq_server_info() - * to provide the the addresses to map the shared page and/or to get the + * to provide the addresses to map the shared page and/or to get the * event-channel port for buffered ioreqs. */ if (state->shared_page == NULL || state->has_bufioreq) { diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c index 698b5c5..e31d379 100644 --- a/hw/xen/xen-mapcache.c +++ b/hw/xen/xen-mapcache.c @@ -75,7 +75,8 @@ typedef struct MapCache { } MapCache; static MapCache *mapcache; -static MapCache *mapcache_grants; +static MapCache *mapcache_grants_ro; +static MapCache *mapcache_grants_rw; static xengnttab_handle *xen_region_gnttabdev; static inline void mapcache_lock(MapCache *mc) @@ -176,9 +177,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) * Grant mappings must use XC_PAGE_SIZE granularity since we can't * map anything beyond the number of pages granted to us. */ - mapcache_grants = xen_map_cache_init_single(f, opaque, - XC_PAGE_SHIFT, - max_mcache_size); + mapcache_grants_ro = xen_map_cache_init_single(f, opaque, + XC_PAGE_SHIFT, + max_mcache_size); + mapcache_grants_rw = xen_map_cache_init_single(f, opaque, + XC_PAGE_SHIFT, + max_mcache_size); setrlimit(RLIMIT_AS, &rlimit_as); } @@ -376,12 +380,12 @@ tryagain: entry = &mc->entry[address_index % mc->nr_buckets]; - while (entry && (lock || entry->lock) && entry->vaddr_base && - (entry->paddr_index != address_index || entry->size != cache_size || + while (entry && (!entry->vaddr_base || + entry->paddr_index != address_index || entry->size != cache_size || !test_bits(address_offset >> XC_PAGE_SHIFT, test_bit_size >> XC_PAGE_SHIFT, entry->valid_mapping))) { - if (!free_entry && !entry->lock) { + if (!free_entry && (!entry->lock || !entry->vaddr_base)) { free_entry = entry; free_pentry = pentry; } @@ -456,9 +460,13 @@ uint8_t *xen_map_cache(MemoryRegion *mr, bool is_write) { bool grant = xen_mr_is_grants(mr); - MapCache *mc = grant ? mapcache_grants : mapcache; + MapCache *mc = mapcache; uint8_t *p; + if (grant) { + mc = is_write ? mapcache_grants_rw : mapcache_grants_ro; + } + if (grant && !lock) { /* * Grants are only supported via address_space_map(). Anything @@ -523,7 +531,10 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr) addr = xen_ram_addr_from_mapcache_single(mapcache, ptr); if (addr == RAM_ADDR_INVALID) { - addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr); + addr = xen_ram_addr_from_mapcache_single(mapcache_grants_ro, ptr); + } + if (addr == RAM_ADDR_INVALID) { + addr = xen_ram_addr_from_mapcache_single(mapcache_grants_rw, ptr); } return addr; @@ -626,7 +637,8 @@ static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer) static void xen_invalidate_map_cache_entry_all(uint8_t *buffer) { xen_invalidate_map_cache_entry_single(mapcache, buffer); - xen_invalidate_map_cache_entry_single(mapcache_grants, buffer); + xen_invalidate_map_cache_entry_single(mapcache_grants_ro, buffer); + xen_invalidate_map_cache_entry_single(mapcache_grants_rw, buffer); } static void xen_invalidate_map_cache_entry_bh(void *opaque) |