From 7df9381b7aa56c897e344f3bfe43bf5848bbd3e0 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 10 Mar 2016 09:39:07 -0700 Subject: vfio: Add sysfsdev property for pci & platform vfio-pci currently requires a host= parameter, which comes in the form of a PCI address in [domain:] notation. We expect to find a matching entry in sysfs for that under /sys/bus/pci/devices/. vfio-platform takes a similar approach, but defines the host= parameter to be a string, which can be matched directly under /sys/bus/platform/devices/. On the PCI side, we have some interest in using vfio to expose vGPU devices. These are not actual discrete PCI devices, so they don't have a compatible host PCI bus address or a device link where QEMU wants to look for it. There's also really no requirement that vfio can only be used to expose physical devices, a new vfio bus and iommu driver could expose a completely emulated device. To fit within the vfio framework, it would need a kernel struct device and associated IOMMU group, but those are easy constraints to manage. To support such devices, which would include vGPUs, that honor the VFIO PCI programming API, but are not necessarily backed by a unique PCI address, add support for specifying any device in sysfs. The vfio API already has support for probing the device type to ensure compatibility with either vfio-pci or vfio-platform. With this, a vfio-pci device could either be specified as: -device vfio-pci,host=02:00.0 or -device vfio-pci,sysfsdev=/sys/devices/pci0000:00/0000:00:1c.0/0000:02:00.0 or even -device vfio-pci,sysfsdev=/sys/bus/pci/devices/0000:02:00.0 When vGPU support comes along, this might look something more like: -device vfio-pci,sysfsdev=/sys/devices/virtual/intel-vgpu/vgpu0@0000:00:02.0 NB - This is only a made up example path The same change is made for vfio-platform, specifying sysfsdev has precedence over the old host option. Tested-by: Eric Auger Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 130 +++++++++++++++++------------------------- hw/vfio/platform.c | 55 ++++++++++-------- include/hw/vfio/vfio-common.h | 1 + 3 files changed, 86 insertions(+), 100 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 20b505f..ef9faf6 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -895,12 +895,8 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) { - error_printf("Warning : Device at %04x:%02x:%02x.%x " - "is known to cause system instability issues during " - "option rom execution. " - "Proceeding anyway since user specified romfile\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified romfile\n", + vdev->vbasedev.name); } return; } @@ -913,9 +909,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) pwrite(fd, &size, 4, offset) != 4 || pread(fd, &size, 4, offset) != 4 || pwrite(fd, &orig, 4, offset) != 4) { - error_report("%s(%04x:%02x:%02x.%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); return; } @@ -927,29 +921,18 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) if (vfio_blacklist_opt_rom(vdev)) { if (dev->opts && qemu_opt_get(dev->opts, "rombar")) { - error_printf("Warning : Device at %04x:%02x:%02x.%x " - "is known to cause system instability issues during " - "option rom execution. " - "Proceeding anyway since user specified non zero value for " - "rombar\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Device at %s is known to cause system instability issues during option rom execution. Proceeding anyway since user specified non zero value for rombar\n", + vdev->vbasedev.name); } else { - error_printf("Warning : Rom loading for device at " - "%04x:%02x:%02x.%x has been disabled due to " - "system instability issues. " - "Specify rombar=1 or romfile to force\n", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + error_printf("Warning : Rom loading for device at %s has been disabled due to system instability issues. Specify rombar=1 or romfile to force\n", + vdev->vbasedev.name); return; } } trace_vfio_pci_size_rom(vdev->vbasedev.name, size); - snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); + snprintf(name, sizeof(name), "vfio[%s].rom", vdev->vbasedev.name); memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev), &vfio_rom_ops, vdev, name, size); @@ -1063,9 +1046,8 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) ret = pread(vdev->vbasedev.fd, &phys_val, len, vdev->config_offset + addr); if (ret != len) { - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, addr, len); + error_report("%s(%s, 0x%x, 0x%x) failed: %m", + __func__, vdev->vbasedev.name, addr, len); return -errno; } phys_val = le32_to_cpu(phys_val); @@ -1089,9 +1071,8 @@ void vfio_pci_write_config(PCIDevice *pdev, /* Write everything to VFIO, let it filter out what we can't write */ if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) != len) { - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function, addr, val, len); + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m", + __func__, vdev->vbasedev.name, addr, val, len); } /* MSI/MSI-X Enabling/Disabling */ @@ -1383,9 +1364,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) return; } - snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function, nr); + snprintf(name, sizeof(name), "VFIO %s BAR %d", vdev->vbasedev.name, nr); /* Determine what type of BAR this is for registration */ ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), @@ -1756,9 +1735,8 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) } if (ret < 0) { - error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability " - "0x%x[0x%x]@0x%x: %d", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function, + error_report("vfio: %s Error adding PCI capability " + "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name, cap_id, size, pos, ret); return ret; } @@ -1820,11 +1798,14 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) vfio_intx_enable(vdev); } -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, - PCIHostDeviceAddress *host2) +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) { - return (host1->domain == host2->domain && host1->bus == host2->bus && - host1->slot == host2->slot && host1->function == host2->function); + char tmp[13]; + + sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain, + addr->bus, addr->slot, addr->function); + + return (strcmp(tmp, name) == 0); } static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) @@ -1849,9 +1830,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) if (ret && errno != ENOSPC) { ret = -errno; if (!vdev->has_pm_reset) { - error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, " - "no available reset mechanism.", vdev->host.domain, - vdev->host.bus, vdev->host.slot, vdev->host.function); + error_report("vfio: Cannot reset device %s, " + "no available reset mechanism.", vdev->vbasedev.name); } goto out_single; } @@ -1884,7 +1864,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) trace_vfio_pci_hot_reset_dep_devices(host.domain, host.bus, host.slot, host.function, devices[i].group_id); - if (vfio_pci_host_match(&host, &vdev->host)) { + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { continue; } @@ -1910,7 +1890,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, &tmp->host)) { + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { if (single) { ret = -EINVAL; goto out_single; @@ -1972,7 +1952,7 @@ out: host.slot = PCI_SLOT(devices[i].devfn); host.function = PCI_FUNC(devices[i].devfn); - if (vfio_pci_host_match(&host, &vdev->host)) { + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { continue; } @@ -1991,7 +1971,7 @@ out: continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); - if (vfio_pci_host_match(&host, &tmp->host)) { + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { vfio_pci_post_reset(tmp); break; } @@ -2197,10 +2177,7 @@ static void vfio_err_notifier_handler(void *opaque) * guest to contain the error. */ - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. " - "Please collect any data possible and then kill the guest", - __func__, vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); + error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name); vm_stop(RUN_STATE_INTERNAL_ERROR); } @@ -2381,42 +2358,43 @@ static int vfio_initfn(PCIDevice *pdev) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIODevice *vbasedev_iter; VFIOGroup *group; - char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; + char *tmp, group_path[PATH_MAX], *group_name; ssize_t len; struct stat st; int groupid; int ret; - /* Check that the host device exists */ - snprintf(path, sizeof(path), - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); - if (stat(path, &st) < 0) { - error_report("vfio: error: no such host device: %s", path); + if (!vdev->vbasedev.sysfsdev) { + vdev->vbasedev.sysfsdev = + g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x", + vdev->host.domain, vdev->host.bus, + vdev->host.slot, vdev->host.function); + } + + if (stat(vdev->vbasedev.sysfsdev, &st) < 0) { + error_report("vfio: error: no such host device: %s", + vdev->vbasedev.sysfsdev); return -errno; } + vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev)); vdev->vbasedev.ops = &vfio_pci_ops; - vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI; - vdev->vbasedev.name = g_strdup_printf("%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, - vdev->host.slot, vdev->host.function); - strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); + tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); - len = readlink(path, iommu_group_path, sizeof(path)); - if (len <= 0 || len >= sizeof(path)) { + if (len <= 0 || len >= sizeof(group_path)) { error_report("vfio: error no iommu_group for device"); return len < 0 ? -errno : -ENAMETOOLONG; } - iommu_group_path[len] = 0; - group_name = basename(iommu_group_path); + group_path[len] = 0; + group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", path); + error_report("vfio: error reading %s: %m", group_path); return -errno; } @@ -2428,21 +2406,18 @@ static int vfio_initfn(PCIDevice *pdev) return -ENOENT; } - snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x", - vdev->host.domain, vdev->host.bus, vdev->host.slot, - vdev->host.function); - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) { - error_report("vfio: error: device %s is already attached", path); + error_report("vfio: error: device %s is already attached", + vdev->vbasedev.name); vfio_put_group(group); return -EBUSY; } } - ret = vfio_get_device(group, path, &vdev->vbasedev); + ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev); if (ret) { - error_report("vfio: failed to get device %s", path); + error_report("vfio: failed to get device %s", vdev->vbasedev.name); vfio_put_group(group); return ret; } @@ -2659,6 +2634,7 @@ static void vfio_instance_init(Object *obj) static Property vfio_pci_dev_properties[] = { DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice, intx.mmap_timeout, 1100), DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features, diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index ebc9dcb..6c8b54a 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -560,38 +560,45 @@ static int vfio_base_device_init(VFIODevice *vbasedev) { VFIOGroup *group; VFIODevice *vbasedev_iter; - char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name; + char *tmp, group_path[PATH_MAX], *group_name; ssize_t len; struct stat st; int groupid; int ret; - /* name must be set prior to the call */ - if (!vbasedev->name || strchr(vbasedev->name, '/')) { - return -EINVAL; - } + /* @sysfsdev takes precedence over @host */ + if (vbasedev->sysfsdev) { + g_free(vbasedev->name); + vbasedev->name = g_strdup(basename(vbasedev->sysfsdev)); + } else { + if (!vbasedev->name || strchr(vbasedev->name, '/')) { + return -EINVAL; + } - /* Check that the host device exists */ - g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/", - vbasedev->name); + vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s", + vbasedev->name); + } - if (stat(path, &st) < 0) { - error_report("vfio: error: no such host device: %s", path); + if (stat(vbasedev->sysfsdev, &st) < 0) { + error_report("vfio: error: no such host device: %s", + vbasedev->sysfsdev); return -errno; } - g_strlcat(path, "iommu_group", sizeof(path)); - len = readlink(path, iommu_group_path, sizeof(iommu_group_path)); - if (len < 0 || len >= sizeof(iommu_group_path)) { + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); + + if (len < 0 || len >= sizeof(group_path)) { error_report("vfio: error no iommu_group for device"); return len < 0 ? -errno : -ENAMETOOLONG; } - iommu_group_path[len] = 0; - group_name = basename(iommu_group_path); + group_path[len] = 0; + group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", path); + error_report("vfio: error reading %s: %m", group_path); return -errno; } @@ -603,25 +610,24 @@ static int vfio_base_device_init(VFIODevice *vbasedev) return -ENOENT; } - g_snprintf(path, sizeof(path), "%s", vbasedev->name); - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { - error_report("vfio: error: device %s is already attached", path); + error_report("vfio: error: device %s is already attached", + vbasedev->name); vfio_put_group(group); return -EBUSY; } } - ret = vfio_get_device(group, path, vbasedev); + ret = vfio_get_device(group, vbasedev->name, vbasedev); if (ret) { - error_report("vfio: failed to get device %s", path); + error_report("vfio: failed to get device %s", vbasedev->name); vfio_put_group(group); return ret; } ret = vfio_populate_device(vbasedev); if (ret) { - error_report("vfio: failed to populate device %s", path); + error_report("vfio: failed to populate device %s", vbasedev->name); vfio_put_group(group); } @@ -681,7 +687,9 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; vbasedev->ops = &vfio_platform_ops; - trace_vfio_platform_realize(vbasedev->name, vdev->compat); + trace_vfio_platform_realize(vbasedev->sysfsdev ? + vbasedev->sysfsdev : vbasedev->name, + vdev->compat); ret = vfio_base_device_init(vbasedev); if (ret) { @@ -703,6 +711,7 @@ static const VMStateDescription vfio_platform_vmstate = { static Property vfio_platform_dev_properties[] = { DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name), + DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev), DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false), DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, mmap_timeout, 1100), diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index f037f3c..7e00ffc 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -89,6 +89,7 @@ typedef struct VFIODeviceOps VFIODeviceOps; typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; struct VFIOGroup *group; + char *sysfsdev; char *name; int fd; int type; -- cgit v1.1 From 469002263a295ec471c1498c3b456ccd9f85a841 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 10 Mar 2016 09:39:07 -0700 Subject: vfio: Wrap VFIO_DEVICE_GET_REGION_INFO In preparation for supporting capability chains on regions, wrap ioctl(VFIO_DEVICE_GET_REGION_INFO) so we don't duplicate the code for each caller. Signed-off-by: Alex Williamson --- hw/vfio/common.c | 18 ++++++++++ hw/vfio/pci.c | 81 ++++++++++++++++++++++--------------------- hw/vfio/platform.c | 13 +++---- include/hw/vfio/vfio-common.h | 7 ++++ 4 files changed, 73 insertions(+), 46 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 607ec70..e20fc4f 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -959,6 +959,24 @@ void vfio_put_base_device(VFIODevice *vbasedev) close(vbasedev->fd); } +int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info) +{ + size_t argsz = sizeof(struct vfio_region_info); + + *info = g_malloc0(argsz); + + (*info)->index = index; + (*info)->argsz = argsz; + + if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + g_free(*info); + return -errno; + } + + return 0; +} + static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, int req, void *param) { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index ef9faf6..db7a950 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -783,25 +783,25 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { - struct vfio_region_info reg_info = { - .argsz = sizeof(reg_info), - .index = VFIO_PCI_ROM_REGION_INDEX - }; + struct vfio_region_info *reg_info; uint64_t size; off_t off = 0; ssize_t bytes; - if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { + if (vfio_get_region_info(&vdev->vbasedev, + VFIO_PCI_ROM_REGION_INDEX, ®_info)) { error_report("vfio: Error getting ROM info: %m"); return; } - trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); + + vdev->rom_size = size = reg_info->size; + vdev->rom_offset = reg_info->offset; - vdev->rom_size = size = reg_info.size; - vdev->rom_offset = reg_info.offset; + g_free(reg_info); if (!vdev->rom_size) { vdev->rom_read_failed = true; @@ -2027,7 +2027,7 @@ static VFIODeviceOps vfio_pci_ops = { static int vfio_populate_device(VFIOPCIDevice *vdev) { VFIODevice *vbasedev = &vdev->vbasedev; - struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; + struct vfio_region_info *reg_info; struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; int i, ret = -1; @@ -2049,72 +2049,73 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) } for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) { - reg_info.index = i; - - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = vfio_get_region_info(vbasedev, i, ®_info); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto error; } trace_vfio_populate_device_region(vbasedev->name, i, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); vdev->bars[i].region.vbasedev = vbasedev; - vdev->bars[i].region.flags = reg_info.flags; - vdev->bars[i].region.size = reg_info.size; - vdev->bars[i].region.fd_offset = reg_info.offset; + vdev->bars[i].region.flags = reg_info->flags; + vdev->bars[i].region.size = reg_info->size; + vdev->bars[i].region.fd_offset = reg_info->offset; vdev->bars[i].region.nr = i; QLIST_INIT(&vdev->bars[i].quirks); - } - reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX; + g_free(reg_info); + } - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, ®_info); if (ret) { error_report("vfio: Error getting config info: %m"); goto error; } trace_vfio_populate_device_config(vdev->vbasedev.name, - (unsigned long)reg_info.size, - (unsigned long)reg_info.offset, - (unsigned long)reg_info.flags); + (unsigned long)reg_info->size, + (unsigned long)reg_info->offset, + (unsigned long)reg_info->flags); - vdev->config_size = reg_info.size; + vdev->config_size = reg_info->size; if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) { vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS; } - vdev->config_offset = reg_info.offset; + vdev->config_offset = reg_info->offset; + + g_free(reg_info); if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) && vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { - struct vfio_region_info vga_info = { - .argsz = sizeof(vga_info), - .index = VFIO_PCI_VGA_REGION_INDEX, - }; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info); + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_VGA_REGION_INDEX, ®_info); if (ret) { error_report( "vfio: Device does not support requested feature x-vga"); goto error; } - if (!(vga_info.flags & VFIO_REGION_INFO_FLAG_READ) || - !(vga_info.flags & VFIO_REGION_INFO_FLAG_WRITE) || - vga_info.size < 0xbffff + 1) { + if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || + !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || + reg_info->size < 0xbffff + 1) { error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", - (unsigned long)vga_info.flags, - (unsigned long)vga_info.size); + (unsigned long)reg_info->flags, + (unsigned long)reg_info->size); + g_free(reg_info); + ret = -1; goto error; } - vdev->vga.fd_offset = vga_info.offset; + vdev->vga.fd_offset = reg_info->offset; vdev->vga.fd = vdev->vbasedev.fd; + g_free(reg_info); + vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 6c8b54a..f9b9c20 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -476,23 +476,24 @@ static int vfio_populate_device(VFIODevice *vbasedev) vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions); for (i = 0; i < vbasedev->num_regions; i++) { - struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; + struct vfio_region_info *reg_info; VFIORegion *ptr; vdev->regions[i] = g_new0(VFIORegion, 1); ptr = vdev->regions[i]; - reg_info.index = i; - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); + ret = vfio_get_region_info(vbasedev, i, ®_info); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto reg_error; } - ptr->flags = reg_info.flags; - ptr->size = reg_info.size; - ptr->fd_offset = reg_info.offset; + ptr->flags = reg_info->flags; + ptr->size = reg_info->size; + ptr->fd_offset = reg_info->offset; ptr->nr = i; ptr->vbasedev = vbasedev; + g_free(reg_info); + trace_vfio_platform_populate_regions(ptr->nr, (unsigned long)ptr->flags, (unsigned long)ptr->size, diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 7e00ffc..a147bd2 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -25,6 +25,9 @@ #include "exec/memory.h" #include "qemu/queue.h" #include "qemu/notify.h" +#ifdef CONFIG_LINUX +#include +#endif /*#define DEBUG_VFIO*/ #ifdef DEBUG_VFIO @@ -139,4 +142,8 @@ extern const MemoryRegionOps vfio_region_ops; extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list; extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces; +#ifdef CONFIG_LINUX +int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info); +#endif #endif /* !HW_VFIO_VFIO_COMMON_H */ -- cgit v1.1 From db0da029a1853d46c90a6c0790ce6ca77fd46ea3 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 10 Mar 2016 09:39:07 -0700 Subject: vfio: Generalize region support Both platform and PCI vfio drivers create a "slow", I/O memory region with one or more mmap memory regions overlayed when supported by the device. Generalize this to a set of common helpers in the core that pulls the region info from vfio, fills the region data, configures slow mapping, and adds helpers for comleting the mmap, enable/disable, and teardown. This can be immediately used by the PCI MSI-X code, which needs to mmap around the MSI-X vector table. This also changes VFIORegion.mem to be dynamically allocated because otherwise we don't know how the caller has allocated VFIORegion and therefore don't know whether to unreference it to destroy the MemoryRegion or not. Signed-off-by: Alex Williamson --- hw/arm/sysbus-fdt.c | 4 +- hw/vfio/common.c | 172 +++++++++++++++++++++++++++++++++++------- hw/vfio/pci-quirks.c | 24 +++--- hw/vfio/pci.c | 168 ++++++++++++++++++++++------------------- hw/vfio/platform.c | 72 ++++-------------- include/hw/vfio/vfio-common.h | 23 ++++-- trace-events | 10 ++- 7 files changed, 283 insertions(+), 190 deletions(-) diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c index 04afeae..49bd212 100644 --- a/hw/arm/sysbus-fdt.c +++ b/hw/arm/sysbus-fdt.c @@ -240,7 +240,7 @@ static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque) mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); reg_attr[2 * i] = cpu_to_be32(mmio_base); reg_attr[2 * i + 1] = cpu_to_be32( - memory_region_size(&vdev->regions[i]->mem)); + memory_region_size(vdev->regions[i]->mem)); } qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, vbasedev->num_regions * 2 * sizeof(uint32_t)); @@ -374,7 +374,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque) mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); reg_attr[2 * i] = cpu_to_be32(mmio_base); reg_attr[2 * i + 1] = cpu_to_be32( - memory_region_size(&vdev->regions[i]->mem)); + memory_region_size(vdev->regions[i]->mem)); } qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr, vbasedev->num_regions * 2 * sizeof(uint32_t)); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index e20fc4f..96ccb79 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -493,46 +493,162 @@ static void vfio_listener_release(VFIOContainer *container) memory_listener_unregister(&container->listener); } -int vfio_mmap_region(Object *obj, VFIORegion *region, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name) +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name) { - int ret = 0; - VFIODevice *vbasedev = region->vbasedev; + struct vfio_region_info *info; + int ret; + + ret = vfio_get_region_info(vbasedev, index, &info); + if (ret) { + return ret; + } + + region->vbasedev = vbasedev; + region->flags = info->flags; + region->size = info->size; + region->fd_offset = info->offset; + region->nr = index; - if (!vbasedev->no_mmap && size && region->flags & - VFIO_REGION_INFO_FLAG_MMAP) { - int prot = 0; + if (region->size) { + region->mem = g_new0(MemoryRegion, 1); + memory_region_init_io(region->mem, obj, &vfio_region_ops, + region, name, region->size); - if (region->flags & VFIO_REGION_INFO_FLAG_READ) { - prot |= PROT_READ; + if (!vbasedev->no_mmap && + region->flags & VFIO_REGION_INFO_FLAG_MMAP && + !(region->size & ~qemu_real_host_page_mask)) { + + region->nr_mmaps = 1; + region->mmaps = g_new0(VFIOMmap, region->nr_mmaps); + + region->mmaps[0].offset = 0; + region->mmaps[0].size = region->size; } + } + + g_free(info); + + trace_vfio_region_setup(vbasedev->name, index, name, + region->flags, region->fd_offset, region->size); + return 0; +} - if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) { - prot |= PROT_WRITE; +int vfio_region_mmap(VFIORegion *region) +{ + int i, prot = 0; + char *name; + + if (!region->mem) { + return 0; + } + + prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; + prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0; + + for (i = 0; i < region->nr_mmaps; i++) { + region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, + MAP_SHARED, region->vbasedev->fd, + region->fd_offset + + region->mmaps[i].offset); + if (region->mmaps[i].mmap == MAP_FAILED) { + int ret = -errno; + + trace_vfio_region_mmap_fault(memory_region_name(region->mem), i, + region->fd_offset + + region->mmaps[i].offset, + region->fd_offset + + region->mmaps[i].offset + + region->mmaps[i].size - 1, ret); + + region->mmaps[i].mmap = NULL; + + for (i--; i >= 0; i--) { + memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem); + munmap(region->mmaps[i].mmap, region->mmaps[i].size); + object_unparent(OBJECT(®ion->mmaps[i].mem)); + region->mmaps[i].mmap = NULL; + } + + return ret; } - *map = mmap(NULL, size, prot, MAP_SHARED, - vbasedev->fd, - region->fd_offset + offset); - if (*map == MAP_FAILED) { - *map = NULL; - ret = -errno; - goto empty_region; + name = g_strdup_printf("%s mmaps[%d]", + memory_region_name(region->mem), i); + memory_region_init_ram_ptr(®ion->mmaps[i].mem, + memory_region_owner(region->mem), + name, region->mmaps[i].size, + region->mmaps[i].mmap); + g_free(name); + memory_region_set_skip_dump(®ion->mmaps[i].mem); + memory_region_add_subregion(region->mem, region->mmaps[i].offset, + ®ion->mmaps[i].mem); + + trace_vfio_region_mmap(memory_region_name(®ion->mmaps[i].mem), + region->mmaps[i].offset, + region->mmaps[i].offset + + region->mmaps[i].size - 1); + } + + return 0; +} + +void vfio_region_exit(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + memory_region_del_subregion(region->mem, ®ion->mmaps[i].mem); } + } - memory_region_init_ram_ptr(submem, obj, name, size, *map); - memory_region_set_skip_dump(submem); - } else { -empty_region: - /* Create a zero sized sub-region to make cleanup easy. */ - memory_region_init(submem, obj, name, 0); + trace_vfio_region_exit(region->vbasedev->name, region->nr); +} + +void vfio_region_finalize(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; } - memory_region_add_subregion(mem, offset, submem); + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + munmap(region->mmaps[i].mmap, region->mmaps[i].size); + object_unparent(OBJECT(®ion->mmaps[i].mem)); + } + } - return ret; + object_unparent(OBJECT(region->mem)); + + g_free(region->mem); + g_free(region->mmaps); + + trace_vfio_region_finalize(region->vbasedev->name, region->nr); +} + +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + memory_region_set_enabled(®ion->mmaps[i].mem, enabled); + } + } + + trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem), + enabled); } void vfio_reset_handler(void *opaque) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 4815527..d626ec9 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -337,14 +337,14 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->addr_mem, OBJECT(vdev), &vfio_generic_window_address_quirk, window, "vfio-ati-bar4-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->address_offset, window->addr_mem, 1); memory_region_init_io(window->data_mem, OBJECT(vdev), &vfio_generic_window_data_quirk, window, "vfio-ati-bar4-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->data_offset, window->data_mem, 1); @@ -378,7 +378,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(mirror->mem, OBJECT(vdev), &vfio_generic_mirror_quirk, mirror, "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -683,7 +683,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->addr_mem, OBJECT(vdev), &vfio_generic_window_address_quirk, window, "vfio-nvidia-bar5-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->address_offset, window->addr_mem, 1); memory_region_set_enabled(window->addr_mem, false); @@ -691,7 +691,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(window->data_mem, OBJECT(vdev), &vfio_generic_window_data_quirk, window, "vfio-nvidia-bar5-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, window->data_offset, window->data_mem, 1); memory_region_set_enabled(window->data_mem, false); @@ -699,13 +699,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem[2], OBJECT(vdev), &vfio_nvidia_bar5_quirk_master, bar5, "vfio-nvidia-bar5-master-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0, &quirk->mem[2], 1); memory_region_init_io(&quirk->mem[3], OBJECT(vdev), &vfio_nvidia_bar5_quirk_enable, bar5, "vfio-nvidia-bar5-enable-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 4, &quirk->mem[3], 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -767,7 +767,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) &vfio_nvidia_mirror_quirk, mirror, "vfio-nvidia-bar0-88000-mirror-quirk", vdev->config_size); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -786,7 +786,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) &vfio_nvidia_mirror_quirk, mirror, "vfio-nvidia-bar0-1800-mirror-quirk", PCI_CONFIG_SPACE_SIZE); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, mirror->offset, mirror->mem, 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -947,13 +947,13 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_rtl_address_quirk, rtl, "vfio-rtl8168-window-address-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0x74, &quirk->mem[0], 1); memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_rtl_data_quirk, rtl, "vfio-rtl8168-window-data-quirk", 4); - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem, + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, 0x70, &quirk->mem[1], 1); QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); @@ -1020,7 +1020,7 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) QLIST_FOREACH(quirk, &bar->quirks, next) { for (i = 0; i < quirk->nr_mem; i++) { - memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]); + memory_region_del_subregion(bar->region.mem, &quirk->mem[i]); } } } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index db7a950..f18a678 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1166,6 +1166,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) return 0; } +static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev) +{ + off_t start, end; + VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region; + + /* + * We expect to find a single mmap covering the whole BAR, anything else + * means it's either unsupported or already setup. + */ + if (region->nr_mmaps != 1 || region->mmaps[0].offset || + region->size != region->mmaps[0].size) { + return; + } + + /* MSI-X table start and end aligned to host page size */ + start = vdev->msix->table_offset & qemu_real_host_page_mask; + end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); + + /* + * Does the MSI-X table cover the beginning of the BAR? The whole BAR? + * NB - Host page size is necessarily a power of two and so is the PCI + * BAR (not counting EA yet), therefore if we have host page aligned + * @start and @end, then any remainder of the BAR before or after those + * must be at least host page sized and therefore mmap'able. + */ + if (!start) { + if (end >= region->size) { + region->nr_mmaps = 0; + g_free(region->mmaps); + region->mmaps = NULL; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, 0, 0); + } else { + region->mmaps[0].offset = end; + region->mmaps[0].size = region->size - end; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + } + + /* Maybe it's aligned at the end of the BAR */ + } else if (end >= region->size) { + region->mmaps[0].size = start; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + + /* Otherwise it must split the BAR */ + } else { + region->nr_mmaps = 2; + region->mmaps = g_renew(VFIOMmap, region->mmaps, 2); + + memcpy(®ion->mmaps[1], ®ion->mmaps[0], sizeof(VFIOMmap)); + + region->mmaps[0].size = start; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[0].offset, + region->mmaps[0].offset + region->mmaps[0].size); + + region->mmaps[1].offset = end; + region->mmaps[1].size = region->size - end; + trace_vfio_msix_fixup(vdev->vbasedev.name, + vdev->msix->table_bar, region->mmaps[1].offset, + region->mmaps[1].offset + region->mmaps[1].size); + } +} + /* * We don't have any control over how pci_add_capability() inserts * capabilities into the chain. In order to setup MSI-X we need a @@ -1240,6 +1308,8 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) msix->table_offset, msix->entries); vdev->msix = msix; + vfio_pci_fixup_msix_region(vdev); + return 0; } @@ -1250,9 +1320,9 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long)); ret = msix_init(&vdev->pdev, vdev->msix->entries, - &vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->table_bar].region.mem, vdev->msix->table_bar, vdev->msix->table_offset, - &vdev->bars[vdev->msix->pba_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].region.mem, vdev->msix->pba_bar, vdev->msix->pba_offset, pos); if (ret < 0) { if (ret == -ENOTSUP) { @@ -1289,8 +1359,8 @@ static void vfio_teardown_msi(VFIOPCIDevice *vdev) if (vdev->msix) { msix_uninit(&vdev->pdev, - &vdev->bars[vdev->msix->table_bar].region.mem, - &vdev->bars[vdev->msix->pba_bar].region.mem); + vdev->bars[vdev->msix->table_bar].region.mem, + vdev->bars[vdev->msix->pba_bar].region.mem); g_free(vdev->msix->pending); } } @@ -1303,16 +1373,7 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - VFIOBAR *bar = &vdev->bars[i]; - - if (!bar->region.size) { - continue; - } - - memory_region_set_enabled(&bar->region.mmap_mem, enabled); - if (vdev->msix && vdev->msix->table_bar == i) { - memory_region_set_enabled(&vdev->msix->mmap_mem, enabled); - } + vfio_region_mmaps_set_enabled(&vdev->bars[i].region, enabled); } } @@ -1326,11 +1387,7 @@ static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr) vfio_bar_quirk_teardown(vdev, nr); - memory_region_del_subregion(&bar->region.mem, &bar->region.mmap_mem); - - if (vdev->msix && vdev->msix->table_bar == nr) { - memory_region_del_subregion(&bar->region.mem, &vdev->msix->mmap_mem); - } + vfio_region_exit(&bar->region); } static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) @@ -1343,18 +1400,13 @@ static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) vfio_bar_quirk_free(vdev, nr); - munmap(bar->region.mmap, memory_region_size(&bar->region.mmap_mem)); - - if (vdev->msix && vdev->msix->table_bar == nr) { - munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem)); - } + vfio_region_finalize(&bar->region); } static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; uint64_t size = bar->region.size; - char name[64]; uint32_t pci_bar; uint8_t type; int ret; @@ -1364,8 +1416,6 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) return; } - snprintf(name, sizeof(name), "VFIO %s BAR %d", vdev->vbasedev.name, nr); - /* Determine what type of BAR this is for registration */ ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); @@ -1380,41 +1430,11 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); - /* A "slow" read/write mapping underlies all BARs */ - memory_region_init_io(&bar->region.mem, OBJECT(vdev), &vfio_region_ops, - bar, name, size); - pci_register_bar(&vdev->pdev, nr, type, &bar->region.mem); - - /* - * We can't mmap areas overlapping the MSIX vector table, so we - * potentially insert a direct-mapped subregion before and after it. - */ - if (vdev->msix && vdev->msix->table_bar == nr) { - size = vdev->msix->table_offset & qemu_real_host_page_mask; - } - - strncat(name, " mmap", sizeof(name) - strlen(name) - 1); - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, - &bar->region.mmap_mem, &bar->region.mmap, - size, 0, name)) { - error_report("%s unsupported. Performance may be slow", name); - } - - if (vdev->msix && vdev->msix->table_bar == nr) { - uint64_t start; - - start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset + - (vdev->msix->entries * - PCI_MSIX_ENTRY_SIZE)); + pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); - size = start < bar->region.size ? bar->region.size - start : 0; - strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1); - /* VFIOMSIXInfo contains another MemoryRegion for this mapping */ - if (vfio_mmap_region(OBJECT(vdev), &bar->region, &bar->region.mem, - &vdev->msix->mmap_mem, - &vdev->msix->mmap, size, start, name)) { - error_report("%s unsupported. Performance may be slow", name); - } + if (vfio_region_mmap(&bar->region)) { + error_report("Failed to mmap %s BAR %d. Performance may be slow", + vdev->vbasedev.name, nr); } vfio_bar_quirk_setup(vdev, nr); @@ -2049,25 +2069,18 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) } for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) { - ret = vfio_get_region_info(vbasedev, i, ®_info); + char *name = g_strdup_printf("%s BAR %d", vbasedev->name, i); + + ret = vfio_region_setup(OBJECT(vdev), vbasedev, + &vdev->bars[i].region, i, name); + g_free(name); + if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto error; } - trace_vfio_populate_device_region(vbasedev->name, i, - (unsigned long)reg_info->size, - (unsigned long)reg_info->offset, - (unsigned long)reg_info->flags); - - vdev->bars[i].region.vbasedev = vbasedev; - vdev->bars[i].region.flags = reg_info->flags; - vdev->bars[i].region.size = reg_info->size; - vdev->bars[i].region.fd_offset = reg_info->offset; - vdev->bars[i].region.nr = i; QLIST_INIT(&vdev->bars[i].quirks); - - g_free(reg_info); } ret = vfio_get_region_info(vbasedev, @@ -2153,11 +2166,8 @@ error: static void vfio_put_device(VFIOPCIDevice *vdev) { g_free(vdev->vbasedev.name); - if (vdev->msix) { - object_unparent(OBJECT(&vdev->msix->mmap_mem)); - g_free(vdev->msix); - vdev->msix = NULL; - } + g_free(vdev->msix); + vfio_put_base_device(&vdev->vbasedev); } diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index f9b9c20..a2ab75d 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -143,12 +143,8 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) { int i; - trace_vfio_platform_mmap_set_enabled(enabled); - for (i = 0; i < vdev->vbasedev.num_regions; i++) { - VFIORegion *region = vdev->regions[i]; - - memory_region_set_enabled(®ion->mmap_mem, enabled); + vfio_region_mmaps_set_enabled(vdev->regions[i], enabled); } } @@ -476,29 +472,16 @@ static int vfio_populate_device(VFIODevice *vbasedev) vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions); for (i = 0; i < vbasedev->num_regions; i++) { - struct vfio_region_info *reg_info; - VFIORegion *ptr; + char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i); vdev->regions[i] = g_new0(VFIORegion, 1); - ptr = vdev->regions[i]; - ret = vfio_get_region_info(vbasedev, i, ®_info); + ret = vfio_region_setup(OBJECT(vdev), vbasedev, + vdev->regions[i], i, name); + g_free(name); if (ret) { error_report("vfio: Error getting region %d info: %m", i); goto reg_error; } - ptr->flags = reg_info->flags; - ptr->size = reg_info->size; - ptr->fd_offset = reg_info->offset; - ptr->nr = i; - ptr->vbasedev = vbasedev; - - g_free(reg_info); - - trace_vfio_platform_populate_regions(ptr->nr, - (unsigned long)ptr->flags, - (unsigned long)ptr->size, - ptr->vbasedev->fd, - (unsigned long)ptr->fd_offset); } vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, @@ -535,6 +518,9 @@ irq_err: } reg_error: for (i = 0; i < vbasedev->num_regions; i++) { + if (vdev->regions[i]) { + vfio_region_finalize(vdev->regions[i]); + } g_free(vdev->regions[i]); } g_free(vdev->regions); @@ -636,41 +622,6 @@ static int vfio_base_device_init(VFIODevice *vbasedev) } /** - * vfio_map_region - initialize the 2 memory regions for a given - * MMIO region index - * @vdev: the VFIO platform device handle - * @nr: the index of the region - * - * Init the top memory region and the mmapped memory region beneath - * VFIOPlatformDevice is used since VFIODevice is not a QOM Object - * and could not be passed to memory region functions -*/ -static void vfio_map_region(VFIOPlatformDevice *vdev, int nr) -{ - VFIORegion *region = vdev->regions[nr]; - uint64_t size = region->size; - char name[64]; - - if (!size) { - return; - } - - g_snprintf(name, sizeof(name), "VFIO %s region %d", - vdev->vbasedev.name, nr); - - /* A "slow" read/write mapping underlies all regions */ - memory_region_init_io(®ion->mem, OBJECT(vdev), &vfio_region_ops, - region, name, size); - - g_strlcat(name, " mmap", sizeof(name)); - - if (vfio_mmap_region(OBJECT(vdev), region, ®ion->mem, - ®ion->mmap_mem, ®ion->mmap, size, 0, name)) { - error_report("%s unsupported. Performance may be slow", name); - } -} - -/** * vfio_platform_realize - the device realize function * @dev: device state pointer * @errp: error @@ -700,8 +651,11 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) } for (i = 0; i < vbasedev->num_regions; i++) { - vfio_map_region(vdev, i); - sysbus_init_mmio(sbdev, &vdev->regions[i]->mem); + if (vfio_region_mmap(vdev->regions[i])) { + error_report("%s mmap unsupported. Performance may be slow", + memory_region_name(vdev->regions[i]->mem)); + } + sysbus_init_mmio(sbdev, vdev->regions[i]->mem); } } diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index a147bd2..eb0e1b0 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -43,14 +43,21 @@ enum { VFIO_DEVICE_TYPE_PLATFORM = 1, }; +typedef struct VFIOMmap { + MemoryRegion mem; + void *mmap; + off_t offset; + size_t size; +} VFIOMmap; + typedef struct VFIORegion { struct VFIODevice *vbasedev; off_t fd_offset; /* offset of region within device fd */ - MemoryRegion mem; /* slow, read/write access */ - MemoryRegion mmap_mem; /* direct mapped access */ - void *mmap; + MemoryRegion *mem; /* slow, read/write access */ size_t size; uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ + uint32_t nr_mmaps; + VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ } VFIORegion; @@ -128,10 +135,12 @@ void vfio_region_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); uint64_t vfio_region_read(void *opaque, hwaddr addr, unsigned size); -int vfio_mmap_region(Object *vdev, VFIORegion *region, - MemoryRegion *mem, MemoryRegion *submem, - void **map, size_t size, off_t offset, - const char *name); +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name); +int vfio_region_mmap(VFIORegion *region); +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); +void vfio_region_exit(VFIORegion *region); +void vfio_region_finalize(VFIORegion *region); void vfio_reset_handler(void *opaque); VFIOGroup *vfio_get_group(int groupid, AddressSpace *as); void vfio_put_group(VFIOGroup *group); diff --git a/trace-events b/trace-events index 6fba6cc..bf07a8f 100644 --- a/trace-events +++ b/trace-events @@ -1652,6 +1652,7 @@ vfio_msix_enable(const char *name) " (%s)" vfio_msix_pba_disable(const char *name) " (%s)" vfio_msix_pba_enable(const char *name) " (%s)" vfio_msix_disable(const char *name) " (%s)" +vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]" vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" vfio_msi_disable(const char *name) " (%s)" vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" @@ -1670,7 +1671,6 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s" vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:" vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" -vfio_populate_device_region(const char *region_name, int index, unsigned long size, unsigned long offset, unsigned long flags) "Device %s region %d:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" vfio_initfn(const char *name, int group_id) " (%s) group %d" @@ -1726,13 +1726,17 @@ vfio_disconnect_container(int fd) "close container->fd=%d" vfio_put_group(int fd) "close group->fd=%d" vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" vfio_put_base_device(int fd) "close vdev->fd=%d" +vfio_region_setup(const char *dev, int index, const char *name, unsigned long flags, unsigned long offset, unsigned long size) "Device %s, region %d \"%s\", flags: %lx, offset: %lx, size: %lx" +vfio_region_mmap_fault(const char *name, int index, unsigned long offset, unsigned long size, int fault) "Region %s mmaps[%d], [%lx - %lx], fault: %d" +vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Region %s [%lx - %lx]" +vfio_region_exit(const char *name, int index) "Device %s, region %d" +vfio_region_finalize(const char *name, int index) "Device %s, region %d" +vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d" # hw/vfio/platform.c -vfio_platform_populate_regions(int region_index, unsigned long flag, unsigned long size, int fd, unsigned long offset) "- region %d flags = 0x%lx, size = 0x%lx, fd= %d, offset = 0x%lx" vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s" vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)" -vfio_platform_mmap_set_enabled(bool enabled) "fast path = %d" vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path" vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)" -- cgit v1.1 From 2d82f8a3cdb276bc3cb92d6f01bf8f66bf328d62 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 10 Mar 2016 09:39:08 -0700 Subject: vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions Match common vfio code with setup, exit, and finalize functions for BAR, quirk, and VGA management. VGA is also changed to dynamic allocation to match the other MemoryRegions. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 38 ++++++++--------- hw/vfio/pci.c | 114 +++++++++++++++++++++------------------------------ hw/vfio/pci.h | 10 ++--- 3 files changed, 71 insertions(+), 91 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index d626ec9..49ecf11 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -290,10 +290,10 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev, "vfio-ati-3c3-quirk", 1); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 3 /* offset 3 bytes from 0x3c0 */, quirk->mem); - QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, + QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name); @@ -428,7 +428,7 @@ static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque, quirk->state = NONE; - return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x14, size); } @@ -465,7 +465,7 @@ static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr, break; } - vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x14, data, size); } @@ -481,7 +481,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, VFIONvidia3d0Quirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; VFIONvidia3d0State old_state = quirk->state; - uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x10, size); quirk->state = NONE; @@ -523,7 +523,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, } } - vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], + vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x10, data, size); } @@ -551,15 +551,15 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk, data, "vfio-nvidia-3d4-quirk", 2); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]); memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk, data, "vfio-nvidia-3d0-quirk", 2); - memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]); - QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, + QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks, quirk, next); trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name); @@ -970,28 +970,28 @@ void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) vfio_vga_probe_nvidia_3d0_quirk(vdev); } -void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev) +void vfio_vga_quirk_exit(VFIOPCIDevice *vdev) { VFIOQuirk *quirk; int i, j; - for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { - QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) { + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) { for (j = 0; j < quirk->nr_mem; j++) { - memory_region_del_subregion(&vdev->vga.region[i].mem, + memory_region_del_subregion(&vdev->vga->region[i].mem, &quirk->mem[j]); } } } } -void vfio_vga_quirk_free(VFIOPCIDevice *vdev) +void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev) { int i, j; - for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { - while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) { - VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) { + VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks); QLIST_REMOVE(quirk, next); for (j = 0; j < quirk->nr_mem; j++) { object_unparent(OBJECT(&quirk->mem[j])); @@ -1012,7 +1012,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) vfio_probe_rtl8168_bar2_quirk(vdev, nr); } -void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) +void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; VFIOQuirk *quirk; @@ -1025,7 +1025,7 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr) } } -void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr) +void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; int i; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index f18a678..82c3771 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1377,42 +1377,16 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled) } } -static void vfio_unregister_bar(VFIOPCIDevice *vdev, int nr) +static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr) { VFIOBAR *bar = &vdev->bars[nr]; - if (!bar->region.size) { - return; - } - - vfio_bar_quirk_teardown(vdev, nr); - - vfio_region_exit(&bar->region); -} - -static void vfio_unmap_bar(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - - if (!bar->region.size) { - return; - } - - vfio_bar_quirk_free(vdev, nr); - - vfio_region_finalize(&bar->region); -} - -static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) -{ - VFIOBAR *bar = &vdev->bars[nr]; - uint64_t size = bar->region.size; uint32_t pci_bar; uint8_t type; int ret; /* Skip both unimplemented BARs and the upper half of 64bit BARS. */ - if (!size) { + if (!bar->region.size) { return; } @@ -1430,72 +1404,78 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr) type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); - pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); - if (vfio_region_mmap(&bar->region)) { error_report("Failed to mmap %s BAR %d. Performance may be slow", vdev->vbasedev.name, nr); } vfio_bar_quirk_setup(vdev, nr); + + pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); } -static void vfio_map_bars(VFIOPCIDevice *vdev) +static void vfio_bars_setup(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_map_bar(vdev, i); + vfio_bar_setup(vdev, i); } - if (vdev->has_vga) { - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem, + if (vdev->vga) { + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_MEM], + &vdev->vga->region[QEMU_PCI_VGA_MEM], "vfio-vga-mmio@0xa0000", QEMU_PCI_VGA_MEM_SIZE); - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem, + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_IO_LO], + &vdev->vga->region[QEMU_PCI_VGA_IO_LO], "vfio-vga-io@0x3b0", QEMU_PCI_VGA_IO_LO_SIZE); - memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, + memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem, OBJECT(vdev), &vfio_vga_ops, - &vdev->vga.region[QEMU_PCI_VGA_IO_HI], + &vdev->vga->region[QEMU_PCI_VGA_IO_HI], "vfio-vga-io@0x3c0", QEMU_PCI_VGA_IO_HI_SIZE); - pci_register_vga(&vdev->pdev, &vdev->vga.region[QEMU_PCI_VGA_MEM].mem, - &vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem, - &vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem); + pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); vfio_vga_quirk_setup(vdev); } } -static void vfio_unregister_bars(VFIOPCIDevice *vdev) +static void vfio_bars_exit(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_unregister_bar(vdev, i); + vfio_bar_quirk_exit(vdev, i); + vfio_region_exit(&vdev->bars[i].region); } - if (vdev->has_vga) { - vfio_vga_quirk_teardown(vdev); + if (vdev->vga) { pci_unregister_vga(&vdev->pdev); + vfio_vga_quirk_exit(vdev); } } -static void vfio_unmap_bars(VFIOPCIDevice *vdev) +static void vfio_bars_finalize(VFIOPCIDevice *vdev) { int i; for (i = 0; i < PCI_ROM_SLOT; i++) { - vfio_unmap_bar(vdev, i); + vfio_bar_quirk_finalize(vdev, i); + vfio_region_finalize(&vdev->bars[i].region); } - if (vdev->has_vga) { - vfio_vga_quirk_free(vdev); + if (vdev->vga) { + vfio_vga_quirk_finalize(vdev); + for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) { + object_unparent(OBJECT(&vdev->vga->region[i].mem)); + } + g_free(vdev->vga); } } @@ -2124,24 +2104,24 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) goto error; } - vdev->vga.fd_offset = reg_info->offset; - vdev->vga.fd = vdev->vbasedev.fd; + vdev->vga = g_new0(VFIOVGA, 1); - g_free(reg_info); + vdev->vga->fd_offset = reg_info->offset; + vdev->vga->fd = vdev->vbasedev.fd; - vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; - vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks); + g_free(reg_info); - vdev->vga.region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; - vdev->vga.region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].quirks); + vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; + vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); - vdev->vga.region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; - vdev->vga.region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; - QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks); + vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); - vdev->has_vga = true; + vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); } irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; @@ -2528,7 +2508,7 @@ static int vfio_initfn(PCIDevice *pdev) return ret; } - vfio_map_bars(vdev); + vfio_bars_setup(vdev); ret = vfio_add_capabilities(vdev); if (ret) { @@ -2565,7 +2545,7 @@ static int vfio_initfn(PCIDevice *pdev) out_teardown: pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); vfio_teardown_msi(vdev); - vfio_unregister_bars(vdev); + vfio_bars_exit(vdev); return ret; } @@ -2575,7 +2555,7 @@ static void vfio_instance_finalize(Object *obj) VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pci_dev); VFIOGroup *group = vdev->vbasedev.group; - vfio_unmap_bars(vdev); + vfio_bars_finalize(vdev); g_free(vdev->emulated_config_bits); g_free(vdev->rom); vfio_put_device(vdev); @@ -2594,7 +2574,7 @@ static void vfio_exitfn(PCIDevice *pdev) timer_free(vdev->intx.mmap_timer); } vfio_teardown_msi(vdev); - vfio_unregister_bars(vdev); + vfio_bars_exit(vdev); } static void vfio_pci_reset(DeviceState *dev) diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 6256587..b8a7189 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -114,7 +114,7 @@ typedef struct VFIOPCIDevice { int nr_vectors; /* Number of MSI/MSIX vectors currently in use */ int interrupt; /* Current interrupt type */ VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */ - VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */ + VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ PCIHostDeviceAddress host; EventNotifier err_notifier; EventNotifier req_notifier; @@ -150,11 +150,11 @@ void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev); void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); -void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev); -void vfio_vga_quirk_free(VFIOPCIDevice *vdev); +void vfio_vga_quirk_exit(VFIOPCIDevice *vdev); +void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev); void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr); -void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr); -void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr); +void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr); +void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr); void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); #endif /* HW_VFIO_VFIO_PCI_H */ -- cgit v1.1 From e2e5ee9c5652ea1a3c2021a2ec79a3bc6a3b0c51 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 10 Mar 2016 09:39:08 -0700 Subject: vfio/pci: Fixup PCI option ROMs Devices like Intel graphics are known to not only have bad checksums, but also the wrong device ID. This is not so surprising given that the video BIOS is typically part of the system firmware image rather that embedded into the device and needs to support any IGD device installed into the system. Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 82c3771..ebbc9d6 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -832,6 +832,36 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) break; } } + + /* + * Test the ROM signature against our device, if the vendor is correct + * but the device ID doesn't match, store the correct device ID and + * recompute the checksum. Intel IGD devices need this and are known + * to have bogus checksums so we can't simply adjust the checksum. + */ + if (pci_get_word(vdev->rom) == 0xaa55 && + pci_get_word(vdev->rom + 0x18) + 8 < vdev->rom_size && + !memcmp(vdev->rom + pci_get_word(vdev->rom + 0x18), "PCIR", 4)) { + uint16_t vid, did; + + vid = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 4); + did = pci_get_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6); + + if (vid == vdev->vendor_id && did != vdev->device_id) { + int i; + uint8_t csum, *data = vdev->rom; + + pci_set_word(vdev->rom + pci_get_word(vdev->rom + 0x18) + 6, + vdev->device_id); + data[6] = 0; + + for (csum = 0, i = 0; i < vdev->rom_size; i++) { + csum += data[i]; + } + + data[6] = -csum; + } + } } static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) -- cgit v1.1 From e593c0211b8a9e47d3fd24d2be5900ecf5a03b03 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 10 Mar 2016 09:39:08 -0700 Subject: vfio/pci: Split out VGA setup This could be setup later by device specific code, such as IGD initialization. Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 82 ++++++++++++++++++++++++++++++++++------------------------- hw/vfio/pci.h | 2 ++ 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index ebbc9d6..30eb945 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2054,6 +2054,52 @@ static VFIODeviceOps vfio_pci_ops = { .vfio_eoi = vfio_intx_eoi, }; +int vfio_populate_vga(VFIOPCIDevice *vdev) +{ + VFIODevice *vbasedev = &vdev->vbasedev; + struct vfio_region_info *reg_info; + int ret; + + if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { + ret = vfio_get_region_info(vbasedev, + VFIO_PCI_VGA_REGION_INDEX, ®_info); + if (ret) { + return ret; + } + + if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || + !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || + reg_info->size < 0xbffff + 1) { + error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", + (unsigned long)reg_info->flags, + (unsigned long)reg_info->size); + g_free(reg_info); + return -EINVAL; + } + + vdev->vga = g_new0(VFIOVGA, 1); + + vdev->vga->fd_offset = reg_info->offset; + vdev->vga->fd = vdev->vbasedev.fd; + + g_free(reg_info); + + vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; + vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); + + vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); + + vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; + vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; + QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); + } + + return 0; +} + static int vfio_populate_device(VFIOPCIDevice *vdev) { VFIODevice *vbasedev = &vdev->vbasedev; @@ -2113,45 +2159,13 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) g_free(reg_info); - if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) && - vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) { - ret = vfio_get_region_info(vbasedev, - VFIO_PCI_VGA_REGION_INDEX, ®_info); + if (vdev->features & VFIO_FEATURE_ENABLE_VGA) { + ret = vfio_populate_vga(vdev); if (ret) { error_report( "vfio: Device does not support requested feature x-vga"); goto error; } - - if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || - !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || - reg_info->size < 0xbffff + 1) { - error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", - (unsigned long)reg_info->flags, - (unsigned long)reg_info->size); - g_free(reg_info); - ret = -1; - goto error; - } - - vdev->vga = g_new0(VFIOVGA, 1); - - vdev->vga->fd_offset = reg_info->offset; - vdev->vga->fd = vdev->vbasedev.fd; - - g_free(reg_info); - - vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; - vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; - QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks); - - vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; - vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; - QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks); - - vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; - vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; - QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks); } irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index b8a7189..3976f68 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -157,4 +157,6 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr); void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr); void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); +int vfio_populate_vga(VFIOPCIDevice *vdev); + #endif /* HW_VFIO_VFIO_PCI_H */ -- cgit v1.1 From 062ed5d8d6edcba9cb8daecc764b7dcedf3e1009 Mon Sep 17 00:00:00 2001 From: Neo Jia Date: Thu, 10 Mar 2016 09:39:09 -0700 Subject: vfio/pci: replace fixed string limit by g_strdup_printf A trivial change to remove string limit by using g_strdup_printf Tested-by: Neo Jia Signed-off-by: Neo Jia Signed-off-by: Kirti Wankhede Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 30eb945..d091d8c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -919,7 +919,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; DeviceState *dev = DEVICE(vdev); - char name[32]; + char *name; int fd = vdev->vbasedev.fd; if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { @@ -962,10 +962,11 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) trace_vfio_pci_size_rom(vdev->vbasedev.name, size); - snprintf(name, sizeof(name), "vfio[%s].rom", vdev->vbasedev.name); + name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name); memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev), &vfio_rom_ops, vdev, name, size); + g_free(name); pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); -- cgit v1.1 From 99b88c6d1fc81c9757bb6e116e0adf20b2906629 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 10 Mar 2016 09:39:09 -0700 Subject: MAINTAINERS: Add entry for the include/hw/vfio/ folder The headers in include/hw/vfio/ should be listed in the VFIO section of the MAINTAINERS file. Signed-off-by: Thomas Huth Signed-off-by: Alex Williamson --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index dc0aa54..5c4e23e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -866,6 +866,7 @@ VFIO M: Alex Williamson S: Supported F: hw/vfio/* +F: include/hw/vfio/ vhost M: Michael S. Tsirkin -- cgit v1.1