diff options
Diffstat (limited to 'hw/vfio')
-rw-r--r-- | hw/vfio/common.c | 9 | ||||
-rw-r--r-- | hw/vfio/igd.c | 529 | ||||
-rw-r--r-- | hw/vfio/iommufd.c | 1 | ||||
-rw-r--r-- | hw/vfio/meson.build | 27 | ||||
-rw-r--r-- | hw/vfio/migration-multifd.c | 15 | ||||
-rw-r--r-- | hw/vfio/migration.c | 1 | ||||
-rw-r--r-- | hw/vfio/pci-quirks.c | 53 | ||||
-rw-r--r-- | hw/vfio/pci.c | 35 | ||||
-rw-r--r-- | hw/vfio/pci.h | 11 | ||||
-rw-r--r-- | hw/vfio/spapr.c | 4 |
10 files changed, 283 insertions, 402 deletions
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 7a4010e..1a0d929 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -30,6 +30,7 @@ #include "exec/address-spaces.h" #include "exec/memory.h" #include "exec/ram_addr.h" +#include "exec/target_page.h" #include "hw/hw.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -42,6 +43,7 @@ #include "migration/misc.h" #include "migration/blocker.h" #include "migration/qemu-file.h" +#include "system/tcg.h" #include "system/tpm.h" VFIODeviceList vfio_device_list = @@ -392,13 +394,14 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, MemoryRegionSection *section) { RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + int target_page_size = qemu_target_page_size(); VFIORamDiscardListener *vrdl; /* Ignore some corner cases not relevant in practice. */ - g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE)); + g_assert(QEMU_IS_ALIGNED(section->offset_within_region, target_page_size)); g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space, - TARGET_PAGE_SIZE)); - g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); + target_page_size)); + g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), target_page_size)); vrdl = g_new0(VFIORamDiscardListener, 1); vrdl->bcontainer = bcontainer; diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index b1a237e..265fffc 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -15,6 +15,7 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" +#include "hw/boards.h" #include "hw/hw.h" #include "hw/nvram/fw_cfg.h" #include "pci.h" @@ -106,40 +107,15 @@ static int igd_gen(VFIOPCIDevice *vdev) return -1; } -typedef struct VFIOIGDQuirk { - struct VFIOPCIDevice *vdev; - uint32_t index; - uint64_t bdsm; -} VFIOIGDQuirk; - +#define IGD_ASLS 0xfc /* ASL Storage Register */ #define IGD_GMCH 0x50 /* Graphics Control Register */ #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ #define IGD_GMCH_GEN6_GMS_SHIFT 3 /* SNB_GMCH in i915 */ #define IGD_GMCH_GEN6_GMS_MASK 0x1f -#define IGD_GMCH_GEN6_GGMS_SHIFT 8 -#define IGD_GMCH_GEN6_GGMS_MASK 0x3 #define IGD_GMCH_GEN8_GMS_SHIFT 8 /* BDW_GMCH in i915 */ #define IGD_GMCH_GEN8_GMS_MASK 0xff -#define IGD_GMCH_GEN8_GGMS_SHIFT 6 -#define IGD_GMCH_GEN8_GGMS_MASK 0x3 - -static uint64_t igd_gtt_memory_size(int gen, uint16_t gmch) -{ - uint64_t ggms; - - if (gen < 8) { - ggms = (gmch >> IGD_GMCH_GEN6_GGMS_SHIFT) & IGD_GMCH_GEN6_GGMS_MASK; - } else { - ggms = (gmch >> IGD_GMCH_GEN8_GGMS_SHIFT) & IGD_GMCH_GEN8_GGMS_MASK; - if (ggms != 0) { - ggms = 1ULL << ggms; - } - } - - return ggms * MiB; -} static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) { @@ -165,6 +141,82 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) } /* + * The OpRegion includes the Video BIOS Table, which seems important for + * telling the driver what sort of outputs it has. Without this, the device + * may work in the guest, but we may not get output. This also requires BIOS + * support to reserve and populate a section of guest memory sufficient for + * the table and to write the base address of that memory to the ASLS register + * of the IGD device. + */ +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + struct vfio_region_info *info, + Error **errp) +{ + int ret; + + vdev->igd_opregion = g_malloc0(info->size); + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, + info->size, info->offset); + if (ret != info->size) { + error_setg(errp, "failed to read IGD OpRegion"); + g_free(vdev->igd_opregion); + vdev->igd_opregion = NULL; + return false; + } + + /* + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to + * allocate 32bit reserved memory for, copy these contents into, and write + * the reserved memory base address to the device ASLS register at 0xFC. + * Alignment of this reserved region seems flexible, but using a 4k page + * alignment seems to work well. This interface assumes a single IGD + * device, which may be at VM address 00:02.0 in legacy mode or another + * address in UPT mode. + * + * NB, there may be future use cases discovered where the VM should have + * direct interaction with the host OpRegion, in which case the write to + * the ASLS register would trigger MemoryRegion setup to enable that. + */ + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", + vdev->igd_opregion, info->size); + + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); + + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); + + return true; +} + +static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) +{ + g_autofree struct vfio_region_info *opregion = NULL; + int ret; + + /* Hotplugging is not supported for opregion access */ + if (vdev->pdev.qdev.hotplugged) { + error_setg(errp, "IGD OpRegion is not supported on hotplugged device"); + return false; + } + + ret = vfio_get_dev_region_info(&vdev->vbasedev, + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); + if (ret) { + error_setg_errno(errp, -ret, + "Device does not supports IGD OpRegion feature"); + return false; + } + + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { + return false; + } + + return true; +} + +/* * The rather short list of registers that we copy from the host devices. * The LPC/ISA bridge values are definitely needed to support the vBIOS, the * host bridge values may or may not be needed depending on the guest OS. @@ -300,129 +352,72 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, return ret; } -/* - * IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE - * entry, older IGDs use 2MB and 32bit. Each PTE maps a 4k page. Therefore - * we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index - * for programming the GTT. - * - * See linux:include/drm/i915_drm.h for shift and mask values. - */ -static int vfio_igd_gtt_max(VFIOPCIDevice *vdev) -{ - uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch)); - int gen = igd_gen(vdev); - uint64_t ggms_size = igd_gtt_memory_size(gen, gmch); - - return (ggms_size / (4 * KiB)) * (gen < 8 ? 4 : 8); -} - -/* - * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes. - * Somehow the host stolen memory range is used for this, but how the ROM gets - * it is a mystery, perhaps it's hardcoded into the ROM. Thankfully though, it - * reprograms the GTT through the IOBAR where we can trap it and transpose the - * programming to the VM allocated buffer. That buffer gets reserved by the VM - * firmware via the fw_cfg entry added below. Here we're just monitoring the - * IOBAR address and data registers to detect a write sequence targeting the - * GTTADR. This code is developed by observed behavior and doesn't have a - * direct spec reference, unfortunately. - */ -static uint64_t vfio_igd_quirk_data_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOIGDQuirk *igd = opaque; - VFIOPCIDevice *vdev = igd->vdev; - - igd->index = ~0; - - return vfio_region_read(&vdev->bars[4].region, addr + 4, size); -} - -static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) +static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) { - VFIOIGDQuirk *igd = opaque; - VFIOPCIDevice *vdev = igd->vdev; - uint64_t val = data; - int gen = igd_gen(vdev); + g_autofree struct vfio_region_info *host = NULL; + g_autofree struct vfio_region_info *lpc = NULL; + PCIDevice *lpc_bridge; + int ret; /* - * Programming the GGMS starts at index 0x1 and uses every 4th index (ie. - * 0x1, 0x5, 0x9, 0xd,...). For pre-Gen8 each 4-byte write is a whole PTE - * entry, with 0th bit enable set. For Gen8 and up, PTEs are 64bit, so - * entries 0x5 & 0xd are the high dword, in our case zero. Each PTE points - * to a 4k page, which we translate to a page from the VM allocated region, - * pointed to by the BDSM register. If this is not set, we fail. - * - * We trap writes to the full configured GTT size, but we typically only - * see the vBIOS writing up to (nearly) the 1MB barrier. In fact it often - * seems to miss the last entry for an even 1MB GTT. Doing a gratuitous - * write of that last entry does work, but is hopefully unnecessary since - * we clear the previous GTT on initialization. + * Copying IDs or creating new devices are not supported on hotplug */ - if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) { - if (gen < 8 || (igd->index % 8 == 1)) { - uint64_t base; - - if (gen < 11) { - base = pci_get_long(vdev->pdev.config + IGD_BDSM); - } else { - base = pci_get_quad(vdev->pdev.config + IGD_BDSM_GEN11); - } - if (!base) { - hw_error("vfio-igd: Guest attempted to program IGD GTT before " - "BIOS reserved stolen memory. Unsupported BIOS?"); - } - - val = data - igd->bdsm + base; - } else { - val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */ - } - - trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name, - igd->index, data, val); + if (vdev->pdev.qdev.hotplugged) { + error_setg(errp, "IGD LPC is not supported on hotplugged device"); + return false; } - vfio_region_write(&vdev->bars[4].region, addr + 4, val, size); - - igd->index = ~0; -} - -static const MemoryRegionOps vfio_igd_data_quirk = { - .read = vfio_igd_quirk_data_read, - .write = vfio_igd_quirk_data_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static uint64_t vfio_igd_quirk_index_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIOIGDQuirk *igd = opaque; - VFIOPCIDevice *vdev = igd->vdev; + /* + * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we + * can stuff host values into, so if there's already one there and it's not + * one we can hack on, this quirk is no-go. Sorry Q35. + */ + lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev), + 0, PCI_DEVFN(0x1f, 0)); + if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge), + "vfio-pci-igd-lpc-bridge")) { + error_setg(errp, + "Cannot create LPC bridge due to existing device at 1f.0"); + return false; + } - igd->index = ~0; + /* + * Check whether we have all the vfio device specific regions to + * support LPC quirk (added in Linux v4.6). + */ + ret = vfio_get_dev_region_info(&vdev->vbasedev, + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, + VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc); + if (ret) { + error_setg(errp, "IGD LPC bridge access is not supported by kernel"); + return false; + } - return vfio_region_read(&vdev->bars[4].region, addr, size); -} + ret = vfio_get_dev_region_info(&vdev->vbasedev, + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, + VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); + if (ret) { + error_setg(errp, "IGD host bridge access is not supported by kernel"); + return false; + } -static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIOIGDQuirk *igd = opaque; - VFIOPCIDevice *vdev = igd->vdev; + /* Create/modify LPC bridge */ + ret = vfio_pci_igd_lpc_init(vdev, lpc); + if (ret) { + error_setg(errp, "Failed to create/modify LPC bridge for IGD"); + return false; + } - igd->index = data; + /* Stuff some host values into the VM PCI host bridge */ + ret = vfio_pci_igd_host_init(vdev, host); + if (ret) { + error_setg(errp, "Failed to modify host bridge for IGD"); + return false; + } - vfio_region_write(&vdev->bars[4].region, addr, data, size); + return true; } -static const MemoryRegionOps vfio_igd_index_quirk = { - .read = vfio_igd_quirk_index_read, - .write = vfio_igd_quirk_index_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - #define IGD_GGC_MMIO_OFFSET 0x108040 #define IGD_BDSM_MMIO_OFFSET 0x1080C0 @@ -438,9 +433,7 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) * bus address. */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || - !vfio_is_vga(vdev) || nr != 0 || - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), - 0, PCI_DEVFN(0x2, 0))) { + !vfio_is_vga(vdev) || nr != 0) { return; } @@ -488,20 +481,13 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); } -void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) +static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) { - g_autofree struct vfio_region_info *rom = NULL; - g_autofree struct vfio_region_info *opregion = NULL; - g_autofree struct vfio_region_info *host = NULL; - g_autofree struct vfio_region_info *lpc = NULL; - VFIOQuirk *quirk; - VFIOIGDQuirk *igd; - PCIDevice *lpc_bridge; - int i, ret, gen; - uint64_t ggms_size, gms_size; + int ret, gen; + uint64_t gms_size; uint64_t *bdsm_size; uint32_t gmch; - uint16_t cmd_orig, cmd; + bool legacy_mode_enabled = false; Error *err = NULL; /* @@ -510,24 +496,8 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) * PCI bus address. */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || - !vfio_is_vga(vdev) || nr != 4 || - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), - 0, PCI_DEVFN(0x2, 0))) { - return; - } - - /* - * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we - * can stuff host values into, so if there's already one there and it's not - * one we can hack on, legacy mode is no-go. Sorry Q35. - */ - lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev), - 0, PCI_DEVFN(0x1f, 0)); - if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge), - "vfio-pci-igd-lpc-bridge")) { - error_report("IGD device %s cannot support legacy mode due to existing " - "devices at address 1f.0", vdev->vbasedev.name); - return; + !vfio_is_vga(vdev)) { + return true; } /* @@ -539,126 +509,77 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) if (gen == -1) { error_report("IGD device %s is unsupported in legacy mode, " "try SandyBridge or newer", vdev->vbasedev.name); - return; - } - - /* - * Most of what we're doing here is to enable the ROM to run, so if - * there's no ROM, there's no point in setting up this quirk. - * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support. - */ - ret = vfio_get_region_info(&vdev->vbasedev, - VFIO_PCI_ROM_REGION_INDEX, &rom); - if ((ret || !rom->size) && !vdev->pdev.romfile) { - error_report("IGD device %s has no ROM, legacy mode disabled", - vdev->vbasedev.name); - return; - } - - /* - * Ignore the hotplug corner case, mark the ROM failed, we can't - * create the devices we need for legacy mode in the hotplug scenario. - */ - if (vdev->pdev.qdev.hotplugged) { - error_report("IGD device %s hotplugged, ROM disabled, " - "legacy mode disabled", vdev->vbasedev.name); - vdev->rom_read_failed = true; - return; - } - - /* - * Check whether we have all the vfio device specific regions to - * support legacy mode (added in Linux v4.6). If not, bail. - */ - ret = vfio_get_dev_region_info(&vdev->vbasedev, - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); - if (ret) { - error_report("IGD device %s does not support OpRegion access," - "legacy mode disabled", vdev->vbasedev.name); - return; - } - - ret = vfio_get_dev_region_info(&vdev->vbasedev, - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); - if (ret) { - error_report("IGD device %s does not support host bridge access," - "legacy mode disabled", vdev->vbasedev.name); - return; - } - - ret = vfio_get_dev_region_info(&vdev->vbasedev, - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc); - if (ret) { - error_report("IGD device %s does not support LPC bridge access," - "legacy mode disabled", vdev->vbasedev.name); - return; + return true; } gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); /* - * If IGD VGA Disable is clear (expected) and VGA is not already enabled, - * try to enable it. Probably shouldn't be using legacy mode without VGA, - * but also no point in us enabling VGA if disabled in hardware. + * For backward compatibility, enable legacy mode when + * - Machine type is i440fx (pc_piix) + * - IGD device is at guest BDF 00:02.0 + * - Not manually disabled by x-igd-legacy-mode=off */ - if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); - error_report("IGD device %s failed to enable VGA access, " - "legacy mode disabled", vdev->vbasedev.name); - return; - } + if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) && + !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") && + (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev), + 0, PCI_DEVFN(0x2, 0)))) { + /* + * IGD legacy mode requires: + * - VBIOS in ROM BAR or file + * - VGA IO/MMIO ranges are claimed by IGD + * - OpRegion + * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host + */ + g_autofree struct vfio_region_info *rom = NULL; + + legacy_mode_enabled = true; + info_report("IGD legacy mode enabled, " + "use x-igd-legacy-mode=off to disable it if unwanted."); + + /* + * Most of what we're doing here is to enable the ROM to run, so if + * there's no ROM, there's no point in setting up this quirk. + * NB. We only seem to get BIOS ROMs, so UEFI VM would need CSM support. + */ + ret = vfio_get_region_info(&vdev->vbasedev, + VFIO_PCI_ROM_REGION_INDEX, &rom); + if ((ret || !rom->size) && !vdev->pdev.romfile) { + error_setg(&err, "Device has no ROM"); + goto error; + } - /* Create our LPC/ISA bridge */ - ret = vfio_pci_igd_lpc_init(vdev, lpc); - if (ret) { - error_report("IGD device %s failed to create LPC bridge, " - "legacy mode disabled", vdev->vbasedev.name); - return; - } + /* + * If IGD VGA Disable is clear (expected) and VGA is not already + * enabled, try to enable it. Probably shouldn't be using legacy mode + * without VGA, but also no point in us enabling VGA if disabled in + * hardware. + */ + if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) { + error_setg(&err, "Unable to enable VGA access"); + goto error; + } - /* Stuff some host values into the VM PCI host bridge */ - ret = vfio_pci_igd_host_init(vdev, host); - if (ret) { - error_report("IGD device %s failed to modify host bridge, " - "legacy mode disabled", vdev->vbasedev.name); - return; + /* Enable OpRegion and LPC bridge quirk */ + vdev->features |= VFIO_FEATURE_ENABLE_IGD_OPREGION; + vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC; + } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) { + error_setg(&err, + "Machine is not i440fx or assigned BDF is not 00:02.0"); + goto error; } /* Setup OpRegion access */ - if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) { - error_append_hint(&err, "IGD legacy mode disabled\n"); - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); - return; + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && + !vfio_pci_igd_setup_opregion(vdev, errp)) { + goto error; } - /* Setup our quirk to munge GTT addresses to the VM allocated buffer */ - quirk = vfio_quirk_alloc(2); - igd = quirk->data = g_malloc0(sizeof(*igd)); - igd->vdev = vdev; - igd->index = ~0; - if (gen < 11) { - igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4); - } else { - igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11, 4); - igd->bdsm |= - (uint64_t)vfio_pci_read_config(&vdev->pdev, IGD_BDSM_GEN11 + 4, 4) << 32; - } - igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */ - - memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk, - igd, "vfio-igd-index-quirk", 4); - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, - 0, &quirk->mem[0], 1); - - memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk, - igd, "vfio-igd-data-quirk", 4); - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, - 4, &quirk->mem[1], 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + /* Setup LPC bridge / Host bridge PCI IDs */ + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) && + !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) { + goto error; + } /* * Allow user to override dsm size using x-igd-gms option, in multiples of @@ -685,7 +606,6 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) } } - ggms_size = igd_gtt_memory_size(gen, gmch); gms_size = igd_stolen_memory_size(gen, gmch); /* @@ -697,7 +617,7 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) * config offset 0x5C. */ bdsm_size = g_malloc(sizeof(*bdsm_size)); - *bdsm_size = cpu_to_le64(ggms_size + gms_size); + *bdsm_size = cpu_to_le64(gms_size); fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size", bdsm_size, sizeof(*bdsm_size)); @@ -717,37 +637,46 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); } + trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); + + return true; + +error: /* - * This IOBAR gives us access to GTTADR, which allows us to write to - * the GTT itself. So let's go ahead and write zero to all the GTT - * entries to avoid spurious DMA faults. Be sure I/O access is enabled - * before talking to the device. + * When legacy mode is implicity enabled, continue on error, + * to keep compatibility */ - if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig), - vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) { - error_report("IGD device %s - failed to read PCI command register", - vdev->vbasedev.name); + if (legacy_mode_enabled && (vdev->igd_legacy_mode == ON_OFF_AUTO_AUTO)) { + error_report_err(err); + error_report("IGD legacy mode disabled"); + return true; } - cmd = cmd_orig | PCI_COMMAND_IO; + error_propagate(errp, err); + return false; +} - if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd), - vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) { - error_report("IGD device %s - failed to write PCI command register", - vdev->vbasedev.name); +/* + * KVMGT/GVT-g vGPU exposes an emulated OpRegion. So far, users have to specify + * x-igd-opregion=on to enable the access. + * TODO: Check VID/DID and enable opregion access automatically + */ +static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp) +{ + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && + !vfio_pci_igd_setup_opregion(vdev, errp)) { + return false; } - for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) { - vfio_region_write(&vdev->bars[4].region, 0, i, 4); - vfio_region_write(&vdev->bars[4].region, 4, 0, 4); - } + return true; +} - if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig), - vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) { - error_report("IGD device %s - failed to restore PCI command register", - vdev->vbasedev.name); +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) +{ + /* KVMGT/GVT-g vGPU is exposed as mdev */ + if (vdev->vbasedev.mdev) { + return vfio_pci_kvmgt_config_quirk(vdev, errp); } - trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, - (ggms_size + gms_size) / MiB); + return vfio_pci_igd_config_quirk(vdev, errp); } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index df61edf..42c8412 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -25,7 +25,6 @@ #include "qemu/cutils.h" #include "qemu/chardev_open.h" #include "pci.h" -#include "exec/ram_addr.h" static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly) diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index 260d65f..a8939c8 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -1,27 +1,32 @@ vfio_ss = ss.source_set() vfio_ss.add(files( - 'helpers.c', 'common.c', - 'container-base.c', 'container.c', - 'migration.c', - 'migration-multifd.c', - 'cpr.c', )) vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) -vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( - 'iommufd.c', -)) vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( - 'display.c', 'pci-quirks.c', 'pci.c', )) vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c')) vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c')) -vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) -vfio_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c')) vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c')) specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss) + +system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) +system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) +system_ss.add(when: 'CONFIG_VFIO', if_true: files( + 'helpers.c', + 'container-base.c', + 'migration.c', + 'migration-multifd.c', + 'cpr.c', +)) +system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( + 'iommufd.c', +)) +system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( + 'display.c', +)) diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c index 2337247..378f6f3 100644 --- a/hw/vfio/migration-multifd.c +++ b/hw/vfio/migration-multifd.c @@ -13,6 +13,7 @@ #include "hw/vfio/vfio-common.h" #include "migration/misc.h" #include "qapi/error.h" +#include "qemu/bswap.h" #include "qemu/error-report.h" #include "qemu/lockable.h" #include "qemu/main-loop.h" @@ -155,12 +156,16 @@ bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, return false; } + packet->version = be32_to_cpu(packet->version); if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { error_setg(errp, "%s: packet has unknown version %" PRIu32, vbasedev->name, packet->version); return false; } + packet->idx = be32_to_cpu(packet->idx); + packet->flags = be32_to_cpu(packet->flags); + if (packet->idx == UINT32_MAX) { error_setg(errp, "%s: packet index is invalid", vbasedev->name); return false; @@ -558,9 +563,9 @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, packet_len = sizeof(*packet) + bioc->usage; packet = g_malloc0(packet_len); - packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; - packet->idx = idx; - packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; + packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT); + packet->idx = cpu_to_be32(idx); + packet->flags = cpu_to_be32(VFIO_DEVICE_STATE_CONFIG_STATE); memcpy(&packet->data, bioc->data, bioc->usage); if (!multifd_queue_device_state(idstr, instance_id, @@ -610,7 +615,7 @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, } packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); - packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; + packet->version = cpu_to_be32(VFIO_DEVICE_STATE_PACKET_VER_CURRENT); for (idx = 0; ; idx++) { ssize_t data_size; @@ -631,7 +636,7 @@ vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, break; } - packet->idx = idx; + packet->idx = cpu_to_be32(idx); packet_size = sizeof(*packet) + data_size; if (!multifd_queue_device_state(d->idstr, d->instance_id, diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 416643d..fbff46c 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -27,7 +27,6 @@ #include "qapi/error.h" #include "qapi/qapi-events-vfio.h" #include "exec/ramlist.h" -#include "exec/ram_addr.h" #include "pci.h" #include "trace.h" #include "hw/hw.h" diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index c53591f..3f00225 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -403,7 +403,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) /* This windows doesn't seem to be used except by legacy VGA code */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) || - !vdev->vga || nr != 4) { + !vdev->vga || nr != 4 || !vdev->bars[4].ioport) { return; } @@ -1114,59 +1114,19 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); } -#define IGD_ASLS 0xfc /* ASL Storage Register */ - /* - * The OpRegion includes the Video BIOS Table, which seems important for - * telling the driver what sort of outputs it has. Without this, the device - * may work in the guest, but we may not get output. This also requires BIOS - * support to reserve and populate a section of guest memory sufficient for - * the table and to write the base address of that memory to the ASLS register - * of the IGD device. + * Common quirk probe entry points. */ -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, - struct vfio_region_info *info, Error **errp) +bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp) { - int ret; - - vdev->igd_opregion = g_malloc0(info->size); - ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, - info->size, info->offset); - if (ret != info->size) { - error_setg(errp, "failed to read IGD OpRegion"); - g_free(vdev->igd_opregion); - vdev->igd_opregion = NULL; +#ifdef CONFIG_VFIO_IGD + if (!vfio_probe_igd_config_quirk(vdev, errp)) { return false; } - - /* - * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to - * allocate 32bit reserved memory for, copy these contents into, and write - * the reserved memory base address to the device ASLS register at 0xFC. - * Alignment of this reserved region seems flexible, but using a 4k page - * alignment seems to work well. This interface assumes a single IGD - * device, which may be at VM address 00:02.0 in legacy mode or another - * address in UPT mode. - * - * NB, there may be future use cases discovered where the VM should have - * direct interaction with the host OpRegion, in which case the write to - * the ASLS register would trigger MemoryRegion setup to enable that. - */ - fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", - vdev->igd_opregion, info->size); - - trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); - - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); - +#endif return true; } -/* - * Common quirk probe entry points. - */ void vfio_vga_quirk_setup(VFIOPCIDevice *vdev) { vfio_vga_probe_ati_3c3_quirk(vdev); @@ -1215,7 +1175,6 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) vfio_probe_rtl8168_bar2_quirk(vdev, nr); #ifdef CONFIG_VFIO_IGD vfio_probe_igd_bar0_quirk(vdev, nr); - vfio_probe_igd_bar4_quirk(vdev, nr); #endif } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index fdbc158..7f1532f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3128,6 +3128,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto out_unset_idev; } + if (!vfio_config_quirk_setup(vdev, errp)) { + goto out_unset_idev; + } + if (vdev->vga) { vfio_vga_quirk_setup(vdev); } @@ -3136,31 +3140,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bar_quirk_setup(vdev, i); } - if (!vdev->igd_opregion && - vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { - g_autofree struct vfio_region_info *opregion = NULL; - - if (vdev->pdev.qdev.hotplugged) { - error_setg(errp, - "cannot support IGD OpRegion feature on hotplugged " - "device"); - goto out_unset_idev; - } - - ret = vfio_get_dev_region_info(vbasedev, - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); - if (ret) { - error_setg_errno(errp, -ret, - "does not support requested IGD OpRegion feature"); - goto out_unset_idev; - } - - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { - goto out_unset_idev; - } - } - /* QEMU emulates all of MSI & MSIX */ if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, @@ -3381,6 +3360,10 @@ static const Property vfio_pci_dev_properties[] = { VFIO_FEATURE_ENABLE_REQ_BIT, true), DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), + DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features, + VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false), + DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice, + igd_legacy_mode, ON_OFF_AUTO_AUTO), DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, vbasedev.enable_migration, ON_OFF_AUTO_AUTO), DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice, @@ -3549,7 +3532,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) object_class_property_set_description(klass, /* 9.1 */ "x-device-dirty-page-tracking", "Disable device dirty page tracking and use " - "container-based dirty page tracking (DEBUG)"); + "container-based dirty page tracking"); object_class_property_set_description(klass, /* 9.1 */ "migration-events", "Emit VFIO migration QAPI event when a VFIO device " diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index d638c78..d94ecab 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -154,10 +154,14 @@ struct VFIOPCIDevice { #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2 #define VFIO_FEATURE_ENABLE_IGD_OPREGION \ (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT) +#define VFIO_FEATURE_ENABLE_IGD_LPC_BIT 3 +#define VFIO_FEATURE_ENABLE_IGD_LPC \ + (1 << VFIO_FEATURE_ENABLE_IGD_LPC_BIT) OnOffAuto display; uint32_t display_xres; uint32_t display_yres; int32_t bootindex; + OnOffAuto igd_legacy_mode; uint32_t igd_gms; OffAutoPCIBAR msix_relo; uint8_t nv_gpudirect_clique; @@ -204,6 +208,7 @@ uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev); +bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp); void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); void vfio_vga_quirk_exit(VFIOPCIDevice *vdev); void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev); @@ -215,7 +220,7 @@ bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp); void vfio_quirk_reset(VFIOPCIDevice *vdev); VFIOQuirk *vfio_quirk_alloc(int nr_mem); void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr); -void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); +bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp); extern const PropertyInfo qdev_prop_nv_gpudirect_clique; @@ -227,10 +232,6 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, - struct vfio_region_info *info, - Error **errp); - void vfio_display_reset(VFIOPCIDevice *vdev); bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); void vfio_display_finalize(VFIOPCIDevice *vdev); diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index ad4c499..1a5d161 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -11,10 +11,8 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> #include <linux/vfio.h> -#ifdef CONFIG_KVM -#include <linux/kvm.h> -#endif #include "system/kvm.h" +#include "system/hostmem.h" #include "exec/address-spaces.h" #include "hw/vfio/vfio-common.h" |