aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS6
-rw-r--r--hw/arm/virt-acpi-build.c56
-rw-r--r--hw/arm/virt.c124
-rw-r--r--hw/intc/Makefile.objs2
-rw-r--r--hw/intc/arm_gic_kvm.c98
-rw-r--r--hw/intc/arm_gicv3_common.c140
-rw-r--r--hw/intc/arm_gicv3_kvm.c149
-rw-r--r--hw/intc/vgic_common.h35
-rw-r--r--hw/vfio/Makefile.objs2
-rw-r--r--hw/vfio/common.c2
-rw-r--r--hw/vfio/pci-quirks.c1204
-rw-r--r--hw/vfio/pci.c1424
-rw-r--r--hw/vfio/pci.h159
-rw-r--r--hw/vfio/platform.c2
-rw-r--r--include/hw/acpi/acpi-defs.h9
-rw-r--r--include/hw/arm/virt-acpi-build.h1
-rw-r--r--include/hw/arm/virt.h4
-rw-r--r--include/hw/intc/arm_gicv3_common.h68
-rw-r--r--include/hw/vfio/vfio-common.h7
-rw-r--r--include/sysemu/kvm.h26
-rw-r--r--kvm-all.c34
-rw-r--r--target-arm/kvm.c19
-rw-r--r--target-arm/kvm_arm.h19
-rw-r--r--target-arm/machine.c18
-rw-r--r--trace-events87
25 files changed, 2244 insertions, 1451 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 71c652b..cf02890 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -266,10 +266,10 @@ F: *win32*
ARM Machines
------------
Allwinner-a10
-M: Li Guang <lig.fnst@cn.fujitsu.com>
+M: Beniamino Galvani <b.galvani@gmail.com>
S: Maintained
-F: hw/*/allwinner-a10*
-F: include/hw/*/allwinner-a10*
+F: hw/*/allwinner*
+F: include/hw/*/allwinner*
F: hw/arm/cubieboard.c
Exynos
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 9088248..1aaff1f 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -114,7 +114,7 @@ static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
{
Aml *dev, *crs;
hwaddr base = flash_memmap->base;
- hwaddr size = flash_memmap->size;
+ hwaddr size = flash_memmap->size / 2;
dev = aml_device("FLS0");
aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0015")));
@@ -443,33 +443,43 @@ build_madt(GArray *table_data, GArray *linker, VirtGuestInfo *guest_info,
madt = acpi_data_push(table_data, sizeof *madt);
- for (i = 0; i < guest_info->smp_cpus; i++) {
- AcpiMadtGenericInterrupt *gicc = acpi_data_push(table_data,
- sizeof *gicc);
- gicc->type = ACPI_APIC_GENERIC_INTERRUPT;
- gicc->length = sizeof(*gicc);
- gicc->base_address = memmap[VIRT_GIC_CPU].base;
- gicc->cpu_interface_number = i;
- gicc->arm_mpidr = i;
- gicc->uid = i;
- if (test_bit(i, cpuinfo->found_cpus)) {
- gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED);
- }
- }
-
gicd = acpi_data_push(table_data, sizeof *gicd);
gicd->type = ACPI_APIC_GENERIC_DISTRIBUTOR;
gicd->length = sizeof(*gicd);
gicd->base_address = memmap[VIRT_GIC_DIST].base;
- gic_msi = acpi_data_push(table_data, sizeof *gic_msi);
- gic_msi->type = ACPI_APIC_GENERIC_MSI_FRAME;
- gic_msi->length = sizeof(*gic_msi);
- gic_msi->gic_msi_frame_id = 0;
- gic_msi->base_address = cpu_to_le64(memmap[VIRT_GIC_V2M].base);
- gic_msi->flags = cpu_to_le32(1);
- gic_msi->spi_count = cpu_to_le16(NUM_GICV2M_SPIS);
- gic_msi->spi_base = cpu_to_le16(irqmap[VIRT_GIC_V2M] + ARM_SPI_BASE);
+ if (guest_info->gic_version == 3) {
+ AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data,
+ sizeof *gicr);
+
+ gicr->type = ACPI_APIC_GENERIC_REDISTRIBUTOR;
+ gicr->length = sizeof(*gicr);
+ gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base);
+ gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size);
+ } else {
+ for (i = 0; i < guest_info->smp_cpus; i++) {
+ AcpiMadtGenericInterrupt *gicc = acpi_data_push(table_data,
+ sizeof *gicc);
+ gicc->type = ACPI_APIC_GENERIC_INTERRUPT;
+ gicc->length = sizeof(*gicc);
+ gicc->base_address = memmap[VIRT_GIC_CPU].base;
+ gicc->cpu_interface_number = i;
+ gicc->arm_mpidr = i;
+ gicc->uid = i;
+ if (test_bit(i, cpuinfo->found_cpus)) {
+ gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED);
+ }
+ }
+
+ gic_msi = acpi_data_push(table_data, sizeof *gic_msi);
+ gic_msi->type = ACPI_APIC_GENERIC_MSI_FRAME;
+ gic_msi->length = sizeof(*gic_msi);
+ gic_msi->gic_msi_frame_id = 0;
+ gic_msi->base_address = cpu_to_le64(memmap[VIRT_GIC_V2M].base);
+ gic_msi->flags = cpu_to_le32(1);
+ gic_msi->spi_count = cpu_to_le16(NUM_GICV2M_SPIS);
+ gic_msi->spi_base = cpu_to_le16(irqmap[VIRT_GIC_V2M] + ARM_SPI_BASE);
+ }
build_header(linker, table_data,
(void *)(table_data->data + madt_start), "APIC",
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6bf0d6d..d25d6cf 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -51,6 +51,7 @@
#include "hw/intc/arm_gic_common.h"
#include "kvm_arm.h"
#include "hw/smbios/smbios.h"
+#include "qapi/visitor.h"
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256
@@ -81,6 +82,7 @@ typedef struct {
MachineState parent;
bool secure;
bool highmem;
+ int32_t gic_version;
} VirtMachineState;
#define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
@@ -111,6 +113,10 @@ static const MemMapEntry a15memmap[] = {
[VIRT_GIC_DIST] = { 0x08000000, 0x00010000 },
[VIRT_GIC_CPU] = { 0x08010000, 0x00010000 },
[VIRT_GIC_V2M] = { 0x08020000, 0x00001000 },
+ /* The space in between here is reserved for GICv3 CPU/vCPU/HYP */
+ [VIRT_GIC_ITS] = { 0x08080000, 0x00020000 },
+ /* This redistributor space allows up to 2*64kB*123 CPUs */
+ [VIRT_GIC_REDIST] = { 0x080A0000, 0x00F60000 },
[VIRT_UART] = { 0x09000000, 0x00001000 },
[VIRT_RTC] = { 0x09010000, 0x00001000 },
[VIRT_FW_CFG] = { 0x09020000, 0x0000000a },
@@ -255,7 +261,7 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)
qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
}
-static void fdt_add_timer_nodes(const VirtBoardInfo *vbi)
+static void fdt_add_timer_nodes(const VirtBoardInfo *vbi, int gictype)
{
/* Note that on A15 h/w these interrupts are level-triggered,
* but for the GIC implementation provided by both QEMU and KVM
@@ -264,8 +270,11 @@ static void fdt_add_timer_nodes(const VirtBoardInfo *vbi)
ARMCPU *armcpu;
uint32_t irqflags = GIC_FDT_IRQ_FLAGS_EDGE_LO_HI;
- irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
- GIC_FDT_IRQ_PPI_CPU_WIDTH, (1 << vbi->smp_cpus) - 1);
+ if (gictype == 2) {
+ irqflags = deposit32(irqflags, GIC_FDT_IRQ_PPI_CPU_START,
+ GIC_FDT_IRQ_PPI_CPU_WIDTH,
+ (1 << vbi->smp_cpus) - 1);
+ }
qemu_fdt_add_subnode(vbi->fdt, "/timer");
@@ -355,25 +364,36 @@ static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi)
qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->v2m_phandle);
}
-static void fdt_add_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_gic_node(VirtBoardInfo *vbi, int type)
{
vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
qemu_fdt_add_subnode(vbi->fdt, "/intc");
- /* 'cortex-a15-gic' means 'GIC v2' */
- qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
- "arm,cortex-a15-gic");
qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#interrupt-cells", 3);
qemu_fdt_setprop(vbi->fdt, "/intc", "interrupt-controller", NULL, 0);
- qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
- 2, vbi->memmap[VIRT_GIC_DIST].base,
- 2, vbi->memmap[VIRT_GIC_DIST].size,
- 2, vbi->memmap[VIRT_GIC_CPU].base,
- 2, vbi->memmap[VIRT_GIC_CPU].size);
qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#address-cells", 0x2);
qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#size-cells", 0x2);
qemu_fdt_setprop(vbi->fdt, "/intc", "ranges", NULL, 0);
+ if (type == 3) {
+ qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
+ "arm,gic-v3");
+ qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
+ 2, vbi->memmap[VIRT_GIC_DIST].base,
+ 2, vbi->memmap[VIRT_GIC_DIST].size,
+ 2, vbi->memmap[VIRT_GIC_REDIST].base,
+ 2, vbi->memmap[VIRT_GIC_REDIST].size);
+ } else {
+ /* 'cortex-a15-gic' means 'GIC v2' */
+ qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
+ "arm,cortex-a15-gic");
+ qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
+ 2, vbi->memmap[VIRT_GIC_DIST].base,
+ 2, vbi->memmap[VIRT_GIC_DIST].size,
+ 2, vbi->memmap[VIRT_GIC_CPU].base,
+ 2, vbi->memmap[VIRT_GIC_CPU].size);
+ }
+
qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
}
@@ -396,18 +416,18 @@ static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
fdt_add_v2m_gic_node(vbi);
}
-static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, bool secure)
+static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type, bool secure)
{
- /* We create a standalone GIC v2 */
+ /* We create a standalone GIC */
DeviceState *gicdev;
SysBusDevice *gicbusdev;
const char *gictype;
int i;
- gictype = gic_class_name();
+ gictype = (type == 3) ? gicv3_class_name() : gic_class_name();
gicdev = qdev_create(NULL, gictype);
- qdev_prop_set_uint32(gicdev, "revision", 2);
+ qdev_prop_set_uint32(gicdev, "revision", type);
qdev_prop_set_uint32(gicdev, "num-cpu", smp_cpus);
/* Note that the num-irq property counts both internal and external
* interrupts; there are always 32 of the former (mandated by GIC spec).
@@ -419,7 +439,11 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, bool secure)
qdev_init_nofail(gicdev);
gicbusdev = SYS_BUS_DEVICE(gicdev);
sysbus_mmio_map(gicbusdev, 0, vbi->memmap[VIRT_GIC_DIST].base);
- sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_CPU].base);
+ if (type == 3) {
+ sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_REDIST].base);
+ } else {
+ sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_CPU].base);
+ }
/* Wire the outputs from each CPU's generic timer to the
* appropriate GIC PPI inputs, and the GIC's IRQ output to
@@ -454,9 +478,11 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, bool secure)
pic[i] = qdev_get_gpio_in(gicdev, i);
}
- fdt_add_gic_node(vbi);
+ fdt_add_gic_node(vbi, type);
- create_v2m(vbi, pic);
+ if (type == 2) {
+ create_v2m(vbi, pic);
+ }
}
static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
@@ -773,7 +799,10 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
nr_pcie_buses - 1);
- qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent", vbi->v2m_phandle);
+ if (vbi->v2m_phandle) {
+ qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent",
+ vbi->v2m_phandle);
+ }
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
2, base_ecam, 2, size_ecam);
@@ -888,6 +917,7 @@ static void machvirt_init(MachineState *machine)
VirtMachineState *vms = VIRT_MACHINE(machine);
qemu_irq pic[NUM_IRQS];
MemoryRegion *sysmem = get_system_memory();
+ int gic_version = vms->gic_version;
int n;
MemoryRegion *ram = g_new(MemoryRegion, 1);
const char *cpu_model = machine->cpu_model;
@@ -900,6 +930,18 @@ static void machvirt_init(MachineState *machine)
cpu_model = "cortex-a15";
}
+ /* We can probe only here because during property set
+ * KVM is not available yet
+ */
+ if (!gic_version) {
+ gic_version = kvm_arm_vgic_probe();
+ if (!gic_version) {
+ error_report("Unable to determine GIC version supported by host\n"
+ "Probably KVM acceleration is not supported\n");
+ exit(1);
+ }
+ }
+
/* Separate the actual CPU model name from any appended features */
cpustr = g_strsplit(cpu_model, ",", 2);
@@ -960,7 +1002,7 @@ static void machvirt_init(MachineState *machine)
object_property_set_bool(cpuobj, true, "realized", NULL);
}
g_strfreev(cpustr);
- fdt_add_timer_nodes(vbi);
+ fdt_add_timer_nodes(vbi, gic_version);
fdt_add_cpu_nodes(vbi);
fdt_add_psci_node(vbi);
@@ -970,7 +1012,7 @@ static void machvirt_init(MachineState *machine)
create_flash(vbi);
- create_gic(vbi, pic, vms->secure);
+ create_gic(vbi, pic, gic_version, vms->secure);
create_uart(vbi, pic);
@@ -992,6 +1034,7 @@ static void machvirt_init(MachineState *machine)
guest_info->memmap = vbi->memmap;
guest_info->irqmap = vbi->irqmap;
guest_info->use_highmem = vms->highmem;
+ guest_info->gic_version = gic_version;
guest_info_state->machine_done.notify = virt_guest_info_machine_done;
qemu_add_machine_init_done_notifier(&guest_info_state->machine_done);
@@ -1043,6 +1086,31 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp)
vms->highmem = value;
}
+static char *virt_get_gic_version(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+ const char *val = vms->gic_version == 3 ? "3" : "2";
+
+ return g_strdup(val);
+}
+
+static void virt_set_gic_version(Object *obj, const char *value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ if (!strcmp(value, "3")) {
+ vms->gic_version = 3;
+ } else if (!strcmp(value, "2")) {
+ vms->gic_version = 2;
+ } else if (!strcmp(value, "host")) {
+ vms->gic_version = 0; /* Will probe later */
+ } else {
+ error_report("Invalid gic-version option value\n"
+ "Allowed values are: 3, 2, host\n");
+ exit(1);
+ }
+}
+
static void virt_instance_init(Object *obj)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -1067,6 +1135,13 @@ static void virt_instance_init(Object *obj)
"Set on/off to enable/disable using "
"physical address space above 32 bits",
NULL);
+ /* Default GIC type is v2 */
+ vms->gic_version = 2;
+ object_property_add_str(obj, "gic-version", virt_get_gic_version,
+ virt_set_gic_version, NULL);
+ object_property_set_description(obj, "gic-version",
+ "Set GIC version. "
+ "Valid values are 2, 3 and host", NULL);
}
static void virt_class_init(ObjectClass *oc, void *data)
@@ -1075,7 +1150,10 @@ static void virt_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM Virtual Machine",
mc->init = machvirt_init;
- mc->max_cpus = 8;
+ /* Our maximum number of CPUs depends on how many redistributors
+ * we can fit into memory map
+ */
+ mc->max_cpus = a15memmap[VIRT_GIC_REDIST].size / 0x20000;
mc->has_dynamic_sysbus = true;
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 092d8a8..004b0c2 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -12,10 +12,12 @@ common-obj-$(CONFIG_IOAPIC) += ioapic_common.o
common-obj-$(CONFIG_ARM_GIC) += arm_gic_common.o
common-obj-$(CONFIG_ARM_GIC) += arm_gic.o
common-obj-$(CONFIG_ARM_GIC) += arm_gicv2m.o
+common-obj-$(CONFIG_ARM_GIC) += arm_gicv3_common.o
common-obj-$(CONFIG_OPENPIC) += openpic.o
obj-$(CONFIG_APIC) += apic.o apic_common.o
obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o
+obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o
obj-$(CONFIG_STELLARIS) += armv7m_nvic.o
obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o
obj-$(CONFIG_GRLIB) += grlib_irqmp.o
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index e5d0f67..e8b2386 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -23,6 +23,7 @@
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "gic_internal.h"
+#include "vgic_common.h"
//#define DEBUG_GIC_KVM
@@ -52,7 +53,7 @@ typedef struct KVMARMGICClass {
void (*parent_reset)(DeviceState *dev);
} KVMARMGICClass;
-static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
+void kvm_arm_gic_set_irq(uint32_t num_irq, int irq, int level)
{
/* Meaning of the 'irq' parameter:
* [0..N-1] : external interrupts
@@ -63,10 +64,9 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
* has separate fields in the irq number for type,
* CPU number and interrupt number.
*/
- GICState *s = (GICState *)opaque;
int kvm_irq, irqtype, cpu;
- if (irq < (s->num_irq - GIC_INTERNAL)) {
+ if (irq < (num_irq - GIC_INTERNAL)) {
/* External interrupt. The kernel numbers these like the GIC
* hardware, with external interrupt IDs starting after the
* internal ones.
@@ -77,7 +77,7 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
} else {
/* Internal interrupt: decode into (cpu, interrupt id) */
irqtype = KVM_ARM_IRQ_TYPE_PPI;
- irq -= (s->num_irq - GIC_INTERNAL);
+ irq -= (num_irq - GIC_INTERNAL);
cpu = irq / GIC_INTERNAL;
irq %= GIC_INTERNAL;
}
@@ -87,69 +87,36 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
kvm_set_irq(kvm_state, kvm_irq, !!level);
}
-static bool kvm_arm_gic_can_save_restore(GICState *s)
-{
- return s->dev_fd >= 0;
-}
-
-static bool kvm_gic_supports_attr(GICState *s, int group, int attrnum)
+static void kvm_arm_gicv2_set_irq(void *opaque, int irq, int level)
{
- struct kvm_device_attr attr = {
- .group = group,
- .attr = attrnum,
- .flags = 0,
- };
-
- if (s->dev_fd == -1) {
- return false;
- }
+ GICState *s = (GICState *)opaque;
- return kvm_device_ioctl(s->dev_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0;
+ kvm_arm_gic_set_irq(s->num_irq, irq, level);
}
-static void kvm_gic_access(GICState *s, int group, int offset,
- int cpu, uint32_t *val, bool write)
+static bool kvm_arm_gic_can_save_restore(GICState *s)
{
- struct kvm_device_attr attr;
- int type;
- int err;
-
- cpu = cpu & 0xff;
-
- attr.flags = 0;
- attr.group = group;
- attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
- KVM_DEV_ARM_VGIC_CPUID_MASK) |
- (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
- KVM_DEV_ARM_VGIC_OFFSET_MASK);
- attr.addr = (uintptr_t)val;
-
- if (write) {
- type = KVM_SET_DEVICE_ATTR;
- } else {
- type = KVM_GET_DEVICE_ATTR;
- }
-
- err = kvm_device_ioctl(s->dev_fd, type, &attr);
- if (err < 0) {
- fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
- strerror(-err));
- abort();
- }
+ return s->dev_fd >= 0;
}
+#define KVM_VGIC_ATTR(offset, cpu) \
+ ((((uint64_t)(cpu) << KVM_DEV_ARM_VGIC_CPUID_SHIFT) & \
+ KVM_DEV_ARM_VGIC_CPUID_MASK) | \
+ (((uint64_t)(offset) << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) & \
+ KVM_DEV_ARM_VGIC_OFFSET_MASK))
+
static void kvm_gicd_access(GICState *s, int offset, int cpu,
uint32_t *val, bool write)
{
- kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
- offset, cpu, val, write);
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ KVM_VGIC_ATTR(offset, cpu), val, write);
}
static void kvm_gicc_access(GICState *s, int offset, int cpu,
uint32_t *val, bool write)
{
- kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
- offset, cpu, val, write);
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+ KVM_VGIC_ATTR(offset, cpu), val, write);
}
#define for_each_irq_reg(_ctr, _max_irq, _field_width) \
@@ -559,7 +526,7 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
return;
}
- gic_init_irqs_and_mmio(s, kvm_arm_gic_set_irq, NULL);
+ gic_init_irqs_and_mmio(s, kvm_arm_gicv2_set_irq, NULL);
for (i = 0; i < s->num_irq - GIC_INTERNAL; i++) {
qemu_irq irq = qdev_get_gpio_in(dev, i);
@@ -571,23 +538,24 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V2, false);
if (ret >= 0) {
s->dev_fd = ret;
+
+ /* Newstyle API is used, we may have attributes */
+ if (kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0)) {
+ uint32_t numirqs = s->num_irq;
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0,
+ &numirqs, true);
+ }
+ /* Tell the kernel to complete VGIC initialization now */
+ if (kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT)) {
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+ }
} else if (ret != -ENODEV && ret != -ENOTSUP) {
error_setg_errno(errp, -ret, "error creating in-kernel VGIC");
return;
}
- if (kvm_gic_supports_attr(s, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0)) {
- uint32_t numirqs = s->num_irq;
- kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, 0, &numirqs, 1);
- }
-
- /* Tell the kernel to complete VGIC initialization now */
- if (kvm_gic_supports_attr(s, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT)) {
- kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, 0, 0, 1);
- }
-
/* Distributor */
kvm_arm_register_device(&s->iomem,
(KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT)
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
new file mode 100644
index 0000000..032ece2
--- /dev/null
+++ b/hw/intc/arm_gicv3_common.c
@@ -0,0 +1,140 @@
+/*
+ * ARM GICv3 support - common bits of emulated and KVM kernel model
+ *
+ * Copyright (c) 2012 Linaro Limited
+ * Copyright (c) 2015 Huawei.
+ * Written by Peter Maydell
+ * Extended to 64 cores by Shlomo Pongratz
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/intc/arm_gicv3_common.h"
+
+static void gicv3_pre_save(void *opaque)
+{
+ GICv3State *s = (GICv3State *)opaque;
+ ARMGICv3CommonClass *c = ARM_GICV3_COMMON_GET_CLASS(s);
+
+ if (c->pre_save) {
+ c->pre_save(s);
+ }
+}
+
+static int gicv3_post_load(void *opaque, int version_id)
+{
+ GICv3State *s = (GICv3State *)opaque;
+ ARMGICv3CommonClass *c = ARM_GICV3_COMMON_GET_CLASS(s);
+
+ if (c->post_load) {
+ c->post_load(s);
+ }
+ return 0;
+}
+
+static const VMStateDescription vmstate_gicv3 = {
+ .name = "arm_gicv3",
+ .unmigratable = 1,
+ .pre_save = gicv3_pre_save,
+ .post_load = gicv3_post_load,
+};
+
+void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
+ const MemoryRegionOps *ops)
+{
+ SysBusDevice *sbd = SYS_BUS_DEVICE(s);
+ int i;
+
+ /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU.
+ * GPIO array layout is thus:
+ * [0..N-1] spi
+ * [N..N+31] PPIs for CPU 0
+ * [N+32..N+63] PPIs for CPU 1
+ * ...
+ */
+ i = s->num_irq - GIC_INTERNAL + GIC_INTERNAL * s->num_cpu;
+ qdev_init_gpio_in(DEVICE(s), handler, i);
+
+ s->parent_irq = g_malloc(s->num_cpu * sizeof(qemu_irq));
+ s->parent_fiq = g_malloc(s->num_cpu * sizeof(qemu_irq));
+
+ for (i = 0; i < s->num_cpu; i++) {
+ sysbus_init_irq(sbd, &s->parent_irq[i]);
+ }
+ for (i = 0; i < s->num_cpu; i++) {
+ sysbus_init_irq(sbd, &s->parent_fiq[i]);
+ }
+
+ memory_region_init_io(&s->iomem_dist, OBJECT(s), ops, s,
+ "gicv3_dist", 0x10000);
+ memory_region_init_io(&s->iomem_redist, OBJECT(s), ops ? &ops[1] : NULL, s,
+ "gicv3_redist", 0x20000 * s->num_cpu);
+
+ sysbus_init_mmio(sbd, &s->iomem_dist);
+ sysbus_init_mmio(sbd, &s->iomem_redist);
+}
+
+static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
+{
+ GICv3State *s = ARM_GICV3_COMMON(dev);
+
+ /* revision property is actually reserved and currently used only in order
+ * to keep the interface compatible with GICv2 code, avoiding extra
+ * conditions. However, in future it could be used, for example, if we
+ * implement GICv4.
+ */
+ if (s->revision != 3) {
+ error_setg(errp, "unsupported GIC revision %d", s->revision);
+ return;
+ }
+}
+
+static void arm_gicv3_common_reset(DeviceState *dev)
+{
+ /* TODO */
+}
+
+static Property arm_gicv3_common_properties[] = {
+ DEFINE_PROP_UINT32("num-cpu", GICv3State, num_cpu, 1),
+ DEFINE_PROP_UINT32("num-irq", GICv3State, num_irq, 32),
+ DEFINE_PROP_UINT32("revision", GICv3State, revision, 3),
+ DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void arm_gicv3_common_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->reset = arm_gicv3_common_reset;
+ dc->realize = arm_gicv3_common_realize;
+ dc->props = arm_gicv3_common_properties;
+ dc->vmsd = &vmstate_gicv3;
+}
+
+static const TypeInfo arm_gicv3_common_type = {
+ .name = TYPE_ARM_GICV3_COMMON,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(GICv3State),
+ .class_size = sizeof(ARMGICv3CommonClass),
+ .class_init = arm_gicv3_common_class_init,
+ .abstract = true,
+};
+
+static void register_types(void)
+{
+ type_register_static(&arm_gicv3_common_type);
+}
+
+type_init(register_types)
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
new file mode 100644
index 0000000..b48f78f
--- /dev/null
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -0,0 +1,149 @@
+/*
+ * ARM Generic Interrupt Controller using KVM in-kernel support
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Written by Pavel Fedin
+ * Based on vGICv2 code by Peter Maydell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/intc/arm_gicv3_common.h"
+#include "hw/sysbus.h"
+#include "sysemu/kvm.h"
+#include "kvm_arm.h"
+#include "vgic_common.h"
+
+#ifdef DEBUG_GICV3_KVM
+#define DPRINTF(fmt, ...) \
+ do { fprintf(stderr, "kvm_gicv3: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+ do { } while (0)
+#endif
+
+#define TYPE_KVM_ARM_GICV3 "kvm-arm-gicv3"
+#define KVM_ARM_GICV3(obj) \
+ OBJECT_CHECK(GICv3State, (obj), TYPE_KVM_ARM_GICV3)
+#define KVM_ARM_GICV3_CLASS(klass) \
+ OBJECT_CLASS_CHECK(KVMARMGICv3Class, (klass), TYPE_KVM_ARM_GICV3)
+#define KVM_ARM_GICV3_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
+
+typedef struct KVMARMGICv3Class {
+ ARMGICv3CommonClass parent_class;
+ DeviceRealize parent_realize;
+ void (*parent_reset)(DeviceState *dev);
+} KVMARMGICv3Class;
+
+static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level)
+{
+ GICv3State *s = (GICv3State *)opaque;
+
+ kvm_arm_gic_set_irq(s->num_irq, irq, level);
+}
+
+static void kvm_arm_gicv3_put(GICv3State *s)
+{
+ /* TODO */
+ DPRINTF("Cannot put kernel gic state, no kernel interface\n");
+}
+
+static void kvm_arm_gicv3_get(GICv3State *s)
+{
+ /* TODO */
+ DPRINTF("Cannot get kernel gic state, no kernel interface\n");
+}
+
+static void kvm_arm_gicv3_reset(DeviceState *dev)
+{
+ GICv3State *s = ARM_GICV3_COMMON(dev);
+ KVMARMGICv3Class *kgc = KVM_ARM_GICV3_GET_CLASS(s);
+
+ DPRINTF("Reset\n");
+
+ kgc->parent_reset(dev);
+ kvm_arm_gicv3_put(s);
+}
+
+static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
+{
+ GICv3State *s = KVM_ARM_GICV3(dev);
+ KVMARMGICv3Class *kgc = KVM_ARM_GICV3_GET_CLASS(s);
+ Error *local_err = NULL;
+
+ DPRINTF("kvm_arm_gicv3_realize\n");
+
+ kgc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ if (s->security_extn) {
+ error_setg(errp, "the in-kernel VGICv3 does not implement the "
+ "security extensions");
+ return;
+ }
+
+ gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
+
+ /* Try to create the device via the device control API */
+ s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ if (s->dev_fd < 0) {
+ error_setg_errno(errp, -s->dev_fd, "error creating in-kernel VGIC");
+ return;
+ }
+
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
+ 0, &s->num_irq, true);
+
+ /* Tell the kernel to complete VGIC initialization now */
+ kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+
+ kvm_arm_register_device(&s->iomem_dist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd);
+ kvm_arm_register_device(&s->iomem_redist, -1, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd);
+}
+
+static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass);
+ KVMARMGICv3Class *kgc = KVM_ARM_GICV3_CLASS(klass);
+
+ agcc->pre_save = kvm_arm_gicv3_get;
+ agcc->post_load = kvm_arm_gicv3_put;
+ kgc->parent_realize = dc->realize;
+ kgc->parent_reset = dc->reset;
+ dc->realize = kvm_arm_gicv3_realize;
+ dc->reset = kvm_arm_gicv3_reset;
+}
+
+static const TypeInfo kvm_arm_gicv3_info = {
+ .name = TYPE_KVM_ARM_GICV3,
+ .parent = TYPE_ARM_GICV3_COMMON,
+ .instance_size = sizeof(GICv3State),
+ .class_init = kvm_arm_gicv3_class_init,
+ .class_size = sizeof(KVMARMGICv3Class),
+};
+
+static void kvm_arm_gicv3_register_types(void)
+{
+ type_register_static(&kvm_arm_gicv3_info);
+}
+
+type_init(kvm_arm_gicv3_register_types)
diff --git a/hw/intc/vgic_common.h b/hw/intc/vgic_common.h
new file mode 100644
index 0000000..80d919e
--- /dev/null
+++ b/hw/intc/vgic_common.h
@@ -0,0 +1,35 @@
+/*
+ * ARM KVM vGIC utility functions
+ *
+ * Copyright (c) 2015 Samsung Electronics
+ * Written by Pavel Fedin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_ARM_VGIC_COMMON_H
+#define QEMU_ARM_VGIC_COMMON_H
+
+/**
+ * kvm_arm_gic_set_irq - Send an IRQ to the in-kernel vGIC
+ * @num_irq: Total number of IRQs configured for the GIC instance
+ * @irq: qemu internal IRQ line number:
+ * [0..N-1] : external interrupts
+ * [N..N+31] : PPI (internal) interrupts for CPU 0
+ * [N+32..N+63] : PPI (internal interrupts for CPU 1
+ * @level: level of the IRQ line.
+ */
+void kvm_arm_gic_set_irq(uint32_t num_irq, int irq, int level);
+
+#endif
diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index d540c9d..d324863 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -1,6 +1,6 @@
ifeq ($(CONFIG_LINUX), y)
obj-$(CONFIG_SOFTMMU) += common.o
-obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_PCI) += pci.o pci-quirks.o
obj-$(CONFIG_SOFTMMU) += platform.o
obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o
endif
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6d21311..0d341a3 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -496,7 +496,7 @@ int vfio_mmap_region(Object *obj, VFIORegion *region,
int ret = 0;
VFIODevice *vbasedev = region->vbasedev;
- if (vbasedev->allow_mmap && size && region->flags &
+ if (!vbasedev->no_mmap && size && region->flags &
VFIO_REGION_INFO_FLAG_MMAP) {
int prot = 0;
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
new file mode 100644
index 0000000..2bdaef1
--- /dev/null
+++ b/hw/vfio/pci-quirks.c
@@ -0,0 +1,1204 @@
+/*
+ * device quirks for PCI devices
+ *
+ * Copyright Red Hat, Inc. 2012-2015
+ *
+ * Authors:
+ * Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "pci.h"
+#include "trace.h"
+#include "qemu/range.h"
+
+/* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
+static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
+{
+ return (vendor == PCI_ANY_ID || vendor == vdev->vendor_id) &&
+ (device == PCI_ANY_ID || device == vdev->device_id);
+}
+
+static bool vfio_is_vga(VFIOPCIDevice *vdev)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
+
+ return class == PCI_CLASS_DISPLAY_VGA;
+}
+
+/*
+ * List of device ids/vendor ids for which to disable
+ * option rom loading. This avoids the guest hangs during rom
+ * execution as noticed with the BCM 57810 card for lack of a
+ * more better way to handle such issues.
+ * The user can still override by specifying a romfile or
+ * rombar=1.
+ * Please see https://bugs.launchpad.net/qemu/+bug/1284874
+ * for an analysis of the 57810 card hang. When adding
+ * a new vendor id/device id combination below, please also add
+ * your card/environment details and information that could
+ * help in debugging to the bug tracking this issue
+ */
+static const struct {
+ uint32_t vendor;
+ uint32_t device;
+} romblacklist[] = {
+ { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */
+};
+
+bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
+{
+ int i;
+
+ for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
+ if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
+ trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
+ romblacklist[i].vendor,
+ romblacklist[i].device);
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * Device specific region quirks (mostly backdoors to PCI config space)
+ */
+
+/*
+ * The generic window quirks operate on an address and data register,
+ * vfio_generic_window_address_quirk handles the address register and
+ * vfio_generic_window_data_quirk handles the data register. These ops
+ * pass reads and writes through to hardware until a value matching the
+ * stored address match/mask is written. When this occurs, the data
+ * register access emulated PCI config space for the device rather than
+ * passing through accesses. This enables devices where PCI config space
+ * is accessible behind a window register to maintain the virtualization
+ * provided through vfio.
+ */
+typedef struct VFIOConfigWindowMatch {
+ uint32_t match;
+ uint32_t mask;
+} VFIOConfigWindowMatch;
+
+typedef struct VFIOConfigWindowQuirk {
+ struct VFIOPCIDevice *vdev;
+
+ uint32_t address_val;
+
+ uint32_t address_offset;
+ uint32_t data_offset;
+
+ bool window_enabled;
+ uint8_t bar;
+
+ MemoryRegion *addr_mem;
+ MemoryRegion *data_mem;
+
+ uint32_t nr_matches;
+ VFIOConfigWindowMatch matches[];
+} VFIOConfigWindowQuirk;
+
+static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
+ hwaddr addr,
+ unsigned size)
+{
+ VFIOConfigWindowQuirk *window = opaque;
+ VFIOPCIDevice *vdev = window->vdev;
+
+ return vfio_region_read(&vdev->bars[window->bar].region,
+ addr + window->address_offset, size);
+}
+
+static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
+ uint64_t data,
+ unsigned size)
+{
+ VFIOConfigWindowQuirk *window = opaque;
+ VFIOPCIDevice *vdev = window->vdev;
+ int i;
+
+ window->window_enabled = false;
+
+ vfio_region_write(&vdev->bars[window->bar].region,
+ addr + window->address_offset, data, size);
+
+ for (i = 0; i < window->nr_matches; i++) {
+ if ((data & ~window->matches[i].mask) == window->matches[i].match) {
+ window->window_enabled = true;
+ window->address_val = data & window->matches[i].mask;
+ trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
+ memory_region_name(window->addr_mem), data);
+ break;
+ }
+ }
+}
+
+static const MemoryRegionOps vfio_generic_window_address_quirk = {
+ .read = vfio_generic_window_quirk_address_read,
+ .write = vfio_generic_window_quirk_address_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIOConfigWindowQuirk *window = opaque;
+ VFIOPCIDevice *vdev = window->vdev;
+ uint64_t data;
+
+ /* Always read data reg, discard if window enabled */
+ data = vfio_region_read(&vdev->bars[window->bar].region,
+ addr + window->data_offset, size);
+
+ if (window->window_enabled) {
+ data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
+ trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
+ memory_region_name(window->data_mem), data);
+ }
+
+ return data;
+}
+
+static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIOConfigWindowQuirk *window = opaque;
+ VFIOPCIDevice *vdev = window->vdev;
+
+ if (window->window_enabled) {
+ vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
+ trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
+ memory_region_name(window->data_mem), data);
+ return;
+ }
+
+ vfio_region_write(&vdev->bars[window->bar].region,
+ addr + window->data_offset, data, size);
+}
+
+static const MemoryRegionOps vfio_generic_window_data_quirk = {
+ .read = vfio_generic_window_quirk_data_read,
+ .write = vfio_generic_window_quirk_data_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * The generic mirror quirk handles devices which expose PCI config space
+ * through a region within a BAR. When enabled, reads and writes are
+ * redirected through to emulated PCI config space. XXX if PCI config space
+ * used memory regions, this could just be an alias.
+ */
+typedef struct VFIOConfigMirrorQuirk {
+ struct VFIOPCIDevice *vdev;
+ uint32_t offset;
+ uint8_t bar;
+ MemoryRegion *mem;
+} VFIOConfigMirrorQuirk;
+
+static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIOConfigMirrorQuirk *mirror = opaque;
+ VFIOPCIDevice *vdev = mirror->vdev;
+ uint64_t data;
+
+ /* Read and discard in case the hardware cares */
+ (void)vfio_region_read(&vdev->bars[mirror->bar].region,
+ addr + mirror->offset, size);
+
+ data = vfio_pci_read_config(&vdev->pdev, addr, size);
+ trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
+ memory_region_name(mirror->mem),
+ addr, data);
+ return data;
+}
+
+static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIOConfigMirrorQuirk *mirror = opaque;
+ VFIOPCIDevice *vdev = mirror->vdev;
+
+ vfio_pci_write_config(&vdev->pdev, addr, data, size);
+ trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
+ memory_region_name(mirror->mem),
+ addr, data);
+}
+
+static const MemoryRegionOps vfio_generic_mirror_quirk = {
+ .read = vfio_generic_quirk_mirror_read,
+ .write = vfio_generic_quirk_mirror_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/* Is range1 fully contained within range2? */
+static bool vfio_range_contained(uint64_t first1, uint64_t len1,
+ uint64_t first2, uint64_t len2) {
+ return (first1 >= first2 && first1 + len1 <= first2 + len2);
+}
+
+#define PCI_VENDOR_ID_ATI 0x1002
+
+/*
+ * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
+ * through VGA register 0x3c3. On newer cards, the I/O port BAR is always
+ * BAR4 (older cards like the X550 used BAR1, but we don't care to support
+ * those). Note that on bare metal, a read of 0x3c3 doesn't always return the
+ * I/O port BAR address. Originally this was coded to return the virtual BAR
+ * address only if the physical register read returns the actual BAR address,
+ * but users have reported greater success if we return the virtual address
+ * unconditionally.
+ */
+static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIOPCIDevice *vdev = opaque;
+ uint64_t data = vfio_pci_read_config(&vdev->pdev,
+ PCI_BASE_ADDRESS_4 + 1, size);
+
+ trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
+
+ return data;
+}
+
+static const MemoryRegionOps vfio_ati_3c3_quirk = {
+ .read = vfio_ati_3c3_quirk_read,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
+{
+ VFIOQuirk *quirk;
+
+ /*
+ * As long as the BAR is >= 256 bytes it will be aligned such that the
+ * lower byte is always zero. Filter out anything else, if it exists.
+ */
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
+ !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
+ return;
+ }
+
+ quirk = g_malloc0(sizeof(*quirk));
+ quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
+ quirk->nr_mem = 1;
+
+ memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
+ "vfio-ati-3c3-quirk", 1);
+ memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+ 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
+
+ QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
+ quirk, next);
+
+ trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
+}
+
+/*
+ * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
+ * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access
+ * the MMIO space directly, but a window to this space is provided through
+ * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the
+ * data register. When the address is programmed to a range of 0x4000-0x4fff
+ * PCI configuration space is available. Experimentation seems to indicate
+ * that read-only may be provided by hardware.
+ */
+static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOQuirk *quirk;
+ VFIOConfigWindowQuirk *window;
+
+ /* This windows doesn't seem to be used except by legacy VGA code */
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
+ !vdev->has_vga || nr != 4) {
+ return;
+ }
+
+ quirk = g_malloc0(sizeof(*quirk));
+ quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
+ quirk->nr_mem = 2;
+ window = quirk->data = g_malloc0(sizeof(*window) +
+ sizeof(VFIOConfigWindowMatch));
+ window->vdev = vdev;
+ window->address_offset = 0;
+ window->data_offset = 4;
+ window->nr_matches = 1;
+ window->matches[0].match = 0x4000;
+ window->matches[0].mask = PCIE_CONFIG_SPACE_SIZE - 1;
+ window->bar = nr;
+ window->addr_mem = &quirk->mem[0];
+ window->data_mem = &quirk->mem[1];
+
+ memory_region_init_io(window->addr_mem, OBJECT(vdev),
+ &vfio_generic_window_address_quirk, window,
+ "vfio-ati-bar4-window-address-quirk", 4);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ window->address_offset,
+ window->addr_mem, 1);
+
+ memory_region_init_io(window->data_mem, OBJECT(vdev),
+ &vfio_generic_window_data_quirk, window,
+ "vfio-ati-bar4-window-data-quirk", 4);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ window->data_offset,
+ window->data_mem, 1);
+
+ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+ trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
+}
+
+/*
+ * Trap the BAR2 MMIO mirror to config space as well.
+ */
+static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOQuirk *quirk;
+ VFIOConfigMirrorQuirk *mirror;
+
+ /* Only enable on newer devices where BAR2 is 64bit */
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
+ !vdev->has_vga || nr != 2 || !vdev->bars[2].mem64) {
+ return;
+ }
+
+ quirk = g_malloc0(sizeof(*quirk));
+ mirror = quirk->data = g_malloc0(sizeof(*mirror));
+ mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
+ quirk->nr_mem = 1;
+ mirror->vdev = vdev;
+ mirror->offset = 0x4000;
+ mirror->bar = nr;
+
+ memory_region_init_io(mirror->mem, OBJECT(vdev),
+ &vfio_generic_mirror_quirk, mirror,
+ "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ mirror->offset, mirror->mem, 1);
+
+ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+ trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
+}
+
+/*
+ * Older ATI/AMD cards like the X550 have a similar window to that above.
+ * I/O port BAR1 provides a window to a mirror of PCI config space located
+ * in BAR2 at offset 0xf00. We don't care to support such older cards, but
+ * note it for future reference.
+ */
+
+#define PCI_VENDOR_ID_NVIDIA 0x10de
+
+/*
+ * Nvidia has several different methods to get to config space, the
+ * nouveu project has several of these documented here:
+ * https://github.com/pathscale/envytools/tree/master/hwdocs
+ *
+ * The first quirk is actually not documented in envytools and is found
+ * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an
+ * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access
+ * the mirror of PCI config space found at BAR0 offset 0x1800. The access
+ * sequence first writes 0x338 to I/O port 0x3d4. The target offset is
+ * then written to 0x3d0. Finally 0x538 is written for a read and 0x738
+ * is written for a write to 0x3d4. The BAR0 offset is then accessible
+ * through 0x3d0. This quirk doesn't seem to be necessary on newer cards
+ * that use the I/O port BAR5 window but it doesn't hurt to leave it.
+ */
+typedef enum {NONE = 0, SELECT, WINDOW, READ, WRITE} VFIONvidia3d0State;
+static const char *nv3d0_states[] = { "NONE", "SELECT",
+ "WINDOW", "READ", "WRITE" };
+
+typedef struct VFIONvidia3d0Quirk {
+ VFIOPCIDevice *vdev;
+ VFIONvidia3d0State state;
+ uint32_t offset;
+} VFIONvidia3d0Quirk;
+
+static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIONvidia3d0Quirk *quirk = opaque;
+ VFIOPCIDevice *vdev = quirk->vdev;
+
+ quirk->state = NONE;
+
+ return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ addr + 0x14, size);
+}
+
+static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIONvidia3d0Quirk *quirk = opaque;
+ VFIOPCIDevice *vdev = quirk->vdev;
+ VFIONvidia3d0State old_state = quirk->state;
+
+ quirk->state = NONE;
+
+ switch (data) {
+ case 0x338:
+ if (old_state == NONE) {
+ quirk->state = SELECT;
+ trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
+ nv3d0_states[quirk->state]);
+ }
+ break;
+ case 0x538:
+ if (old_state == WINDOW) {
+ quirk->state = READ;
+ trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
+ nv3d0_states[quirk->state]);
+ }
+ break;
+ case 0x738:
+ if (old_state == WINDOW) {
+ quirk->state = WRITE;
+ trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
+ nv3d0_states[quirk->state]);
+ }
+ break;
+ }
+
+ vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ addr + 0x14, data, size);
+}
+
+static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
+ .read = vfio_nvidia_3d4_quirk_read,
+ .write = vfio_nvidia_3d4_quirk_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIONvidia3d0Quirk *quirk = opaque;
+ VFIOPCIDevice *vdev = quirk->vdev;
+ VFIONvidia3d0State old_state = quirk->state;
+ uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ addr + 0x10, size);
+
+ quirk->state = NONE;
+
+ if (old_state == READ &&
+ (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
+ uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
+
+ data = vfio_pci_read_config(&vdev->pdev, offset, size);
+ trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
+ offset, size, data);
+ }
+
+ return data;
+}
+
+static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIONvidia3d0Quirk *quirk = opaque;
+ VFIOPCIDevice *vdev = quirk->vdev;
+ VFIONvidia3d0State old_state = quirk->state;
+
+ quirk->state = NONE;
+
+ if (old_state == SELECT) {
+ quirk->offset = (uint32_t)data;
+ quirk->state = WINDOW;
+ trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
+ nv3d0_states[quirk->state]);
+ } else if (old_state == WRITE) {
+ if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
+ uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
+
+ vfio_pci_write_config(&vdev->pdev, offset, data, size);
+ trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
+ offset, data, size);
+ return;
+ }
+ }
+
+ vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
+ addr + 0x10, data, size);
+}
+
+static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
+ .read = vfio_nvidia_3d0_quirk_read,
+ .write = vfio_nvidia_3d0_quirk_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
+{
+ VFIOQuirk *quirk;
+ VFIONvidia3d0Quirk *data;
+
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
+ !vdev->bars[1].region.size) {
+ return;
+ }
+
+ quirk = g_malloc0(sizeof(*quirk));
+ quirk->data = data = g_malloc0(sizeof(*data));
+ quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
+ quirk->nr_mem = 2;
+ data->vdev = vdev;
+
+ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
+ data, "vfio-nvidia-3d4-quirk", 2);
+ memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+ 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
+
+ memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
+ data, "vfio-nvidia-3d0-quirk", 2);
+ memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
+ 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
+
+ QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
+ quirk, next);
+
+ trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
+}
+
+/*
+ * The second quirk is documented in envytools. The I/O port BAR5 is just
+ * a set of address/data ports to the MMIO BARs. The BAR we care about is
+ * again BAR0. This backdoor is apparently a bit newer than the one above
+ * so we need to not only trap 256 bytes @0x1800, but all of PCI config
+ * space, including extended space is available at the 4k @0x88000.
+ */
+typedef struct VFIONvidiaBAR5Quirk {
+ uint32_t master;
+ uint32_t enable;
+ MemoryRegion *addr_mem;
+ MemoryRegion *data_mem;
+ bool enabled;
+ VFIOConfigWindowQuirk window; /* last for match data */
+} VFIONvidiaBAR5Quirk;
+
+static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
+{
+ VFIOPCIDevice *vdev = bar5->window.vdev;
+
+ if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
+ return;
+ }
+
+ bar5->enabled = !bar5->enabled;
+ trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
+ bar5->enabled ? "Enable" : "Disable");
+ memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
+ memory_region_set_enabled(bar5->data_mem, bar5->enabled);
+}
+
+static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIONvidiaBAR5Quirk *bar5 = opaque;
+ VFIOPCIDevice *vdev = bar5->window.vdev;
+
+ return vfio_region_read(&vdev->bars[5].region, addr, size);
+}
+
+static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIONvidiaBAR5Quirk *bar5 = opaque;
+ VFIOPCIDevice *vdev = bar5->window.vdev;
+
+ vfio_region_write(&vdev->bars[5].region, addr, data, size);
+
+ bar5->master = data;
+ vfio_nvidia_bar5_enable(bar5);
+}
+
+static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
+ .read = vfio_nvidia_bar5_quirk_master_read,
+ .write = vfio_nvidia_bar5_quirk_master_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIONvidiaBAR5Quirk *bar5 = opaque;
+ VFIOPCIDevice *vdev = bar5->window.vdev;
+
+ return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
+}
+
+static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIONvidiaBAR5Quirk *bar5 = opaque;
+ VFIOPCIDevice *vdev = bar5->window.vdev;
+
+ vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
+
+ bar5->enable = data;
+ vfio_nvidia_bar5_enable(bar5);
+}
+
+static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
+ .read = vfio_nvidia_bar5_quirk_enable_read,
+ .write = vfio_nvidia_bar5_quirk_enable_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOQuirk *quirk;
+ VFIONvidiaBAR5Quirk *bar5;
+ VFIOConfigWindowQuirk *window;
+
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
+ !vdev->has_vga || nr != 5) {
+ return;
+ }
+
+ quirk = g_malloc0(sizeof(*quirk));
+ quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 4);
+ quirk->nr_mem = 4;
+ bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
+ (sizeof(VFIOConfigWindowMatch) * 2));
+ window = &bar5->window;
+
+ window->vdev = vdev;
+ window->address_offset = 0x8;
+ window->data_offset = 0xc;
+ window->nr_matches = 2;
+ window->matches[0].match = 0x1800;
+ window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
+ window->matches[1].match = 0x88000;
+ window->matches[1].mask = PCIE_CONFIG_SPACE_SIZE - 1;
+ window->bar = nr;
+ window->addr_mem = bar5->addr_mem = &quirk->mem[0];
+ window->data_mem = bar5->data_mem = &quirk->mem[1];
+
+ memory_region_init_io(window->addr_mem, OBJECT(vdev),
+ &vfio_generic_window_address_quirk, window,
+ "vfio-nvidia-bar5-window-address-quirk", 4);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ window->address_offset,
+ window->addr_mem, 1);
+ memory_region_set_enabled(window->addr_mem, false);
+
+ memory_region_init_io(window->data_mem, OBJECT(vdev),
+ &vfio_generic_window_data_quirk, window,
+ "vfio-nvidia-bar5-window-data-quirk", 4);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ window->data_offset,
+ window->data_mem, 1);
+ memory_region_set_enabled(window->data_mem, false);
+
+ memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
+ &vfio_nvidia_bar5_quirk_master, bar5,
+ "vfio-nvidia-bar5-master-quirk", 4);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ 0, &quirk->mem[2], 1);
+
+ memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
+ &vfio_nvidia_bar5_quirk_enable, bar5,
+ "vfio-nvidia-bar5-enable-quirk", 4);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ 4, &quirk->mem[3], 1);
+
+ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+ trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
+}
+
+/*
+ * Finally, BAR0 itself. We want to redirect any accesses to either
+ * 0x1800 or 0x88000 through the PCI config space access functions.
+ */
+static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIOConfigMirrorQuirk *mirror = opaque;
+ VFIOPCIDevice *vdev = mirror->vdev;
+ PCIDevice *pdev = &vdev->pdev;
+
+ vfio_generic_quirk_mirror_write(opaque, addr, data, size);
+
+ /*
+ * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
+ * MSI capability ID register. Both the ID and next register are
+ * read-only, so we allow writes covering either of those to real hw.
+ */
+ if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
+ vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
+ vfio_region_write(&vdev->bars[mirror->bar].region,
+ addr + mirror->offset, data, size);
+ trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
+ }
+}
+
+static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
+ .read = vfio_generic_quirk_mirror_read,
+ .write = vfio_nvidia_quirk_mirror_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOQuirk *quirk;
+ VFIOConfigMirrorQuirk *mirror;
+
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
+ !vfio_is_vga(vdev) || nr != 0) {
+ return;
+ }
+
+ quirk = g_malloc0(sizeof(*quirk));
+ mirror = quirk->data = g_malloc0(sizeof(*mirror));
+ mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
+ quirk->nr_mem = 1;
+ mirror->vdev = vdev;
+ mirror->offset = 0x88000;
+ mirror->bar = nr;
+
+ memory_region_init_io(mirror->mem, OBJECT(vdev),
+ &vfio_nvidia_mirror_quirk, mirror,
+ "vfio-nvidia-bar0-88000-mirror-quirk",
+ PCIE_CONFIG_SPACE_SIZE);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ mirror->offset, mirror->mem, 1);
+
+ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+ /* The 0x1800 offset mirror only seems to get used by legacy VGA */
+ if (vdev->has_vga) {
+ quirk = g_malloc0(sizeof(*quirk));
+ mirror = quirk->data = g_malloc0(sizeof(*mirror));
+ mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
+ quirk->nr_mem = 1;
+ mirror->vdev = vdev;
+ mirror->offset = 0x1800;
+ mirror->bar = nr;
+
+ memory_region_init_io(mirror->mem, OBJECT(vdev),
+ &vfio_nvidia_mirror_quirk, mirror,
+ "vfio-nvidia-bar0-1800-mirror-quirk",
+ PCI_CONFIG_SPACE_SIZE);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ mirror->offset, mirror->mem, 1);
+
+ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+ }
+
+ trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
+}
+
+/*
+ * TODO - Some Nvidia devices provide config access to their companion HDA
+ * device and even to their parent bridge via these config space mirrors.
+ * Add quirks for those regions.
+ */
+
+#define PCI_VENDOR_ID_REALTEK 0x10ec
+
+/*
+ * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2
+ * offset 0x70 there is a dword data register, offset 0x74 is a dword address
+ * register. According to the Linux r8169 driver, the MSI-X table is addressed
+ * when the "type" portion of the address register is set to 0x1. This appears
+ * to be bits 16:30. Bit 31 is both a write indicator and some sort of
+ * "address latched" indicator. Bits 12:15 are a mask field, which we can
+ * ignore because the MSI-X table should always be accessed as a dword (full
+ * mask). Bits 0:11 is offset within the type.
+ *
+ * Example trace:
+ *
+ * Read from MSI-X table offset 0
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
+ *
+ * Write 0xfee00000 to MSI-X table offset 0
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
+ */
+typedef struct VFIOrtl8168Quirk {
+ VFIOPCIDevice *vdev;
+ uint32_t addr;
+ uint32_t data;
+ bool enabled;
+} VFIOrtl8168Quirk;
+
+static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIOrtl8168Quirk *rtl = opaque;
+ VFIOPCIDevice *vdev = rtl->vdev;
+ uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
+
+ if (rtl->enabled) {
+ data = rtl->addr ^ 0x80000000U; /* latch/complete */
+ trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
+ }
+
+ return data;
+}
+
+static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIOrtl8168Quirk *rtl = opaque;
+ VFIOPCIDevice *vdev = rtl->vdev;
+
+ rtl->enabled = false;
+
+ if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
+ rtl->enabled = true;
+ rtl->addr = (uint32_t)data;
+
+ if (data & 0x80000000U) { /* Do write */
+ if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
+ hwaddr offset = data & 0xfff;
+ uint64_t val = rtl->data;
+
+ trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
+ (uint16_t)offset, val);
+
+ /* Write to the proper guest MSI-X table instead */
+ memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
+ offset, val, size,
+ MEMTXATTRS_UNSPECIFIED);
+ }
+ return; /* Do not write guest MSI-X data to hardware */
+ }
+ }
+
+ vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
+}
+
+static const MemoryRegionOps vfio_rtl_address_quirk = {
+ .read = vfio_rtl8168_quirk_address_read,
+ .write = vfio_rtl8168_quirk_address_write,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ VFIOrtl8168Quirk *rtl = opaque;
+ VFIOPCIDevice *vdev = rtl->vdev;
+ uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
+
+ if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
+ hwaddr offset = rtl->addr & 0xfff;
+ memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
+ &data, size, MEMTXATTRS_UNSPECIFIED);
+ trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
+ }
+
+ return data;
+}
+
+static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIOrtl8168Quirk *rtl = opaque;
+ VFIOPCIDevice *vdev = rtl->vdev;
+
+ rtl->data = (uint32_t)data;
+
+ vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
+}
+
+static const MemoryRegionOps vfio_rtl_data_quirk = {
+ .read = vfio_rtl8168_quirk_data_read,
+ .write = vfio_rtl8168_quirk_data_write,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOQuirk *quirk;
+ VFIOrtl8168Quirk *rtl;
+
+ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
+ return;
+ }
+
+ quirk = g_malloc0(sizeof(*quirk));
+ quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
+ quirk->nr_mem = 2;
+ quirk->data = rtl = g_malloc0(sizeof(*rtl));
+ rtl->vdev = vdev;
+
+ memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
+ &vfio_rtl_address_quirk, rtl,
+ "vfio-rtl8168-window-address-quirk", 4);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ 0x74, &quirk->mem[0], 1);
+
+ memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
+ &vfio_rtl_data_quirk, rtl,
+ "vfio-rtl8168-window-data-quirk", 4);
+ memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
+ 0x70, &quirk->mem[1], 1);
+
+ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+ trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
+}
+
+/*
+ * Common quirk probe entry points.
+ */
+void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
+{
+ vfio_vga_probe_ati_3c3_quirk(vdev);
+ vfio_vga_probe_nvidia_3d0_quirk(vdev);
+}
+
+void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
+{
+ VFIOQuirk *quirk;
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
+ QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
+ for (j = 0; j < quirk->nr_mem; j++) {
+ memory_region_del_subregion(&vdev->vga.region[i].mem,
+ &quirk->mem[j]);
+ }
+ }
+ }
+}
+
+void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
+{
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
+ while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
+ VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
+ QLIST_REMOVE(quirk, next);
+ for (j = 0; j < quirk->nr_mem; j++) {
+ object_unparent(OBJECT(&quirk->mem[j]));
+ }
+ g_free(quirk->mem);
+ g_free(quirk->data);
+ g_free(quirk);
+ }
+ }
+}
+
+void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
+{
+ vfio_probe_ati_bar4_quirk(vdev, nr);
+ vfio_probe_ati_bar2_quirk(vdev, nr);
+ vfio_probe_nvidia_bar5_quirk(vdev, nr);
+ vfio_probe_nvidia_bar0_quirk(vdev, nr);
+ vfio_probe_rtl8168_bar2_quirk(vdev, nr);
+}
+
+void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
+ VFIOQuirk *quirk;
+ int i;
+
+ QLIST_FOREACH(quirk, &bar->quirks, next) {
+ for (i = 0; i < quirk->nr_mem; i++) {
+ memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
+ }
+ }
+}
+
+void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
+ int i;
+
+ while (!QLIST_EMPTY(&bar->quirks)) {
+ VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
+ QLIST_REMOVE(quirk, next);
+ for (i = 0; i < quirk->nr_mem; i++) {
+ object_unparent(OBJECT(&quirk->mem[i]));
+ }
+ g_free(quirk->mem);
+ g_free(quirk->data);
+ g_free(quirk);
+ }
+}
+
+/*
+ * Reset quirks
+ */
+
+/*
+ * AMD Radeon PCI config reset, based on Linux:
+ * drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
+ * drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
+ * drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
+ * drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
+ * IDs: include/drm/drm_pciids.h
+ * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
+ *
+ * Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the
+ * hardware that should be fixed on future ASICs. The symptom of this is that
+ * once the accerlated driver loads, Windows guests will bsod on subsequent
+ * attmpts to load the driver, such as after VM reset or shutdown/restart. To
+ * work around this, we do an AMD specific PCI config reset, followed by an SMC
+ * reset. The PCI config reset only works if SMC firmware is running, so we
+ * have a dependency on the state of the device as to whether this reset will
+ * be effective. There are still cases where we won't be able to kick the
+ * device into working, but this greatly improves the usability overall. The
+ * config reset magic is relatively common on AMD GPUs, but the setup and SMC
+ * poking is largely ASIC specific.
+ */
+static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
+{
+ uint32_t clk, pc_c;
+
+ /*
+ * Registers 200h and 204h are index and data registers for accessing
+ * indirect configuration registers within the device.
+ */
+ vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
+ clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+ vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
+ pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+
+ return (!(clk & 1) && (0x20100 <= pc_c));
+}
+
+/*
+ * The scope of a config reset is controlled by a mode bit in the misc register
+ * and a fuse, exposed as a bit in another register. The fuse is the default
+ * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula
+ * scope = !(misc ^ fuse), where the resulting scope is defined the same as
+ * the fuse. A truth table therefore tells us that if misc == fuse, we need
+ * to flip the value of the bit in the misc register.
+ */
+static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
+{
+ uint32_t misc, fuse;
+ bool a, b;
+
+ vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
+ fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+ b = fuse & 64;
+
+ vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
+ misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+ a = misc & 2;
+
+ if (a == b) {
+ vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
+ vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
+ }
+}
+
+static int vfio_radeon_reset(VFIOPCIDevice *vdev)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ int i, ret = 0;
+ uint32_t data;
+
+ /* Defer to a kernel implemented reset */
+ if (vdev->vbasedev.reset_works) {
+ trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
+ return -ENODEV;
+ }
+
+ /* Enable only memory BAR access */
+ vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
+
+ /* Reset only works if SMC firmware is loaded and running */
+ if (!vfio_radeon_smc_is_running(vdev)) {
+ ret = -EINVAL;
+ trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
+ goto out;
+ }
+
+ /* Make sure only the GFX function is reset */
+ vfio_radeon_set_gfx_only_reset(vdev);
+
+ /* AMD PCI config reset */
+ vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
+ usleep(100);
+
+ /* Read back the memory size to make sure we're out of reset */
+ for (i = 0; i < 100000; i++) {
+ if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
+ goto reset_smc;
+ }
+ usleep(1);
+ }
+
+ trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
+
+reset_smc:
+ /* Reset SMC */
+ vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
+ data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+ data |= 1;
+ vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
+
+ /* Disable SMC clock */
+ vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
+ data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+ data |= 1;
+ vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
+
+ trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
+
+out:
+ /* Restore PCI command register */
+ vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
+
+ return ret;
+}
+
+void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
+{
+ switch (vdev->vendor_id) {
+ case 0x1002:
+ switch (vdev->device_id) {
+ /* Bonaire */
+ case 0x6649: /* Bonaire [FirePro W5100] */
+ case 0x6650:
+ case 0x6651:
+ case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
+ case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
+ case 0x665d: /* Bonaire [Radeon R7 200 Series] */
+ /* Hawaii */
+ case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
+ case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
+ case 0x67A2:
+ case 0x67A8:
+ case 0x67A9:
+ case 0x67AA:
+ case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
+ case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
+ case 0x67B8:
+ case 0x67B9:
+ case 0x67BA:
+ case 0x67BE:
+ vdev->resetfn = vfio_radeon_reset;
+ trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
+ break;
+ }
+ break;
+ }
+}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 73d34b9..dcabb6d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -26,178 +26,18 @@
#include <unistd.h>
#include "config.h"
-#include "exec/address-spaces.h"
-#include "exec/memory.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
-#include "hw/pci/pci.h"
-#include "qemu-common.h"
#include "qemu/error-report.h"
-#include "qemu/event_notifier.h"
-#include "qemu/queue.h"
#include "qemu/range.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
+#include "pci.h"
#include "trace.h"
-#include "hw/vfio/vfio.h"
-#include "hw/vfio/vfio-common.h"
-
-struct VFIOPCIDevice;
-
-typedef struct VFIOQuirk {
- MemoryRegion mem;
- struct VFIOPCIDevice *vdev;
- QLIST_ENTRY(VFIOQuirk) next;
- struct {
- uint32_t base_offset:TARGET_PAGE_BITS;
- uint32_t address_offset:TARGET_PAGE_BITS;
- uint32_t address_size:3;
- uint32_t bar:3;
-
- uint32_t address_match;
- uint32_t address_mask;
-
- uint32_t address_val:TARGET_PAGE_BITS;
- uint32_t data_offset:TARGET_PAGE_BITS;
- uint32_t data_size:3;
-
- uint8_t flags;
- uint8_t read_flags;
- uint8_t write_flags;
- } data;
-} VFIOQuirk;
-
-typedef struct VFIOBAR {
- VFIORegion region;
- bool ioport;
- bool mem64;
- QLIST_HEAD(, VFIOQuirk) quirks;
-} VFIOBAR;
-
-typedef struct VFIOVGARegion {
- MemoryRegion mem;
- off_t offset;
- int nr;
- QLIST_HEAD(, VFIOQuirk) quirks;
-} VFIOVGARegion;
-
-typedef struct VFIOVGA {
- off_t fd_offset;
- int fd;
- VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS];
-} VFIOVGA;
-
-typedef struct VFIOINTx {
- bool pending; /* interrupt pending */
- bool kvm_accel; /* set when QEMU bypass through KVM enabled */
- uint8_t pin; /* which pin to pull for qemu_set_irq */
- EventNotifier interrupt; /* eventfd triggered on interrupt */
- EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
- PCIINTxRoute route; /* routing info for QEMU bypass */
- uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
- QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
-} VFIOINTx;
-
-typedef struct VFIOMSIVector {
- /*
- * Two interrupt paths are configured per vector. The first, is only used
- * for interrupts injected via QEMU. This is typically the non-accel path,
- * but may also be used when we want QEMU to handle masking and pending
- * bits. The KVM path bypasses QEMU and is therefore higher performance,
- * but requires masking at the device. virq is used to track the MSI route
- * through KVM, thus kvm_interrupt is only available when virq is set to a
- * valid (>= 0) value.
- */
- EventNotifier interrupt;
- EventNotifier kvm_interrupt;
- struct VFIOPCIDevice *vdev; /* back pointer to device */
- int virq;
- bool use;
-} VFIOMSIVector;
-
-enum {
- VFIO_INT_NONE = 0,
- VFIO_INT_INTx = 1,
- VFIO_INT_MSI = 2,
- VFIO_INT_MSIX = 3,
-};
-
-/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
-typedef struct VFIOMSIXInfo {
- uint8_t table_bar;
- uint8_t pba_bar;
- uint16_t entries;
- uint32_t table_offset;
- uint32_t pba_offset;
- MemoryRegion mmap_mem;
- void *mmap;
-} VFIOMSIXInfo;
-
-typedef struct VFIOPCIDevice {
- PCIDevice pdev;
- VFIODevice vbasedev;
- VFIOINTx intx;
- unsigned int config_size;
- uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */
- off_t config_offset; /* Offset of config space region within device fd */
- unsigned int rom_size;
- off_t rom_offset; /* Offset of ROM region within device fd */
- void *rom;
- int msi_cap_size;
- VFIOMSIVector *msi_vectors;
- VFIOMSIXInfo *msix;
- int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
- int interrupt; /* Current interrupt type */
- VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
- VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */
- PCIHostDeviceAddress host;
- EventNotifier err_notifier;
- EventNotifier req_notifier;
- int (*resetfn)(struct VFIOPCIDevice *);
- uint32_t features;
-#define VFIO_FEATURE_ENABLE_VGA_BIT 0
-#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
-#define VFIO_FEATURE_ENABLE_REQ_BIT 1
-#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
- int32_t bootindex;
- uint8_t pm_cap;
- bool has_vga;
- bool pci_aer;
- bool req_enabled;
- bool has_flr;
- bool has_pm_reset;
- bool rom_read_failed;
-} VFIOPCIDevice;
-
-typedef struct VFIORomBlacklistEntry {
- uint16_t vendor_id;
- uint16_t device_id;
-} VFIORomBlacklistEntry;
-
-/*
- * List of device ids/vendor ids for which to disable
- * option rom loading. This avoids the guest hangs during rom
- * execution as noticed with the BCM 57810 card for lack of a
- * more better way to handle such issues.
- * The user can still override by specifying a romfile or
- * rombar=1.
- * Please see https://bugs.launchpad.net/qemu/+bug/1284874
- * for an analysis of the 57810 card hang. When adding
- * a new vendor id/device id combination below, please also add
- * your card/environment details and information that could
- * help in debugging to the bug tracking this issue
- */
-static const VFIORomBlacklistEntry romblacklist[] = {
- /* Broadcom BCM 57810 */
- { 0x14e4, 0x168e }
-};
#define MSIX_CAP_LENGTH 12
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
-static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
-static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
- uint32_t val, int len);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
/*
@@ -247,7 +87,7 @@ static void vfio_intx_interrupt(void *opaque)
}
}
-static void vfio_eoi(VFIODevice *vbasedev)
+static void vfio_intx_eoi(VFIODevice *vbasedev)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
@@ -255,14 +95,14 @@ static void vfio_eoi(VFIODevice *vbasedev)
return;
}
- trace_vfio_eoi(vbasedev->name);
+ trace_vfio_intx_eoi(vbasedev->name);
vdev->intx.pending = false;
pci_irq_deassert(&vdev->pdev);
vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
}
-static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev)
+static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev)
{
#ifdef CONFIG_KVM
struct kvm_irqfd irqfd = {
@@ -274,7 +114,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev)
int ret, argsz;
int32_t *pfd;
- if (!VFIO_ALLOW_KVM_INTX || !kvm_irqfds_enabled() ||
+ if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
vdev->intx.route.mode != PCI_INTX_ENABLED ||
!kvm_resamplefds_enabled()) {
return;
@@ -324,7 +164,7 @@ static void vfio_enable_intx_kvm(VFIOPCIDevice *vdev)
vdev->intx.kvm_accel = true;
- trace_vfio_enable_intx_kvm(vdev->vbasedev.name);
+ trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
return;
@@ -339,7 +179,7 @@ fail:
#endif
}
-static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev)
+static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
{
#ifdef CONFIG_KVM
struct kvm_irqfd irqfd = {
@@ -376,11 +216,11 @@ static void vfio_disable_intx_kvm(VFIOPCIDevice *vdev)
/* If we've missed an event, let it re-fire through QEMU */
vfio_unmask_single_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
- trace_vfio_disable_intx_kvm(vdev->vbasedev.name);
+ trace_vfio_intx_disable_kvm(vdev->vbasedev.name);
#endif
}
-static void vfio_update_irq(PCIDevice *pdev)
+static void vfio_intx_update(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
PCIINTxRoute route;
@@ -395,10 +235,10 @@ static void vfio_update_irq(PCIDevice *pdev)
return; /* Nothing changed */
}
- trace_vfio_update_irq(vdev->vbasedev.name,
- vdev->intx.route.irq, route.irq);
+ trace_vfio_intx_update(vdev->vbasedev.name,
+ vdev->intx.route.irq, route.irq);
- vfio_disable_intx_kvm(vdev);
+ vfio_intx_disable_kvm(vdev);
vdev->intx.route = route;
@@ -406,13 +246,13 @@ static void vfio_update_irq(PCIDevice *pdev)
return;
}
- vfio_enable_intx_kvm(vdev);
+ vfio_intx_enable_kvm(vdev);
/* Re-enable the interrupt in cased we missed an EOI */
- vfio_eoi(&vdev->vbasedev);
+ vfio_intx_eoi(&vdev->vbasedev);
}
-static int vfio_enable_intx(VFIOPCIDevice *vdev)
+static int vfio_intx_enable(VFIOPCIDevice *vdev)
{
uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
int ret, argsz;
@@ -467,21 +307,21 @@ static int vfio_enable_intx(VFIOPCIDevice *vdev)
return -errno;
}
- vfio_enable_intx_kvm(vdev);
+ vfio_intx_enable_kvm(vdev);
vdev->interrupt = VFIO_INT_INTx;
- trace_vfio_enable_intx(vdev->vbasedev.name);
+ trace_vfio_intx_enable(vdev->vbasedev.name);
return 0;
}
-static void vfio_disable_intx(VFIOPCIDevice *vdev)
+static void vfio_intx_disable(VFIOPCIDevice *vdev)
{
int fd;
timer_del(vdev->intx.mmap_timer);
- vfio_disable_intx_kvm(vdev);
+ vfio_intx_disable_kvm(vdev);
vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
vdev->intx.pending = false;
pci_irq_deassert(&vdev->pdev);
@@ -493,7 +333,7 @@ static void vfio_disable_intx(VFIOPCIDevice *vdev)
vdev->interrupt = VFIO_INT_NONE;
- trace_vfio_disable_intx(vdev->vbasedev.name);
+ trace_vfio_intx_disable(vdev->vbasedev.name);
}
/*
@@ -503,33 +343,28 @@ static void vfio_msi_interrupt(void *opaque)
{
VFIOMSIVector *vector = opaque;
VFIOPCIDevice *vdev = vector->vdev;
+ MSIMessage (*get_msg)(PCIDevice *dev, unsigned vector);
+ void (*notify)(PCIDevice *dev, unsigned vector);
+ MSIMessage msg;
int nr = vector - vdev->msi_vectors;
if (!event_notifier_test_and_clear(&vector->interrupt)) {
return;
}
-#ifdef DEBUG_VFIO
- MSIMessage msg;
-
if (vdev->interrupt == VFIO_INT_MSIX) {
- msg = msix_get_message(&vdev->pdev, nr);
+ get_msg = msix_get_message;
+ notify = msix_notify;
} else if (vdev->interrupt == VFIO_INT_MSI) {
- msg = msi_get_message(&vdev->pdev, nr);
+ get_msg = msi_get_message;
+ notify = msi_notify;
} else {
abort();
}
+ msg = get_msg(&vdev->pdev, nr);
trace_vfio_msi_interrupt(vdev->vbasedev.name, nr, msg.address, msg.data);
-#endif
-
- if (vdev->interrupt == VFIO_INT_MSIX) {
- msix_notify(&vdev->pdev, nr);
- } else if (vdev->interrupt == VFIO_INT_MSI) {
- msi_notify(&vdev->pdev, nr);
- } else {
- error_report("vfio: MSI interrupt receieved, but not enabled?");
- }
+ notify(&vdev->pdev, nr);
}
static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
@@ -576,13 +411,12 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
return ret;
}
-static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg,
- bool msix)
+static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
+ MSIMessage *msg, bool msix)
{
int virq;
- if ((msix && !VFIO_ALLOW_KVM_MSIX) ||
- (!msix && !VFIO_ALLOW_KVM_MSI) || !msg) {
+ if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi) || !msg) {
return;
}
@@ -655,7 +489,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vfio_update_kvm_msi_virq(vector, *msg);
}
} else {
- vfio_add_kvm_msi_virq(vector, msg, true);
+ vfio_add_kvm_msi_virq(vdev, vector, msg, true);
}
/*
@@ -747,7 +581,7 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
-static void vfio_enable_msix(VFIOPCIDevice *vdev)
+static void vfio_msix_enable(VFIOPCIDevice *vdev)
{
vfio_disable_interrupts(vdev);
@@ -776,10 +610,10 @@ static void vfio_enable_msix(VFIOPCIDevice *vdev)
error_report("vfio: msix_set_vector_notifiers failed");
}
- trace_vfio_enable_msix(vdev->vbasedev.name);
+ trace_vfio_msix_enable(vdev->vbasedev.name);
}
-static void vfio_enable_msi(VFIOPCIDevice *vdev)
+static void vfio_msi_enable(VFIOPCIDevice *vdev)
{
int ret, i;
@@ -808,7 +642,7 @@ retry:
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vfio_add_kvm_msi_virq(vector, &msg, false);
+ vfio_add_kvm_msi_virq(vdev, vector, &msg, false);
}
/* Set interrupt type prior to possible interrupts */
@@ -852,10 +686,10 @@ retry:
return;
}
- trace_vfio_enable_msi(vdev->vbasedev.name, vdev->nr_vectors);
+ trace_vfio_msi_enable(vdev->vbasedev.name, vdev->nr_vectors);
}
-static void vfio_disable_msi_common(VFIOPCIDevice *vdev)
+static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
{
int i;
@@ -876,10 +710,10 @@ static void vfio_disable_msi_common(VFIOPCIDevice *vdev)
vdev->nr_vectors = 0;
vdev->interrupt = VFIO_INT_NONE;
- vfio_enable_intx(vdev);
+ vfio_intx_enable(vdev);
}
-static void vfio_disable_msix(VFIOPCIDevice *vdev)
+static void vfio_msix_disable(VFIOPCIDevice *vdev)
{
int i;
@@ -900,17 +734,17 @@ static void vfio_disable_msix(VFIOPCIDevice *vdev)
vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
}
- vfio_disable_msi_common(vdev);
+ vfio_msi_disable_common(vdev);
- trace_vfio_disable_msix(vdev->vbasedev.name);
+ trace_vfio_msix_disable(vdev->vbasedev.name);
}
-static void vfio_disable_msi(VFIOPCIDevice *vdev)
+static void vfio_msi_disable(VFIOPCIDevice *vdev)
{
vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSI_IRQ_INDEX);
- vfio_disable_msi_common(vdev);
+ vfio_msi_disable_common(vdev);
- trace_vfio_disable_msi(vdev->vbasedev.name);
+ trace_vfio_msi_disable(vdev->vbasedev.name);
}
static void vfio_update_msi(VFIOPCIDevice *vdev)
@@ -1033,26 +867,6 @@ static const MemoryRegionOps vfio_rom_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
-static bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
-{
- PCIDevice *pdev = &vdev->pdev;
- uint16_t vendor_id, device_id;
- int count = 0;
-
- vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
- device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
-
- while (count < ARRAY_SIZE(romblacklist)) {
- if (romblacklist[count].vendor_id == vendor_id &&
- romblacklist[count].device_id == device_id) {
- return true;
- }
- count++;
- }
-
- return false;
-}
-
static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
{
uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
@@ -1130,7 +944,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
vdev->rom_read_failed = false;
}
-static void vfio_vga_write(void *opaque, hwaddr addr,
+void vfio_vga_write(void *opaque, hwaddr addr,
uint64_t data, unsigned size)
{
VFIOVGARegion *region = opaque;
@@ -1166,7 +980,7 @@ static void vfio_vga_write(void *opaque, hwaddr addr,
trace_vfio_vga_write(region->offset + addr, data, size);
}
-static uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size)
{
VFIOVGARegion *region = opaque;
VFIOVGA *vga = container_of(region, VFIOVGA, region[region->nr]);
@@ -1212,858 +1026,9 @@ static const MemoryRegionOps vfio_vga_ops = {
};
/*
- * Device specific quirks
- */
-
-/* Is range1 fully contained within range2? */
-static bool vfio_range_contained(uint64_t first1, uint64_t len1,
- uint64_t first2, uint64_t len2) {
- return (first1 >= first2 && first1 + len1 <= first2 + len2);
-}
-
-static bool vfio_flags_enabled(uint8_t flags, uint8_t mask)
-{
- return (mask && (flags & mask) == mask);
-}
-
-static uint64_t vfio_generic_window_quirk_read(void *opaque,
- hwaddr addr, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
- uint64_t data;
-
- if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
- ranges_overlap(addr, size,
- quirk->data.data_offset, quirk->data.data_size)) {
- hwaddr offset = addr - quirk->data.data_offset;
-
- if (!vfio_range_contained(addr, size, quirk->data.data_offset,
- quirk->data.data_size)) {
- hw_error("%s: window data read not fully contained: %s",
- __func__, memory_region_name(&quirk->mem));
- }
-
- data = vfio_pci_read_config(&vdev->pdev,
- quirk->data.address_val + offset, size);
-
- trace_vfio_generic_window_quirk_read(memory_region_name(&quirk->mem),
- vdev->vbasedev.name,
- quirk->data.bar,
- addr, size, data);
- } else {
- data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
- addr + quirk->data.base_offset, size);
- }
-
- return data;
-}
-
-static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
-
- if (ranges_overlap(addr, size,
- quirk->data.address_offset, quirk->data.address_size)) {
-
- if (addr != quirk->data.address_offset) {
- hw_error("%s: offset write into address window: %s",
- __func__, memory_region_name(&quirk->mem));
- }
-
- if ((data & ~quirk->data.address_mask) == quirk->data.address_match) {
- quirk->data.flags |= quirk->data.write_flags |
- quirk->data.read_flags;
- quirk->data.address_val = data & quirk->data.address_mask;
- } else {
- quirk->data.flags &= ~(quirk->data.write_flags |
- quirk->data.read_flags);
- }
- }
-
- if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
- ranges_overlap(addr, size,
- quirk->data.data_offset, quirk->data.data_size)) {
- hwaddr offset = addr - quirk->data.data_offset;
-
- if (!vfio_range_contained(addr, size, quirk->data.data_offset,
- quirk->data.data_size)) {
- hw_error("%s: window data write not fully contained: %s",
- __func__, memory_region_name(&quirk->mem));
- }
-
- vfio_pci_write_config(&vdev->pdev,
- quirk->data.address_val + offset, data, size);
- trace_vfio_generic_window_quirk_write(memory_region_name(&quirk->mem),
- vdev->vbasedev.name,
- quirk->data.bar,
- addr, data, size);
- return;
- }
-
- vfio_region_write(&vdev->bars[quirk->data.bar].region,
- addr + quirk->data.base_offset, data, size);
-}
-
-static const MemoryRegionOps vfio_generic_window_quirk = {
- .read = vfio_generic_window_quirk_read,
- .write = vfio_generic_window_quirk_write,
- .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-static uint64_t vfio_generic_quirk_read(void *opaque,
- hwaddr addr, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
- hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
- hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
- uint64_t data;
-
- if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
- ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
- if (!vfio_range_contained(addr, size, offset,
- quirk->data.address_mask + 1)) {
- hw_error("%s: read not fully contained: %s",
- __func__, memory_region_name(&quirk->mem));
- }
-
- data = vfio_pci_read_config(&vdev->pdev, addr - offset, size);
-
- trace_vfio_generic_quirk_read(memory_region_name(&quirk->mem),
- vdev->vbasedev.name, quirk->data.bar,
- addr + base, size, data);
- } else {
- data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
- addr + base, size);
- }
-
- return data;
-}
-
-static void vfio_generic_quirk_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
- hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
- hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
-
- if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
- ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
- if (!vfio_range_contained(addr, size, offset,
- quirk->data.address_mask + 1)) {
- hw_error("%s: write not fully contained: %s",
- __func__, memory_region_name(&quirk->mem));
- }
-
- vfio_pci_write_config(&vdev->pdev, addr - offset, data, size);
-
- trace_vfio_generic_quirk_write(memory_region_name(&quirk->mem),
- vdev->vbasedev.name, quirk->data.bar,
- addr + base, data, size);
- } else {
- vfio_region_write(&vdev->bars[quirk->data.bar].region,
- addr + base, data, size);
- }
-}
-
-static const MemoryRegionOps vfio_generic_quirk = {
- .read = vfio_generic_quirk_read,
- .write = vfio_generic_quirk_write,
- .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-#define PCI_VENDOR_ID_ATI 0x1002
-
-/*
- * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
- * through VGA register 0x3c3. On newer cards, the I/O port BAR is always
- * BAR4 (older cards like the X550 used BAR1, but we don't care to support
- * those). Note that on bare metal, a read of 0x3c3 doesn't always return the
- * I/O port BAR address. Originally this was coded to return the virtual BAR
- * address only if the physical register read returns the actual BAR address,
- * but users have reported greater success if we return the virtual address
- * unconditionally.
- */
-static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
- hwaddr addr, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
- uint64_t data = vfio_pci_read_config(&vdev->pdev,
- PCI_BASE_ADDRESS_0 + (4 * 4) + 1,
- size);
- trace_vfio_ati_3c3_quirk_read(data);
-
- return data;
-}
-
-static const MemoryRegionOps vfio_ati_3c3_quirk = {
- .read = vfio_ati_3c3_quirk_read,
- .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
-{
- PCIDevice *pdev = &vdev->pdev;
- VFIOQuirk *quirk;
-
- if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
- return;
- }
-
- /*
- * As long as the BAR is >= 256 bytes it will be aligned such that the
- * lower byte is always zero. Filter out anything else, if it exists.
- */
- if (!vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
- return;
- }
-
- quirk = g_malloc0(sizeof(*quirk));
- quirk->vdev = vdev;
-
- memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, quirk,
- "vfio-ati-3c3-quirk", 1);
- memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
- 3 /* offset 3 bytes from 0x3c0 */, &quirk->mem);
-
- QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
- quirk, next);
-
- trace_vfio_vga_probe_ati_3c3_quirk(vdev->vbasedev.name);
-}
-
-/*
- * Newer ATI/AMD devices, including HD5450 and HD7850, have a window to PCI
- * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access
- * the MMIO space directly, but a window to this space is provided through
- * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the
- * data register. When the address is programmed to a range of 0x4000-0x4fff
- * PCI configuration space is available. Experimentation seems to indicate
- * that only read-only access is provided, but we drop writes when the window
- * is enabled to config space nonetheless.
- */
-static void vfio_probe_ati_bar4_window_quirk(VFIOPCIDevice *vdev, int nr)
-{
- PCIDevice *pdev = &vdev->pdev;
- VFIOQuirk *quirk;
-
- if (!vdev->has_vga || nr != 4 ||
- pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
- return;
- }
-
- quirk = g_malloc0(sizeof(*quirk));
- quirk->vdev = vdev;
- quirk->data.address_size = 4;
- quirk->data.data_offset = 4;
- quirk->data.data_size = 4;
- quirk->data.address_match = 0x4000;
- quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
- quirk->data.bar = nr;
- quirk->data.read_flags = quirk->data.write_flags = 1;
-
- memory_region_init_io(&quirk->mem, OBJECT(vdev),
- &vfio_generic_window_quirk, quirk,
- "vfio-ati-bar4-window-quirk", 8);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
- quirk->data.base_offset, &quirk->mem, 1);
-
- QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
-
- trace_vfio_probe_ati_bar4_window_quirk(vdev->vbasedev.name);
-}
-
-#define PCI_VENDOR_ID_REALTEK 0x10ec
-
-/*
- * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2
- * offset 0x70 there is a dword data register, offset 0x74 is a dword address
- * register. According to the Linux r8169 driver, the MSI-X table is addressed
- * when the "type" portion of the address register is set to 0x1. This appears
- * to be bits 16:30. Bit 31 is both a write indicator and some sort of
- * "address latched" indicator. Bits 12:15 are a mask field, which we can
- * ignore because the MSI-X table should always be accessed as a dword (full
- * mask). Bits 0:11 is offset within the type.
- *
- * Example trace:
- *
- * Read from MSI-X table offset 0
- * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
- * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
- * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
- *
- * Write 0xfee00000 to MSI-X table offset 0
- * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
- * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
- * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
- */
-
-static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
- hwaddr addr, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
-
- switch (addr) {
- case 4: /* address */
- if (quirk->data.flags) {
- trace_vfio_rtl8168_window_quirk_read_fake(
- memory_region_name(&quirk->mem),
- vdev->vbasedev.name);
-
- return quirk->data.address_match ^ 0x80000000U;
- }
- break;
- case 0: /* data */
- if (quirk->data.flags) {
- uint64_t val;
-
- trace_vfio_rtl8168_window_quirk_read_table(
- memory_region_name(&quirk->mem),
- vdev->vbasedev.name);
-
- if (!(vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
- return 0;
- }
-
- memory_region_dispatch_read(&vdev->pdev.msix_table_mmio,
- (hwaddr)(quirk->data.address_match
- & 0xfff),
- &val,
- size,
- MEMTXATTRS_UNSPECIFIED);
- return val;
- }
- }
-
- trace_vfio_rtl8168_window_quirk_read_direct(memory_region_name(&quirk->mem),
- vdev->vbasedev.name);
-
- return vfio_region_read(&vdev->bars[quirk->data.bar].region,
- addr + 0x70, size);
-}
-
-static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
-
- switch (addr) {
- case 4: /* address */
- if ((data & 0x7fff0000) == 0x10000) {
- if (data & 0x80000000U &&
- vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
-
- trace_vfio_rtl8168_window_quirk_write_table(
- memory_region_name(&quirk->mem),
- vdev->vbasedev.name);
-
- memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
- (hwaddr)(data & 0xfff),
- (uint64_t)quirk->data.address_mask,
- size, MEMTXATTRS_UNSPECIFIED);
- }
-
- quirk->data.flags = 1;
- quirk->data.address_match = data;
-
- return;
- }
- quirk->data.flags = 0;
- break;
- case 0: /* data */
- quirk->data.address_mask = data;
- break;
- }
-
- trace_vfio_rtl8168_window_quirk_write_direct(
- memory_region_name(&quirk->mem),
- vdev->vbasedev.name);
-
- vfio_region_write(&vdev->bars[quirk->data.bar].region,
- addr + 0x70, data, size);
-}
-
-static const MemoryRegionOps vfio_rtl8168_window_quirk = {
- .read = vfio_rtl8168_window_quirk_read,
- .write = vfio_rtl8168_window_quirk_write,
- .valid = {
- .min_access_size = 4,
- .max_access_size = 4,
- .unaligned = false,
- },
- .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-static void vfio_probe_rtl8168_bar2_window_quirk(VFIOPCIDevice *vdev, int nr)
-{
- PCIDevice *pdev = &vdev->pdev;
- VFIOQuirk *quirk;
-
- if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK ||
- pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) {
- return;
- }
-
- quirk = g_malloc0(sizeof(*quirk));
- quirk->vdev = vdev;
- quirk->data.bar = nr;
-
- memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk,
- quirk, "vfio-rtl8168-window-quirk", 8);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
- 0x70, &quirk->mem, 1);
-
- QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
-
- trace_vfio_probe_rtl8168_bar2_window_quirk(vdev->vbasedev.name);
-}
-/*
- * Trap the BAR2 MMIO window to config space as well.
- */
-static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
-{
- PCIDevice *pdev = &vdev->pdev;
- VFIOQuirk *quirk;
-
- /* Only enable on newer devices where BAR2 is 64bit */
- if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 ||
- pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
- return;
- }
-
- quirk = g_malloc0(sizeof(*quirk));
- quirk->vdev = vdev;
- quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
- quirk->data.address_match = 0x4000;
- quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
- quirk->data.bar = nr;
-
- memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk,
- "vfio-ati-bar2-4000-quirk",
- TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
- quirk->data.address_match & TARGET_PAGE_MASK,
- &quirk->mem, 1);
-
- QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
-
- trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name);
-}
-
-/*
- * Older ATI/AMD cards like the X550 have a similar window to that above.
- * I/O port BAR1 provides a window to a mirror of PCI config space located
- * in BAR2 at offset 0xf00. We don't care to support such older cards, but
- * note it for future reference.
- */
-
-#define PCI_VENDOR_ID_NVIDIA 0x10de
-
-/*
- * Nvidia has several different methods to get to config space, the
- * nouveu project has several of these documented here:
- * https://github.com/pathscale/envytools/tree/master/hwdocs
- *
- * The first quirk is actually not documented in envytools and is found
- * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an
- * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access
- * the mirror of PCI config space found at BAR0 offset 0x1800. The access
- * sequence first writes 0x338 to I/O port 0x3d4. The target offset is
- * then written to 0x3d0. Finally 0x538 is written for a read and 0x738
- * is written for a write to 0x3d4. The BAR0 offset is then accessible
- * through 0x3d0. This quirk doesn't seem to be necessary on newer cards
- * that use the I/O port BAR5 window but it doesn't hurt to leave it.
- */
-enum {
- NV_3D0_NONE = 0,
- NV_3D0_SELECT,
- NV_3D0_WINDOW,
- NV_3D0_READ,
- NV_3D0_WRITE,
-};
-
-static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
- hwaddr addr, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
- PCIDevice *pdev = &vdev->pdev;
- uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
- addr + quirk->data.base_offset, size);
-
- if (quirk->data.flags == NV_3D0_READ && addr == quirk->data.data_offset) {
- data = vfio_pci_read_config(pdev, quirk->data.address_val, size);
- trace_vfio_nvidia_3d0_quirk_read(size, data);
- }
-
- quirk->data.flags = NV_3D0_NONE;
-
- return data;
-}
-
-static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
- PCIDevice *pdev = &vdev->pdev;
-
- switch (quirk->data.flags) {
- case NV_3D0_NONE:
- if (addr == quirk->data.address_offset && data == 0x338) {
- quirk->data.flags = NV_3D0_SELECT;
- }
- break;
- case NV_3D0_SELECT:
- quirk->data.flags = NV_3D0_NONE;
- if (addr == quirk->data.data_offset &&
- (data & ~quirk->data.address_mask) == quirk->data.address_match) {
- quirk->data.flags = NV_3D0_WINDOW;
- quirk->data.address_val = data & quirk->data.address_mask;
- }
- break;
- case NV_3D0_WINDOW:
- quirk->data.flags = NV_3D0_NONE;
- if (addr == quirk->data.address_offset) {
- if (data == 0x538) {
- quirk->data.flags = NV_3D0_READ;
- } else if (data == 0x738) {
- quirk->data.flags = NV_3D0_WRITE;
- }
- }
- break;
- case NV_3D0_WRITE:
- quirk->data.flags = NV_3D0_NONE;
- if (addr == quirk->data.data_offset) {
- vfio_pci_write_config(pdev, quirk->data.address_val, data, size);
- trace_vfio_nvidia_3d0_quirk_write(data, size);
- return;
- }
- break;
- }
-
- vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
- addr + quirk->data.base_offset, data, size);
-}
-
-static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
- .read = vfio_nvidia_3d0_quirk_read,
- .write = vfio_nvidia_3d0_quirk_write,
- .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
-{
- PCIDevice *pdev = &vdev->pdev;
- VFIOQuirk *quirk;
-
- if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA ||
- !vdev->bars[1].region.size) {
- return;
- }
-
- quirk = g_malloc0(sizeof(*quirk));
- quirk->vdev = vdev;
- quirk->data.base_offset = 0x10;
- quirk->data.address_offset = 4;
- quirk->data.address_size = 2;
- quirk->data.address_match = 0x1800;
- quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
- quirk->data.data_offset = 0;
- quirk->data.data_size = 4;
-
- memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_3d0_quirk,
- quirk, "vfio-nvidia-3d0-quirk", 6);
- memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
- quirk->data.base_offset, &quirk->mem);
-
- QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
- quirk, next);
-
- trace_vfio_vga_probe_nvidia_3d0_quirk(vdev->vbasedev.name);
-}
-
-/*
- * The second quirk is documented in envytools. The I/O port BAR5 is just
- * a set of address/data ports to the MMIO BARs. The BAR we care about is
- * again BAR0. This backdoor is apparently a bit newer than the one above
- * so we need to not only trap 256 bytes @0x1800, but all of PCI config
- * space, including extended space is available at the 4k @0x88000.
- */
-enum {
- NV_BAR5_ADDRESS = 0x1,
- NV_BAR5_ENABLE = 0x2,
- NV_BAR5_MASTER = 0x4,
- NV_BAR5_VALID = 0x7,
-};
-
-static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
-
- switch (addr) {
- case 0x0:
- if (data & 0x1) {
- quirk->data.flags |= NV_BAR5_MASTER;
- } else {
- quirk->data.flags &= ~NV_BAR5_MASTER;
- }
- break;
- case 0x4:
- if (data & 0x1) {
- quirk->data.flags |= NV_BAR5_ENABLE;
- } else {
- quirk->data.flags &= ~NV_BAR5_ENABLE;
- }
- break;
- case 0x8:
- if (quirk->data.flags & NV_BAR5_MASTER) {
- if ((data & ~0xfff) == 0x88000) {
- quirk->data.flags |= NV_BAR5_ADDRESS;
- quirk->data.address_val = data & 0xfff;
- } else if ((data & ~0xff) == 0x1800) {
- quirk->data.flags |= NV_BAR5_ADDRESS;
- quirk->data.address_val = data & 0xff;
- } else {
- quirk->data.flags &= ~NV_BAR5_ADDRESS;
- }
- }
- break;
- }
-
- vfio_generic_window_quirk_write(opaque, addr, data, size);
-}
-
-static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = {
- .read = vfio_generic_window_quirk_read,
- .write = vfio_nvidia_bar5_window_quirk_write,
- .valid.min_access_size = 4,
- .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-static void vfio_probe_nvidia_bar5_window_quirk(VFIOPCIDevice *vdev, int nr)
-{
- PCIDevice *pdev = &vdev->pdev;
- VFIOQuirk *quirk;
-
- if (!vdev->has_vga || nr != 5 ||
- pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
- return;
- }
-
- quirk = g_malloc0(sizeof(*quirk));
- quirk->vdev = vdev;
- quirk->data.read_flags = quirk->data.write_flags = NV_BAR5_VALID;
- quirk->data.address_offset = 0x8;
- quirk->data.address_size = 0; /* actually 4, but avoids generic code */
- quirk->data.data_offset = 0xc;
- quirk->data.data_size = 4;
- quirk->data.bar = nr;
-
- memory_region_init_io(&quirk->mem, OBJECT(vdev),
- &vfio_nvidia_bar5_window_quirk, quirk,
- "vfio-nvidia-bar5-window-quirk", 16);
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
- 0, &quirk->mem, 1);
-
- QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
-
- trace_vfio_probe_nvidia_bar5_window_quirk(vdev->vbasedev.name);
-}
-
-static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr,
- uint64_t data, unsigned size)
-{
- VFIOQuirk *quirk = opaque;
- VFIOPCIDevice *vdev = quirk->vdev;
- PCIDevice *pdev = &vdev->pdev;
- hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
-
- vfio_generic_quirk_write(opaque, addr, data, size);
-
- /*
- * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
- * MSI capability ID register. Both the ID and next register are
- * read-only, so we allow writes covering either of those to real hw.
- * NB - only fixed for the 0x88000 MMIO window.
- */
- if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
- vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
- vfio_region_write(&vdev->bars[quirk->data.bar].region,
- addr + base, data, size);
- }
-}
-
-static const MemoryRegionOps vfio_nvidia_88000_quirk = {
- .read = vfio_generic_quirk_read,
- .write = vfio_nvidia_88000_quirk_write,
- .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-/*
- * Finally, BAR0 itself. We want to redirect any accesses to either
- * 0x1800 or 0x88000 through the PCI config space access functions.
- *
- * NB - quirk at a page granularity or else they don't seem to work when
- * BARs are mmap'd
- *
- * Here's offset 0x88000...
- */
-static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
-{
- PCIDevice *pdev = &vdev->pdev;
- VFIOQuirk *quirk;
- uint16_t vendor, class;
-
- vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
- class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
-
- if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA ||
- class != PCI_CLASS_DISPLAY_VGA) {
- return;
- }
-
- quirk = g_malloc0(sizeof(*quirk));
- quirk->vdev = vdev;
- quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
- quirk->data.address_match = 0x88000;
- quirk->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
- quirk->data.bar = nr;
-
- memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk,
- quirk, "vfio-nvidia-bar0-88000-quirk",
- TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
- quirk->data.address_match & TARGET_PAGE_MASK,
- &quirk->mem, 1);
-
- QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
-
- trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name);
-}
-
-/*
- * And here's the same for BAR0 offset 0x1800...
- */
-static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
-{
- PCIDevice *pdev = &vdev->pdev;
- VFIOQuirk *quirk;
-
- if (!vdev->has_vga || nr != 0 ||
- pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
- return;
- }
-
- /* Log the chipset ID */
- trace_vfio_probe_nvidia_bar0_1800_quirk_id(
- (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20)
- & 0xff);
-
- quirk = g_malloc0(sizeof(*quirk));
- quirk->vdev = vdev;
- quirk->data.flags = quirk->data.read_flags = quirk->data.write_flags = 1;
- quirk->data.address_match = 0x1800;
- quirk->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
- quirk->data.bar = nr;
-
- memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_generic_quirk, quirk,
- "vfio-nvidia-bar0-1800-quirk",
- TARGET_PAGE_ALIGN(quirk->data.address_mask + 1));
- memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
- quirk->data.address_match & TARGET_PAGE_MASK,
- &quirk->mem, 1);
-
- QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
-
- trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name);
-}
-
-/*
- * TODO - Some Nvidia devices provide config access to their companion HDA
- * device and even to their parent bridge via these config space mirrors.
- * Add quirks for those regions.
- */
-
-/*
- * Common quirk probe entry points.
- */
-static void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
-{
- vfio_vga_probe_ati_3c3_quirk(vdev);
- vfio_vga_probe_nvidia_3d0_quirk(vdev);
-}
-
-static void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
-{
- VFIOQuirk *quirk;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
- QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
- memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem);
- }
- }
-}
-
-static void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
- while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
- VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
- object_unparent(OBJECT(&quirk->mem));
- QLIST_REMOVE(quirk, next);
- g_free(quirk);
- }
- }
-}
-
-static void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
-{
- vfio_probe_ati_bar4_window_quirk(vdev, nr);
- vfio_probe_ati_bar2_4000_quirk(vdev, nr);
- vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
- vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
- vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
- vfio_probe_rtl8168_bar2_window_quirk(vdev, nr);
-}
-
-static void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
-{
- VFIOBAR *bar = &vdev->bars[nr];
- VFIOQuirk *quirk;
-
- QLIST_FOREACH(quirk, &bar->quirks, next) {
- memory_region_del_subregion(&bar->region.mem, &quirk->mem);
- }
-}
-
-static void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
-{
- VFIOBAR *bar = &vdev->bars[nr];
-
- while (!QLIST_EMPTY(&bar->quirks)) {
- VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
- object_unparent(OBJECT(&quirk->mem));
- QLIST_REMOVE(quirk, next);
- g_free(quirk);
- }
-}
-
-/*
* PCI config space
*/
-static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
+uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
{
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val;
@@ -2096,8 +1061,8 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
return val;
}
-static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
- uint32_t val, int len)
+void vfio_pci_write_config(PCIDevice *pdev,
+ uint32_t addr, uint32_t val, int len)
{
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
uint32_t val_le = cpu_to_le32(val);
@@ -2123,11 +1088,11 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
if (!was_enabled) {
if (is_enabled) {
- vfio_enable_msi(vdev);
+ vfio_msi_enable(vdev);
}
} else {
if (!is_enabled) {
- vfio_disable_msi(vdev);
+ vfio_msi_disable(vdev);
} else {
vfio_update_msi(vdev);
}
@@ -2141,9 +1106,9 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
is_enabled = msix_enabled(pdev);
if (!was_enabled && is_enabled) {
- vfio_enable_msix(vdev);
+ vfio_msix_enable(vdev);
} else if (was_enabled && !is_enabled) {
- vfio_disable_msix(vdev);
+ vfio_msix_disable(vdev);
}
} else {
/* Write everything to QEMU to keep emulated bits correct */
@@ -2162,17 +1127,17 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev)
* disable MSI/X and then cleanup by disabling INTx.
*/
if (vdev->interrupt == VFIO_INT_MSIX) {
- vfio_disable_msix(vdev);
+ vfio_msix_disable(vdev);
} else if (vdev->interrupt == VFIO_INT_MSI) {
- vfio_disable_msi(vdev);
+ vfio_msi_disable(vdev);
}
if (vdev->interrupt == VFIO_INT_INTx) {
- vfio_disable_intx(vdev);
+ vfio_intx_disable(vdev);
}
}
-static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos)
+static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos)
{
uint16_t ctrl;
bool msi_64bit, msi_maskbit;
@@ -2188,7 +1153,7 @@ static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos)
msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT);
entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
- trace_vfio_setup_msi(vdev->vbasedev.name, pos);
+ trace_vfio_msi_setup(vdev->vbasedev.name, pos);
ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit);
if (ret < 0) {
@@ -2211,12 +1176,13 @@ static int vfio_setup_msi(VFIOPCIDevice *vdev, int pos)
* need to first look for where the MSI-X table lives. So we
* unfortunately split MSI-X setup across two functions.
*/
-static int vfio_early_setup_msix(VFIOPCIDevice *vdev)
+static int vfio_msix_early_setup(VFIOPCIDevice *vdev)
{
uint8_t pos;
uint16_t ctrl;
uint32_t table, pba;
int fd = vdev->vbasedev.fd;
+ VFIOMSIXInfo *msix;
pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
if (!pos) {
@@ -2242,49 +1208,44 @@ static int vfio_early_setup_msix(VFIOPCIDevice *vdev)
table = le32_to_cpu(table);
pba = le32_to_cpu(pba);
- vdev->msix = g_malloc0(sizeof(*(vdev->msix)));
- vdev->msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
- vdev->msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
- vdev->msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
- vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
- vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
+ msix = g_malloc0(sizeof(*msix));
+ msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
+ msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
+ msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
+ msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
+ msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
/*
* Test the size of the pba_offset variable and catch if it extends outside
* of the specified BAR. If it is the case, we need to apply a hardware
* specific quirk if the device is known or we have a broken configuration.
*/
- if (vdev->msix->pba_offset >=
- vdev->bars[vdev->msix->pba_bar].region.size) {
-
- PCIDevice *pdev = &vdev->pdev;
- uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
- uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID);
-
+ if (msix->pba_offset >= vdev->bars[msix->pba_bar].region.size) {
/*
* Chelsio T5 Virtual Function devices are encoded as 0x58xx for T5
* adapters. The T5 hardware returns an incorrect value of 0x8000 for
* the VF PBA offset while the BAR itself is only 8k. The correct value
* is 0x1000, so we hard code that here.
*/
- if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) {
- vdev->msix->pba_offset = 0x1000;
+ if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO &&
+ (vdev->device_id & 0xff00) == 0x5800) {
+ msix->pba_offset = 0x1000;
} else {
error_report("vfio: Hardware reports invalid configuration, "
"MSIX PBA outside of specified BAR");
+ g_free(msix);
return -EINVAL;
}
}
- trace_vfio_early_setup_msix(vdev->vbasedev.name, pos,
- vdev->msix->table_bar,
- vdev->msix->table_offset,
- vdev->msix->entries);
+ trace_vfio_msix_early_setup(vdev->vbasedev.name, pos, msix->table_bar,
+ msix->table_offset, msix->entries);
+ vdev->msix = msix;
return 0;
}
-static int vfio_setup_msix(VFIOPCIDevice *vdev, int pos)
+static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos)
{
int ret;
@@ -2707,14 +1668,14 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
switch (cap_id) {
case PCI_CAP_ID_MSI:
- ret = vfio_setup_msi(vdev, pos);
+ ret = vfio_msi_setup(vdev, pos);
break;
case PCI_CAP_ID_EXP:
vfio_check_pcie_flr(vdev, pos);
ret = vfio_setup_pcie_cap(vdev, pos, size);
break;
case PCI_CAP_ID_MSIX:
- ret = vfio_setup_msix(vdev, pos);
+ ret = vfio_msix_setup(vdev, pos);
break;
case PCI_CAP_ID_PM:
vfio_check_pm_reset(vdev, pos);
@@ -2792,7 +1753,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
{
- vfio_enable_intx(vdev);
+ vfio_intx_enable(vdev);
}
static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
@@ -3016,7 +1977,7 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev)
static VFIODeviceOps vfio_pci_ops = {
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
- .vfio_eoi = vfio_eoi,
+ .vfio_eoi = vfio_intx_eoi,
};
static int vfio_populate_device(VFIOPCIDevice *vdev)
@@ -3351,162 +2312,6 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
vdev->req_enabled = false;
}
-/*
- * AMD Radeon PCI config reset, based on Linux:
- * drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
- * drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
- * drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
- * drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
- * IDs: include/drm/drm_pciids.h
- * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
- *
- * Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the
- * hardware that should be fixed on future ASICs. The symptom of this is that
- * once the accerlated driver loads, Windows guests will bsod on subsequent
- * attmpts to load the driver, such as after VM reset or shutdown/restart. To
- * work around this, we do an AMD specific PCI config reset, followed by an SMC
- * reset. The PCI config reset only works if SMC firmware is running, so we
- * have a dependency on the state of the device as to whether this reset will
- * be effective. There are still cases where we won't be able to kick the
- * device into working, but this greatly improves the usability overall. The
- * config reset magic is relatively common on AMD GPUs, but the setup and SMC
- * poking is largely ASIC specific.
- */
-static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
-{
- uint32_t clk, pc_c;
-
- /*
- * Registers 200h and 204h are index and data registers for accessing
- * indirect configuration registers within the device.
- */
- vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
- clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
- vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
- pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
-
- return (!(clk & 1) && (0x20100 <= pc_c));
-}
-
-/*
- * The scope of a config reset is controlled by a mode bit in the misc register
- * and a fuse, exposed as a bit in another register. The fuse is the default
- * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula
- * scope = !(misc ^ fuse), where the resulting scope is defined the same as
- * the fuse. A truth table therefore tells us that if misc == fuse, we need
- * to flip the value of the bit in the misc register.
- */
-static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
-{
- uint32_t misc, fuse;
- bool a, b;
-
- vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
- fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
- b = fuse & 64;
-
- vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
- misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
- a = misc & 2;
-
- if (a == b) {
- vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
- vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
- }
-}
-
-static int vfio_radeon_reset(VFIOPCIDevice *vdev)
-{
- PCIDevice *pdev = &vdev->pdev;
- int i, ret = 0;
- uint32_t data;
-
- /* Defer to a kernel implemented reset */
- if (vdev->vbasedev.reset_works) {
- return -ENODEV;
- }
-
- /* Enable only memory BAR access */
- vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
-
- /* Reset only works if SMC firmware is loaded and running */
- if (!vfio_radeon_smc_is_running(vdev)) {
- ret = -EINVAL;
- goto out;
- }
-
- /* Make sure only the GFX function is reset */
- vfio_radeon_set_gfx_only_reset(vdev);
-
- /* AMD PCI config reset */
- vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
- usleep(100);
-
- /* Read back the memory size to make sure we're out of reset */
- for (i = 0; i < 100000; i++) {
- if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
- break;
- }
- usleep(1);
- }
-
- /* Reset SMC */
- vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
- data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
- data |= 1;
- vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
-
- /* Disable SMC clock */
- vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
- data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
- data |= 1;
- vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
-
-out:
- /* Restore PCI command register */
- vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
-
- return ret;
-}
-
-static void vfio_setup_resetfn(VFIOPCIDevice *vdev)
-{
- PCIDevice *pdev = &vdev->pdev;
- uint16_t vendor, device;
-
- vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
- device = pci_get_word(pdev->config + PCI_DEVICE_ID);
-
- switch (vendor) {
- case 0x1002:
- switch (device) {
- /* Bonaire */
- case 0x6649: /* Bonaire [FirePro W5100] */
- case 0x6650:
- case 0x6651:
- case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
- case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
- case 0x665d: /* Bonaire [Radeon R7 200 Series] */
- /* Hawaii */
- case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
- case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
- case 0x67A2:
- case 0x67A8:
- case 0x67A9:
- case 0x67AA:
- case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
- case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
- case 0x67B8:
- case 0x67B9:
- case 0x67BA:
- case 0x67BE:
- vdev->resetfn = vfio_radeon_reset;
- break;
- }
- break;
- }
-}
-
static int vfio_initfn(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -3599,6 +2404,54 @@ static int vfio_initfn(PCIDevice *pdev)
/* QEMU can choose to expose the ROM or not */
memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4);
+ /*
+ * The PCI spec reserves vendor ID 0xffff as an invalid value. The
+ * device ID is managed by the vendor and need only be a 16-bit value.
+ * Allow any 16-bit value for subsystem so they can be hidden or changed.
+ */
+ if (vdev->vendor_id != PCI_ANY_ID) {
+ if (vdev->vendor_id >= 0xffff) {
+ error_report("vfio: Invalid PCI vendor ID provided");
+ return -EINVAL;
+ }
+ vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
+ trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id);
+ } else {
+ vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
+ }
+
+ if (vdev->device_id != PCI_ANY_ID) {
+ if (vdev->device_id > 0xffff) {
+ error_report("vfio: Invalid PCI device ID provided");
+ return -EINVAL;
+ }
+ vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
+ trace_vfio_pci_emulated_device_id(vdev->vbasedev.name, vdev->device_id);
+ } else {
+ vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
+ }
+
+ if (vdev->sub_vendor_id != PCI_ANY_ID) {
+ if (vdev->sub_vendor_id > 0xffff) {
+ error_report("vfio: Invalid PCI subsystem vendor ID provided");
+ return -EINVAL;
+ }
+ vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
+ vdev->sub_vendor_id, ~0);
+ trace_vfio_pci_emulated_sub_vendor_id(vdev->vbasedev.name,
+ vdev->sub_vendor_id);
+ }
+
+ if (vdev->sub_device_id != PCI_ANY_ID) {
+ if (vdev->sub_device_id > 0xffff) {
+ error_report("vfio: Invalid PCI subsystem device ID provided");
+ return -EINVAL;
+ }
+ vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0);
+ trace_vfio_pci_emulated_sub_device_id(vdev->vbasedev.name,
+ vdev->sub_device_id);
+ }
+
/* QEMU can change multi-function devices to single function, or reverse */
vdev->emulated_config_bits[PCI_HEADER_TYPE] =
PCI_HEADER_TYPE_MULTI_FUNCTION;
@@ -3620,7 +2473,7 @@ static int vfio_initfn(PCIDevice *pdev)
vfio_pci_size_rom(vdev);
- ret = vfio_early_setup_msix(vdev);
+ ret = vfio_msix_early_setup(vdev);
if (ret) {
return ret;
}
@@ -3646,8 +2499,8 @@ static int vfio_initfn(PCIDevice *pdev)
if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
vfio_intx_mmap_enable, vdev);
- pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_update_irq);
- ret = vfio_enable_intx(vdev);
+ pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_intx_update);
+ ret = vfio_intx_enable(vdev);
if (ret) {
goto out_teardown;
}
@@ -3655,7 +2508,7 @@ static int vfio_initfn(PCIDevice *pdev)
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
- vfio_setup_resetfn(vdev);
+ vfio_setup_resetfn_quirk(vdev);
return 0;
@@ -3748,7 +2601,16 @@ static Property vfio_pci_dev_properties[] = {
VFIO_FEATURE_ENABLE_VGA_BIT, false),
DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
VFIO_FEATURE_ENABLE_REQ_BIT, true),
- DEFINE_PROP_BOOL("x-mmap", VFIOPCIDevice, vbasedev.allow_mmap, true),
+ DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
+ DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
+ DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
+ DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
+ DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
+ sub_vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
+ sub_device_id, PCI_ANY_ID),
/*
* TODO - support passed fds... is this necessary?
* DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
new file mode 100644
index 0000000..f004d52
--- /dev/null
+++ b/hw/vfio/pci.h
@@ -0,0 +1,159 @@
+/*
+ * vfio based device assignment support - PCI devices
+ *
+ * Copyright Red Hat, Inc. 2012-2015
+ *
+ * Authors:
+ * Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+#ifndef HW_VFIO_VFIO_PCI_H
+#define HW_VFIO_VFIO_PCI_H
+
+#include "qemu-common.h"
+#include "exec/memory.h"
+#include "hw/pci/pci.h"
+#include "hw/vfio/vfio-common.h"
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+
+#define PCI_ANY_ID (~0)
+
+struct VFIOPCIDevice;
+
+typedef struct VFIOQuirk {
+ QLIST_ENTRY(VFIOQuirk) next;
+ void *data;
+ int nr_mem;
+ MemoryRegion *mem;
+} VFIOQuirk;
+
+typedef struct VFIOBAR {
+ VFIORegion region;
+ bool ioport;
+ bool mem64;
+ QLIST_HEAD(, VFIOQuirk) quirks;
+} VFIOBAR;
+
+typedef struct VFIOVGARegion {
+ MemoryRegion mem;
+ off_t offset;
+ int nr;
+ QLIST_HEAD(, VFIOQuirk) quirks;
+} VFIOVGARegion;
+
+typedef struct VFIOVGA {
+ off_t fd_offset;
+ int fd;
+ VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS];
+} VFIOVGA;
+
+typedef struct VFIOINTx {
+ bool pending; /* interrupt pending */
+ bool kvm_accel; /* set when QEMU bypass through KVM enabled */
+ uint8_t pin; /* which pin to pull for qemu_set_irq */
+ EventNotifier interrupt; /* eventfd triggered on interrupt */
+ EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
+ PCIINTxRoute route; /* routing info for QEMU bypass */
+ uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
+ QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
+} VFIOINTx;
+
+typedef struct VFIOMSIVector {
+ /*
+ * Two interrupt paths are configured per vector. The first, is only used
+ * for interrupts injected via QEMU. This is typically the non-accel path,
+ * but may also be used when we want QEMU to handle masking and pending
+ * bits. The KVM path bypasses QEMU and is therefore higher performance,
+ * but requires masking at the device. virq is used to track the MSI route
+ * through KVM, thus kvm_interrupt is only available when virq is set to a
+ * valid (>= 0) value.
+ */
+ EventNotifier interrupt;
+ EventNotifier kvm_interrupt;
+ struct VFIOPCIDevice *vdev; /* back pointer to device */
+ int virq;
+ bool use;
+} VFIOMSIVector;
+
+enum {
+ VFIO_INT_NONE = 0,
+ VFIO_INT_INTx = 1,
+ VFIO_INT_MSI = 2,
+ VFIO_INT_MSIX = 3,
+};
+
+/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
+typedef struct VFIOMSIXInfo {
+ uint8_t table_bar;
+ uint8_t pba_bar;
+ uint16_t entries;
+ uint32_t table_offset;
+ uint32_t pba_offset;
+ MemoryRegion mmap_mem;
+ void *mmap;
+} VFIOMSIXInfo;
+
+typedef struct VFIOPCIDevice {
+ PCIDevice pdev;
+ VFIODevice vbasedev;
+ VFIOINTx intx;
+ unsigned int config_size;
+ uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */
+ off_t config_offset; /* Offset of config space region within device fd */
+ unsigned int rom_size;
+ off_t rom_offset; /* Offset of ROM region within device fd */
+ void *rom;
+ int msi_cap_size;
+ VFIOMSIVector *msi_vectors;
+ VFIOMSIXInfo *msix;
+ int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
+ int interrupt; /* Current interrupt type */
+ VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
+ VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */
+ PCIHostDeviceAddress host;
+ EventNotifier err_notifier;
+ EventNotifier req_notifier;
+ int (*resetfn)(struct VFIOPCIDevice *);
+ uint32_t vendor_id;
+ uint32_t device_id;
+ uint32_t sub_vendor_id;
+ uint32_t sub_device_id;
+ uint32_t features;
+#define VFIO_FEATURE_ENABLE_VGA_BIT 0
+#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
+#define VFIO_FEATURE_ENABLE_REQ_BIT 1
+#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+ int32_t bootindex;
+ uint8_t pm_cap;
+ bool has_vga;
+ bool pci_aer;
+ bool req_enabled;
+ bool has_flr;
+ bool has_pm_reset;
+ bool rom_read_failed;
+ bool no_kvm_intx;
+ bool no_kvm_msi;
+ bool no_kvm_msix;
+} VFIOPCIDevice;
+
+uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
+void vfio_pci_write_config(PCIDevice *pdev,
+ uint32_t addr, uint32_t val, int len);
+
+uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
+void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
+
+bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev);
+void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
+void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev);
+void vfio_vga_quirk_free(VFIOPCIDevice *vdev);
+void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr);
+void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr);
+void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr);
+void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
+
+#endif /* HW_VFIO_VFIO_PCI_H */
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index de4ec52..a6726cd 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -678,7 +678,7 @@ static const VMStateDescription vfio_platform_vmstate = {
static Property vfio_platform_dev_properties[] = {
DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
- DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true),
+ DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
mmap_timeout, 1100),
DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index 2b431e6..c7a03d4 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -384,6 +384,15 @@ struct AcpiMadtGenericMsiFrame {
typedef struct AcpiMadtGenericMsiFrame AcpiMadtGenericMsiFrame;
+struct AcpiMadtGenericRedistributor {
+ ACPI_SUB_HEADER_DEF
+ uint16_t reserved;
+ uint64_t base_address;
+ uint32_t range_length;
+} QEMU_PACKED;
+
+typedef struct AcpiMadtGenericRedistributor AcpiMadtGenericRedistributor;
+
/*
* Generic Timer Description Table (GTDT)
*/
diff --git a/include/hw/arm/virt-acpi-build.h b/include/hw/arm/virt-acpi-build.h
index 19b68a4..744b666 100644
--- a/include/hw/arm/virt-acpi-build.h
+++ b/include/hw/arm/virt-acpi-build.h
@@ -32,6 +32,7 @@ typedef struct VirtGuestInfo {
const MemMapEntry *memmap;
const int *irqmap;
bool use_highmem;
+ int gic_version;
} VirtGuestInfo;
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 808753f..f464586 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -46,6 +46,9 @@ enum {
VIRT_CPUPERIPHS,
VIRT_GIC_DIST,
VIRT_GIC_CPU,
+ VIRT_GIC_V2M,
+ VIRT_GIC_ITS,
+ VIRT_GIC_REDIST,
VIRT_UART,
VIRT_MMIO,
VIRT_RTC,
@@ -54,7 +57,6 @@ enum {
VIRT_PCIE_MMIO,
VIRT_PCIE_PIO,
VIRT_PCIE_ECAM,
- VIRT_GIC_V2M,
VIRT_PLATFORM_BUS,
VIRT_PCIE_MMIO_HIGH,
};
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
new file mode 100644
index 0000000..c2fd8da
--- /dev/null
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -0,0 +1,68 @@
+/*
+ * ARM GIC support
+ *
+ * Copyright (c) 2012 Linaro Limited
+ * Copyright (c) 2015 Huawei.
+ * Written by Peter Maydell
+ * Extended to 64 cores by Shlomo Pongratz
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_ARM_GICV3_COMMON_H
+#define HW_ARM_GICV3_COMMON_H
+
+#include "hw/sysbus.h"
+#include "hw/intc/arm_gic_common.h"
+
+typedef struct GICv3State {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ /*< public >*/
+
+ qemu_irq *parent_irq;
+ qemu_irq *parent_fiq;
+
+ MemoryRegion iomem_dist; /* Distributor */
+ MemoryRegion iomem_redist; /* Redistributors */
+
+ uint32_t num_cpu;
+ uint32_t num_irq;
+ uint32_t revision;
+ bool security_extn;
+
+ int dev_fd; /* kvm device fd if backed by kvm vgic support */
+} GICv3State;
+
+#define TYPE_ARM_GICV3_COMMON "arm-gicv3-common"
+#define ARM_GICV3_COMMON(obj) \
+ OBJECT_CHECK(GICv3State, (obj), TYPE_ARM_GICV3_COMMON)
+#define ARM_GICV3_COMMON_CLASS(klass) \
+ OBJECT_CLASS_CHECK(ARMGICv3CommonClass, (klass), TYPE_ARM_GICV3_COMMON)
+#define ARM_GICV3_COMMON_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(ARMGICv3CommonClass, (obj), TYPE_ARM_GICV3_COMMON)
+
+typedef struct ARMGICv3CommonClass {
+ /*< private >*/
+ SysBusDeviceClass parent_class;
+ /*< public >*/
+
+ void (*pre_save)(GICv3State *s);
+ void (*post_load)(GICv3State *s);
+} ARMGICv3CommonClass;
+
+void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
+ const MemoryRegionOps *ops);
+
+#endif
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 59a321d..9b9901f 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -35,11 +35,6 @@
do { } while (0)
#endif
-/* Extra debugging, trap acceleration paths for more logging */
-#define VFIO_ALLOW_KVM_INTX 1
-#define VFIO_ALLOW_KVM_MSI 1
-#define VFIO_ALLOW_KVM_MSIX 1
-
enum {
VFIO_DEVICE_TYPE_PCI = 0,
VFIO_DEVICE_TYPE_PLATFORM = 1,
@@ -102,7 +97,7 @@ typedef struct VFIODevice {
int type;
bool reset_works;
bool needs_reset;
- bool allow_mmap;
+ bool no_mmap;
VFIODeviceOps *ops;
unsigned int num_irqs;
unsigned int num_regions;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 983e99e..2a58b4d 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -240,6 +240,32 @@ int kvm_device_ioctl(int fd, int type, ...);
int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr);
/**
+ * kvm_device_check_attr - check for existence of a specific device attribute
+ * @fd: The device file descriptor
+ * @group: the group
+ * @attr: the attribute of that group to query for
+ *
+ * Returns: 1 if the attribute exists
+ * 0 if the attribute either does not exist or if the vm device
+ * interface is unavailable
+ */
+int kvm_device_check_attr(int fd, uint32_t group, uint64_t attr);
+
+/**
+ * kvm_device_access - set or get value of a specific vm attribute
+ * @fd: The device file descriptor
+ * @group: the group
+ * @attr: the attribute of that group to set or get
+ * @val: pointer to a storage area for the value
+ * @write: true for set and false for get operation
+ *
+ * This function is not allowed to fail. Use kvm_device_check_attr()
+ * in order to check for the availability of optional attributes.
+ */
+void kvm_device_access(int fd, int group, uint64_t attr,
+ void *val, bool write);
+
+/**
* kvm_create_device - create a KVM device for the device control API
* @KVMState: The KVMState pointer
* @type: The KVM device type (see Documentation/virtual/kvm/devices in the
diff --git a/kvm-all.c b/kvm-all.c
index de1924c..0be4615 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -24,6 +24,7 @@
#include "qemu/atomic.h"
#include "qemu/option.h"
#include "qemu/config-file.h"
+#include "qemu/error-report.h"
#include "hw/hw.h"
#include "hw/pci/msi.h"
#include "hw/s390x/adapter.h"
@@ -2008,6 +2009,39 @@ int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr)
return ret ? 0 : 1;
}
+int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
+{
+ struct kvm_device_attr attribute = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ };
+
+ return kvm_device_ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute) ? 0 : 1;
+}
+
+void kvm_device_access(int fd, int group, uint64_t attr,
+ void *val, bool write)
+{
+ struct kvm_device_attr kvmattr;
+ int err;
+
+ kvmattr.flags = 0;
+ kvmattr.group = group;
+ kvmattr.attr = attr;
+ kvmattr.addr = (uintptr_t)val;
+
+ err = kvm_device_ioctl(fd,
+ write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
+ &kvmattr);
+ if (err < 0) {
+ error_report("KVM_%s_DEVICE_ATTR failed: %s\n"
+ "Group %d attr 0x%016" PRIx64, write ? "SET" : "GET",
+ strerror(-err), group, attr);
+ abort();
+ }
+}
+
int kvm_has_sync_mmu(void)
{
return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index b278542..6aadcd8 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -585,18 +585,23 @@ void kvm_arch_init_irq_routing(KVMState *s)
int kvm_arch_irqchip_create(KVMState *s)
{
- int ret;
-
/* If we can create the VGIC using the newer device control API, we
* let the device do this when it initializes itself, otherwise we
* fall back to the old API */
+ return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
+}
- ret = kvm_create_device(s, KVM_DEV_TYPE_ARM_VGIC_V2, true);
- if (ret == 0) {
- return 1;
+int kvm_arm_vgic_probe(void)
+{
+ if (kvm_create_device(kvm_state,
+ KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
+ return 3;
+ } else if (kvm_create_device(kvm_state,
+ KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
+ return 2;
+ } else {
+ return 0;
}
-
- return 0;
}
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index b3e0ab7..b516041 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -189,6 +189,15 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu);
*/
int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu);
+int kvm_arm_vgic_probe(void);
+
+#else
+
+static inline int kvm_arm_vgic_probe(void)
+{
+ return 0;
+}
+
#endif
static inline const char *gic_class_name(void)
@@ -196,4 +205,14 @@ static inline const char *gic_class_name(void)
return kvm_irqchip_in_kernel() ? "kvm-arm-gic" : "arm_gic";
}
+/**
+ * gicv3_class_name
+ *
+ * Return name of GICv3 class to use depending on whether KVM acceleration is
+ * in use. May throw an error if the chosen implementation is not available.
+ *
+ * Returns: class name to use
+ */
+const char *gicv3_class_name(void);
+
#endif
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 32adfe7..36a0d15 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -1,5 +1,6 @@
#include "hw/hw.h"
#include "hw/boards.h"
+#include "qemu/error-report.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "internals.h"
@@ -328,3 +329,20 @@ const VMStateDescription vmstate_arm_cpu = {
NULL
}
};
+
+const char *gicv3_class_name(void)
+{
+ if (kvm_irqchip_in_kernel()) {
+#ifdef TARGET_AARCH64
+ return "kvm-arm-gicv3";
+#else
+ error_report("KVM GICv3 acceleration is not supported on this "
+ "platform\n");
+#endif
+ } else {
+ /* TODO: Software emulation is not implemented yet */
+ error_report("KVM is currently required for GICv3 emulation\n");
+ }
+
+ exit(1);
+}
diff --git a/trace-events b/trace-events
index 88a2f14..25c53e0 100644
--- a/trace-events
+++ b/trace-events
@@ -1522,56 +1522,30 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad
pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x"
pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x"
-# hw/vfio/vfio-pci.c
+# hw/vfio/pci.c
vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c"
-vfio_eoi(const char *name) " (%s) EOI"
-vfio_enable_intx_kvm(const char *name) " (%s) KVM INTx accel enabled"
-vfio_disable_intx_kvm(const char *name) " (%s) KVM INTx accel disabled"
-vfio_update_irq(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d"
-vfio_enable_intx(const char *name) " (%s)"
-vfio_disable_intx(const char *name) " (%s)"
+vfio_intx_eoi(const char *name) " (%s) EOI"
+vfio_intx_enable_kvm(const char *name) " (%s) KVM INTx accel enabled"
+vfio_intx_disable_kvm(const char *name) " (%s) KVM INTx accel disabled"
+vfio_intx_update(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d"
+vfio_intx_enable(const char *name) " (%s)"
+vfio_intx_disable(const char *name) " (%s)"
vfio_msi_interrupt(const char *name, int index, uint64_t addr, int data) " (%s) vector %d 0x%"PRIx64"/0x%x"
vfio_msix_vector_do_use(const char *name, int index) " (%s) vector %d used"
vfio_msix_vector_release(const char *name, int index) " (%s) vector %d released"
-vfio_enable_msix(const char *name) " (%s)"
-vfio_enable_msi(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors"
-vfio_disable_msix(const char *name) " (%s)"
-vfio_disable_msi(const char *name) " (%s)"
+vfio_msix_enable(const char *name) " (%s)"
+vfio_msix_disable(const char *name) " (%s)"
+vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors"
+vfio_msi_disable(const char *name) " (%s)"
vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_rom_read(const char *name, uint64_t addr, int size, uint64_t data) " (%s, 0x%"PRIx64", 0x%x) = 0x%"PRIx64
vfio_pci_size_rom(const char *name, int size) "%s ROM size 0x%x"
vfio_vga_write(uint64_t addr, uint64_t data, int size) " (0x%"PRIx64", 0x%"PRIx64", %d)"
vfio_vga_read(uint64_t addr, int size, uint64_t data) " (0x%"PRIx64", %d) = 0x%"PRIx64
-# remove ) =
-vfio_generic_window_quirk_read(const char * region_name, const char *name, int index, uint64_t addr, int size, uint64_t data) "%s read(%s:BAR%d+0x%"PRIx64", %d = 0x%"PRIx64
-## remove )
-vfio_generic_window_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d"
-# remove ) =
-vfio_generic_quirk_read(const char * region_name, const char *name, int index, uint64_t addr, int size, uint64_t data) "%s read(%s:BAR%d+0x%"PRIx64", %d = 0x%"PRIx64
-# remove )
-vfio_generic_quirk_write(const char * region_name, const char *name, int index, uint64_t addr, uint64_t data, int size) "%s write(%s:BAR%d+0x%"PRIx64", 0x%"PRIx64", %d"
-vfio_ati_3c3_quirk_read(uint64_t data) " (0x3c3, 1) = 0x%"PRIx64
-vfio_vga_probe_ati_3c3_quirk(const char *name) "Enabled ATI/AMD quirk 0x3c3 BAR4for device %s"
-vfio_probe_ati_bar4_window_quirk(const char *name) "Enabled ATI/AMD BAR4 window quirk for device %s"
-#issue with )
-vfio_rtl8168_window_quirk_read_fake(const char *region_name, const char *name) "%s fake read(%s"
-vfio_rtl8168_window_quirk_read_table(const char *region_name, const char *name) "%s MSI-X table read(%s"
-vfio_rtl8168_window_quirk_read_direct(const char *region_name, const char *name) "%s direct read(%s"
-vfio_rtl8168_window_quirk_write_table(const char *region_name, const char *name) "%s MSI-X table write(%s"
-vfio_rtl8168_window_quirk_write_direct(const char *region_name, const char *name) "%s direct write(%s"
-vfio_probe_rtl8168_bar2_window_quirk(const char *name) "Enabled RTL8168 BAR2 window quirk for device %s"
-vfio_probe_ati_bar2_4000_quirk(const char *name) "Enabled ATI/AMD BAR2 0x4000 quirk for device %s"
-vfio_nvidia_3d0_quirk_read(int size, uint64_t data) " (0x3d0, %d) = 0x%"PRIx64
-vfio_nvidia_3d0_quirk_write(uint64_t data, int size) " (0x3d0, 0x%"PRIx64", %d)"
-vfio_vga_probe_nvidia_3d0_quirk(const char *name) "Enabled NVIDIA VGA 0x3d0 quirk for device %s"
-vfio_probe_nvidia_bar5_window_quirk(const char *name) "Enabled NVIDIA BAR5 window quirk for device %s"
-vfio_probe_nvidia_bar0_88000_quirk(const char *name) "Enabled NVIDIA BAR0 0x88000 quirk for device %s"
-vfio_probe_nvidia_bar0_1800_quirk_id(int id) "Nvidia NV%02x"
-vfio_probe_nvidia_bar0_1800_quirk(const char *name) "Enabled NVIDIA BAR0 0x1800 quirk for device %s"
vfio_pci_read_config(const char *name, int addr, int len, int val) " (%s, @0x%x, len=0x%x) %x"
vfio_pci_write_config(const char *name, int addr, int val, int len) " (%s, @0x%x, 0x%x, len=0x%x)"
-vfio_setup_msi(const char *name, int pos) "%s PCI MSI CAP @0x%x"
-vfio_early_setup_msix(const char *name, int pos, int table_bar, int offset, int entries) "%s PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d"
+vfio_msi_setup(const char *name, int pos) "%s PCI MSI CAP @0x%x"
+vfio_msix_early_setup(const char *name, int pos, int table_bar, int offset, int entries) "%s PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d"
vfio_check_pcie_flr(const char *name) "%s Supports FLR via PCIe cap"
vfio_check_pm_reset(const char *name) "%s Supports PM reset"
vfio_check_af_flr(const char *name) "%s Supports FLR via AF cap"
@@ -1586,6 +1560,41 @@ vfio_initfn(const char *name, int group_id) " (%s) group %d"
vfio_pci_reset(const char *name) " (%s)"
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
vfio_pci_reset_pm(const char *name) "%s PCI PM Reset"
+vfio_pci_emulated_vendor_id(const char *name, uint16_t val) "%s %04x"
+vfio_pci_emulated_device_id(const char *name, uint16_t val) "%s %04x"
+vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s %04x"
+vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s %04x"
+
+# hw/vfio/pci-quirks.
+vfio_quirk_rom_blacklisted(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x"
+vfio_quirk_generic_window_address_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64
+vfio_quirk_generic_window_data_read(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64
+vfio_quirk_generic_window_data_write(const char *name, const char * region_name, uint64_t data) "%s %s 0x%"PRIx64
+vfio_quirk_generic_mirror_read(const char *name, const char * region_name, uint64_t addr, uint64_t data) "%s %s 0x%"PRIx64": 0x%"PRIx64
+vfio_quirk_generic_mirror_write(const char *name, const char * region_name, uint64_t addr, uint64_t data) "%s %s 0x%"PRIx64": 0x%"PRIx64
+vfio_quirk_ati_3c3_read(const char *name, uint64_t data) "%s 0x%"PRIx64
+vfio_quirk_ati_3c3_probe(const char *name) "%s"
+vfio_quirk_ati_bar4_probe(const char *name) "%s"
+vfio_quirk_ati_bar2_probe(const char *name) "%s"
+vfio_quirk_nvidia_3d0_state(const char *name, const char *state) "%s %s"
+vfio_quirk_nvidia_3d0_read(const char *name, uint8_t offset, unsigned size, uint64_t val) " (%s, @0x%x, len=0x%x) %"PRIx64
+vfio_quirk_nvidia_3d0_write(const char *name, uint8_t offset, uint64_t data, unsigned size) "(%s, @0x%x, 0x%"PRIx64", len=0x%x)"
+vfio_quirk_nvidia_3d0_probe(const char *name) "%s"
+vfio_quirk_nvidia_bar5_state(const char *name, const char *state) "%s %s"
+vfio_quirk_nvidia_bar5_probe(const char *name) "%s"
+vfio_quirk_nvidia_bar0_msi_ack(const char *name) "%s"
+vfio_quirk_nvidia_bar0_probe(const char *name) "%s"
+vfio_quirk_rtl8168_fake_latch(const char *name, uint64_t val) "%s 0x%"PRIx64
+vfio_quirk_rtl8168_msix_write(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table write[0x%x]: 0x%"PRIx64
+vfio_quirk_rtl8168_msix_read(const char *name, uint16_t offset, uint64_t val) "%s MSI-X table read[0x%x]: 0x%"PRIx64
+vfio_quirk_rtl8168_probe(const char *name) "%s"
+
+vfio_quirk_ati_bonaire_reset_skipped(const char *name) "%s"
+vfio_quirk_ati_bonaire_reset_no_smc(const char *name) "%s"
+vfio_quirk_ati_bonaire_reset_timeout(const char *name) "%s"
+vfio_quirk_ati_bonaire_reset_done(const char *name) "%s"
+vfio_quirk_ati_bonaire_reset(const char *name) "%s"
+
# hw/vfio/vfio-common.c
vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"