aboutsummaryrefslogtreecommitdiff
path: root/hw/pci
diff options
context:
space:
mode:
Diffstat (limited to 'hw/pci')
-rw-r--r--hw/pci/msi.c2
-rw-r--r--hw/pci/msix.c15
-rw-r--r--hw/pci/pci-hmp-cmds.c28
-rw-r--r--hw/pci/pci-stub.c6
-rw-r--r--hw/pci/pci.c541
-rw-r--r--hw/pci/pci_bridge.c12
-rw-r--r--hw/pci/pci_host.c15
-rw-r--r--hw/pci/pcie.c203
-rw-r--r--hw/pci/pcie_port.c26
-rw-r--r--hw/pci/pcie_sriov.c413
-rw-r--r--hw/pci/trace-events8
11 files changed, 996 insertions, 273 deletions
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index 8104ac1..b9f5b45 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -23,7 +23,7 @@
#include "hw/xen/xen.h"
#include "qemu/range.h"
#include "qapi/error.h"
-#include "sysemu/xen.h"
+#include "system/xen.h"
#include "hw/i386/kvm/xen_evtchn.h"
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 487e498..8c7f670 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -15,11 +15,12 @@
*/
#include "qemu/osdep.h"
+#include "qemu/log.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci.h"
#include "hw/xen/xen.h"
-#include "sysemu/xen.h"
+#include "system/xen.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
#include "qemu/range.h"
@@ -71,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
return dev->msix_pba + vector / 8;
}
-static int msix_is_pending(PCIDevice *dev, int vector)
+int msix_is_pending(PCIDevice *dev, unsigned int vector)
{
return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}
@@ -250,7 +251,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
PCIDevice *dev = opaque;
if (dev->msix_vector_poll_notifier) {
unsigned vector_start = addr * 8;
- unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr);
+ unsigned vector_end = MIN((addr + size) * 8, dev->msix_entries_nr);
dev->msix_vector_poll_notifier(dev, vector_start, vector_end);
}
@@ -260,6 +261,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
static void msix_pba_mmio_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
+ PCIDevice *dev = opaque;
+
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "PCI [%s:%02x:%02x.%x] attempt to write to MSI-X "
+ "PBA at 0x%" FMT_PCIBUS ", ignoring.\n",
+ pci_root_bus_path(dev), pci_dev_bus_num(dev),
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
+ addr);
}
static const MemoryRegionOps msix_pba_mmio_ops = {
diff --git a/hw/pci/pci-hmp-cmds.c b/hw/pci/pci-hmp-cmds.c
index b09fce9..a5f6483 100644
--- a/hw/pci/pci-hmp-cmds.c
+++ b/hw/pci/pci-hmp-cmds.c
@@ -20,7 +20,7 @@
#include "monitor/monitor.h"
#include "pci-internal.h"
#include "qapi/error.h"
-#include "qapi/qmp/qdict.h"
+#include "qobject/qdict.h"
#include "qapi/qapi-commands-pci.h"
#include "qemu/cutils.h"
@@ -83,15 +83,25 @@ static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev)
monitor_printf(mon, " BAR%" PRId64 ": ", region->value->bar);
if (!strcmp(region->value->type, "io")) {
- monitor_printf(mon, "I/O at 0x%04" PRIx64
- " [0x%04" PRIx64 "].\n",
- addr, addr + size - 1);
+ if (addr != PCI_BAR_UNMAPPED) {
+ monitor_printf(mon, "I/O at 0x%04" PRIx64
+ " [0x%04" PRIx64 "]\n",
+ addr, addr + size - 1);
+ } else {
+ monitor_printf(mon, "I/O (not mapped)\n");
+ }
} else {
- monitor_printf(mon, "%d bit%s memory at 0x%08" PRIx64
- " [0x%08" PRIx64 "].\n",
- region->value->mem_type_64 ? 64 : 32,
- region->value->prefetch ? " prefetchable" : "",
- addr, addr + size - 1);
+ if (addr != PCI_BAR_UNMAPPED) {
+ monitor_printf(mon, "%d bit%s memory at 0x%08" PRIx64
+ " [0x%08" PRIx64 "]\n",
+ region->value->mem_type_64 ? 64 : 32,
+ region->value->prefetch ? " prefetchable" : "",
+ addr, addr + size - 1);
+ } else {
+ monitor_printf(mon, "%d bit%s memory (not mapped)\n",
+ region->value->mem_type_64 ? 64 : 32,
+ region->value->prefetch ? " prefetchable" : "");
+ }
}
}
diff --git a/hw/pci/pci-stub.c b/hw/pci/pci-stub.c
index f050868..3397d0c 100644
--- a/hw/pci/pci-stub.c
+++ b/hw/pci/pci-stub.c
@@ -46,14 +46,12 @@ void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
/* kvm-all wants this */
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
{
- g_assert(false);
- return (MSIMessage){};
+ g_assert_not_reached();
}
uint16_t pci_requester_id(PCIDevice *dev)
{
- g_assert(false);
- return 0;
+ g_assert_not_reached();
}
/* Required by ahci.c */
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index fab86d0..c70b5ce 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -32,12 +32,13 @@
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "migration/cpr.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
#include "net/net.h"
-#include "sysemu/numa.h"
-#include "sysemu/runstate.h"
-#include "sysemu/sysemu.h"
+#include "system/numa.h"
+#include "system/runstate.h"
+#include "system/system.h"
#include "hw/loader.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
@@ -46,6 +47,7 @@
#include "hw/pci/msix.h"
#include "hw/hotplug.h"
#include "hw/boards.h"
+#include "hw/nvram/fw_cfg.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "pci-internal.h"
@@ -53,13 +55,6 @@
#include "hw/xen/xen.h"
#include "hw/i386/kvm/xen_evtchn.h"
-//#define DEBUG_PCI
-#ifdef DEBUG_PCI
-# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__)
-#else
-# define PCI_DPRINTF(format, ...) do { } while (0)
-#endif
-
bool pci_available = true;
static char *pcibus_get_dev_path(DeviceState *dev);
@@ -67,11 +62,24 @@ static char *pcibus_get_fw_dev_path(DeviceState *dev);
static void pcibus_reset_hold(Object *obj, ResetType type);
static bool pcie_has_upstream_port(PCIDevice *dev);
-static Property pci_props[] = {
+static void prop_pci_busnr_get(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ uint8_t busnr = pci_dev_bus_num(PCI_DEVICE(obj));
+
+ visit_type_uint8(v, name, &busnr, errp);
+}
+
+static const PropertyInfo prop_pci_busnr = {
+ .type = "busnr",
+ .get = prop_pci_busnr_get,
+};
+
+static const Property pci_props[] = {
DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, UINT32_MAX),
- DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1),
+ DEFINE_PROP_INT32("rombar", PCIDevice, rom_bar, -1),
DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present,
QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present,
@@ -85,7 +93,12 @@ static Property pci_props[] = {
QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
- DEFINE_PROP_END_OF_LIST()
+ DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice,
+ max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE),
+ DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
+ DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present,
+ QEMU_PCIE_EXT_TAG_BITNR, true),
+ { .name = "busnr", .info = &prop_pci_busnr },
};
static const VMStateDescription vmstate_pcibus = {
@@ -116,6 +129,12 @@ static GSequence *pci_acpi_index_list(void)
return used_acpi_index_list;
}
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+ memory_region_set_enabled(&d->bus_master_enable_region, enable);
+ d->is_master = enable; /* cache the status */
+}
+
static void pci_init_bus_master(PCIDevice *pci_dev)
{
AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -123,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
memory_region_init_alias(&pci_dev->bus_master_enable_region,
OBJECT(pci_dev), "bus master",
dma_as->root, 0, memory_region_size(dma_as->root));
- memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+ pci_set_master(pci_dev, false);
memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
&pci_dev->bus_master_enable_region);
}
@@ -198,11 +217,57 @@ static uint16_t pcibus_numa_node(PCIBus *bus)
return NUMA_NODE_UNASSIGNED;
}
-static void pci_bus_class_init(ObjectClass *klass, void *data)
+bool pci_bus_add_fw_cfg_extra_pci_roots(FWCfgState *fw_cfg,
+ PCIBus *bus,
+ Error **errp)
+{
+ Object *obj;
+
+ if (!bus) {
+ return true;
+ }
+ obj = OBJECT(bus);
+
+ return fw_cfg_add_file_from_generator(fw_cfg, obj->parent,
+ object_get_canonical_path_component(obj),
+ "etc/extra-pci-roots", errp);
+}
+
+static GByteArray *pci_bus_fw_cfg_gen_data(Object *obj, Error **errp)
+{
+ PCIBus *bus = PCI_BUS(obj);
+ GByteArray *byte_array;
+ uint64_t extra_hosts = 0;
+
+ if (!bus) {
+ return NULL;
+ }
+
+ QLIST_FOREACH(bus, &bus->child, sibling) {
+ /* look for expander root buses */
+ if (pci_bus_is_root(bus)) {
+ extra_hosts++;
+ }
+ }
+
+ if (!extra_hosts) {
+ return NULL;
+ }
+ extra_hosts = cpu_to_le64(extra_hosts);
+
+ byte_array = g_byte_array_new();
+ g_byte_array_append(byte_array,
+ (const void *)&extra_hosts, sizeof(extra_hosts));
+
+ return byte_array;
+}
+
+static void pci_bus_class_init(ObjectClass *klass, const void *data)
{
BusClass *k = BUS_CLASS(klass);
PCIBusClass *pbc = PCI_BUS_CLASS(klass);
ResettableClass *rc = RESETTABLE_CLASS(klass);
+ FWCfgDataGeneratorClass *fwgc = FW_CFG_DATA_GENERATOR_CLASS(klass);
k->print_dev = pcibus_dev_print;
k->get_dev_path = pcibus_get_dev_path;
@@ -214,6 +279,8 @@ static void pci_bus_class_init(ObjectClass *klass, void *data)
pbc->bus_num = pcibus_num;
pbc->numa_node = pcibus_numa_node;
+
+ fwgc->get_data = pci_bus_fw_cfg_gen_data;
}
static const TypeInfo pci_bus_info = {
@@ -222,6 +289,10 @@ static const TypeInfo pci_bus_info = {
.instance_size = sizeof(PCIBus),
.class_size = sizeof(PCIBusClass),
.class_init = pci_bus_class_init,
+ .interfaces = (const InterfaceInfo[]) {
+ { TYPE_FW_CFG_DATA_GENERATOR_INTERFACE },
+ { }
+ }
};
static const TypeInfo cxl_interface_info = {
@@ -239,7 +310,7 @@ static const TypeInfo conventional_pci_interface_info = {
.parent = TYPE_INTERFACE,
};
-static void pcie_bus_class_init(ObjectClass *klass, void *data)
+static void pcie_bus_class_init(ObjectClass *klass, const void *data)
{
BusClass *k = BUS_CLASS(klass);
@@ -365,6 +436,84 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
attrs, NULL);
}
+/*
+ * Register and track a PM capability. If wmask is also enabled for the power
+ * state field of the pmcsr register, guest writes may change the device PM
+ * state. BAR access is only enabled while the device is in the D0 state.
+ * Return the capability offset or negative error code.
+ */
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
+{
+ int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp);
+
+ if (cap < 0) {
+ return cap;
+ }
+
+ d->pm_cap = cap;
+ d->cap_present |= QEMU_PCI_CAP_PM;
+
+ return cap;
+}
+
+static uint8_t pci_pm_state(PCIDevice *d)
+{
+ uint16_t pmcsr;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
+ return 0;
+ }
+
+ pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
+
+ return pmcsr & PCI_PM_CTRL_STATE_MASK;
+}
+
+/*
+ * Update the PM capability state based on the new value stored in config
+ * space respective to the old, pre-write state provided. If the new value
+ * is rejected (unsupported or invalid transition) restore the old value.
+ * Return the resulting PM state.
+ */
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
+{
+ uint16_t pmc;
+ uint8_t new;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM) ||
+ !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
+ return old;
+ }
+
+ new = pci_pm_state(d);
+ if (new == old) {
+ return old;
+ }
+
+ pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
+
+ /*
+ * Transitions to D1 & D2 are only allowed if supported. Devices may
+ * only transition to higher D-states or to D0.
+ */
+ if ((!(pmc & PCI_PM_CAP_D1) && new == 1) ||
+ (!(pmc & PCI_PM_CAP_D2) && new == 2) ||
+ (old && new && new < old)) {
+ pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ old);
+ trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
+ PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+ old, new);
+ return old;
+ }
+
+ trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn),
+ PCI_FUNC(d->devfn), old, new);
+ return new;
+}
+
static void pci_reset_regions(PCIDevice *dev)
{
int r;
@@ -389,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev)
static void pci_do_device_reset(PCIDevice *dev)
{
+ if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) {
+ return;
+ }
+
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);
@@ -404,6 +557,11 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+ /* Default PM state is D0 */
+ if (dev->cap_present & QEMU_PCI_CAP_PM) {
+ pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ }
pci_reset_regions(dev);
pci_update_mappings(dev);
@@ -657,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
pci_bridge_update_mappings(PCI_BRIDGE(s));
}
- memory_region_set_enabled(&s->bus_master_enable_region,
- pci_get_word(s->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER);
+ pci_set_master(s, pci_get_word(s->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER);
g_free(config);
return 0;
@@ -733,10 +890,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id)
return !pci_is_express((PCIDevice *)opaque);
}
+static int pci_post_load(void *opaque, int version_id)
+{
+ pcie_sriov_pf_post_load(opaque);
+ return 0;
+}
+
const VMStateDescription vmstate_pci_device = {
.name = "PCIDevice",
.version_id = 2,
.minimum_version_id = 1,
+ .post_load = pci_post_load,
.fields = (const VMStateField[]) {
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
@@ -952,13 +1116,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}
- /*
- * With SR/IOV and ARI, a device at function 0 need not be a multifunction
- * device, as it may just be a VF that ended up with function 0 in
- * the legacy PCI interpretation. Avoid failing in such cases:
- */
- if (pci_is_vf(dev) &&
- dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ /* SR/IOV is not handled here. */
+ if (pci_is_vf(dev)) {
return;
}
@@ -991,7 +1150,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/* function 0 indicates single function, so function > 0 must be NULL */
for (func = 1; func < PCI_FUNC_MAX; ++func) {
- if (bus->devices[PCI_DEVFN(slot, func)]) {
+ PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
+ if (device && !pci_is_vf(device)) {
error_setg(errp, "PCI: %x.0 indicates single function, "
"but %x.%x is already populated.",
slot, slot, func);
@@ -1179,14 +1339,15 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
PCI_SLOT(devfn), PCI_FUNC(devfn), name,
bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
return NULL;
- } /*
- * Populating function 0 triggers a scan from the guest that
- * exposes other non-zero functions. Hence we need to ensure that
- * function 0 wasn't added yet.
- */
- else if (dev->hotplugged &&
- !pci_is_vf(pci_dev) &&
- pci_get_function_0(pci_dev)) {
+ }
+
+ /*
+ * Populating function 0 triggers a scan from the guest that
+ * exposes other non-zero functions. Hence we need to ensure that
+ * function 0 wasn't added yet.
+ */
+ if (dev->hotplugged && !pci_is_vf(pci_dev) &&
+ pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
" new func %s cannot be exposed to guest.",
PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
@@ -1204,6 +1365,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
"bus master container", UINT64_MAX);
address_space_init(&pci_dev->bus_master_as,
&pci_dev->bus_master_container_region, pci_dev->name);
+ pci_dev->bus_master_as.max_bounce_buffer_size =
+ pci_dev->max_bounce_buffer_size;
if (phase_check(PHASE_MACHINE_READY)) {
pci_init_bus_master(pci_dev);
@@ -1276,6 +1439,7 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_unregister_io_regions(pci_dev);
pci_del_option_rom(pci_dev);
+ pcie_sriov_unregister_device(pci_dev);
if (pc->exit) {
pc->exit(pci_dev);
@@ -1307,7 +1471,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
- assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@@ -1318,7 +1481,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
r = &pci_dev->io_regions[region_num];
- r->addr = PCI_BAR_UNMAPPED;
+ assert(!r->size);
r->size = size;
r->type = type;
r->memory = memory;
@@ -1326,22 +1489,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
? pci_get_bus(pci_dev)->address_space_io
: pci_get_bus(pci_dev)->address_space_mem;
- wmask = ~(size - 1);
- if (region_num == PCI_ROM_SLOT) {
- /* ROM enable bit is writable */
- wmask |= PCI_ROM_ADDRESS_ENABLE;
- }
+ if (pci_is_vf(pci_dev)) {
+ PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
+ assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
- addr = pci_bar(pci_dev, region_num);
- pci_set_long(pci_dev->config + addr, type);
-
- if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(pci_dev->wmask + addr, wmask);
- pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
+ if (r->addr != PCI_BAR_UNMAPPED) {
+ memory_region_add_subregion_overlap(r->address_space,
+ r->addr, r->memory, 1);
+ }
} else {
- pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
- pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ r->addr = PCI_BAR_UNMAPPED;
+
+ wmask = ~(size - 1);
+ if (region_num == PCI_ROM_SLOT) {
+ /* ROM enable bit is writable */
+ wmask |= PCI_ROM_ADDRESS_ENABLE;
+ }
+
+ addr = pci_bar(pci_dev, region_num);
+ pci_set_long(pci_dev->config + addr, type);
+
+ if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(pci_dev->wmask + addr, wmask);
+ pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ } else {
+ pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
+ pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ }
}
}
@@ -1430,7 +1606,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
uint16_t vf_stride =
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
+ uint32_t vf_num = d->devfn - (pf->devfn + vf_offset);
+
+ if (vf_num) {
+ vf_num /= vf_stride;
+ }
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
new_addr = pci_get_quad(pf->config + bar);
@@ -1525,7 +1705,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->has_power) {
+ if (!d->enabled || pci_pm_state(d)) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1555,7 +1735,7 @@ static void pci_update_mappings(PCIDevice *d)
pci_update_vga(d);
}
-static inline int pci_irq_disabled(PCIDevice *d)
+int pci_irq_disabled(PCIDevice *d)
{
return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE;
}
@@ -1591,6 +1771,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
{
+ uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
int i, was_irq_disabled = pci_irq_disabled(d);
uint32_t val = val_in;
@@ -1603,17 +1784,21 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
}
+
+ new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
+
if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
- range_covers_byte(addr, l, PCI_COMMAND))
+ range_covers_byte(addr, l, PCI_COMMAND) ||
+ !!new_pm_state != !!old_pm_state) {
pci_update_mappings(d);
+ }
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) &
+ PCI_COMMAND_MASTER) && d->enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2098,6 +2283,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
+ if (!pcie_sriov_register_device(pci_dev, errp)) {
+ pci_qdev_unrealize(DEVICE(pci_dev));
+ return;
+ }
+
/*
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
* associate only Device 0 with the device attached to the bus
@@ -2269,12 +2459,12 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Only a valid rom will be patched. */
rom_magic = pci_get_word(ptr);
if (rom_magic != 0xaa55) {
- PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic);
+ trace_pci_bad_rom_magic(rom_magic, 0xaa55);
return;
}
pcir_offset = pci_get_word(ptr + 0x18);
if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) {
- PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset);
+ trace_pci_bad_pcir_offset(pcir_offset);
return;
}
@@ -2283,8 +2473,8 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
rom_vendor_id = pci_get_word(ptr + pcir_offset + 4);
rom_device_id = pci_get_word(ptr + pcir_offset + 6);
- PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile,
- vendor_id, device_id, rom_vendor_id, rom_device_id);
+ trace_pci_rom_and_pci_ids(pdev->romfile, vendor_id, device_id,
+ rom_vendor_id, rom_device_id);
checksum = ptr[6];
@@ -2292,7 +2482,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Patch vendor id and checksum (at offset 6 for etherboot roms). */
checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8);
checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8);
- PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum);
+ trace_pci_rom_checksum_change(ptr[6], checksum);
ptr[6] = checksum;
pci_set_word(ptr + pcir_offset + 4, vendor_id);
}
@@ -2301,7 +2491,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Patch device id and checksum (at offset 6 for etherboot roms). */
checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8);
checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8);
- PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum);
+ trace_pci_rom_checksum_change(ptr[6], checksum);
ptr[6] = checksum;
pci_set_word(ptr + pcir_offset + 6, device_id);
}
@@ -2352,6 +2542,14 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
return;
}
+ if (pci_is_vf(pdev)) {
+ if (pdev->rom_bar > 0) {
+ error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF");
+ }
+
+ return;
+ }
+
if (load_file || pdev->romsize == UINT32_MAX) {
path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
if (path == NULL) {
@@ -2625,7 +2823,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev)
return pci_get_bus(dev)->address_space_io;
}
-static void pci_device_class_init(ObjectClass *klass, void *data)
+static void pci_device_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *k = DEVICE_CLASS(klass);
@@ -2633,9 +2831,13 @@ static void pci_device_class_init(ObjectClass *klass, void *data)
k->unrealize = pci_qdev_unrealize;
k->bus_type = TYPE_PCI_BUS;
device_class_set_props(k, pci_props);
+ object_class_property_set_description(
+ klass, "x-max-bounce-buffer-size",
+ "Maximum buffer size allocated for bounce buffers used for mapped "
+ "access to indirect DMA memory");
}
-static void pci_device_class_base_init(ObjectClass *klass, void *data)
+static void pci_device_class_base_init(ObjectClass *klass, const void *data)
{
if (!object_class_is_abstract(klass)) {
ObjectClass *conventional =
@@ -2742,6 +2944,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
return &address_space_memory;
}
+int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n,
+ IOMMUNotify fn, void *opaque)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) {
+ iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque,
+ devfn, n, fn, opaque);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
Error **errp)
{
@@ -2773,6 +2992,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
}
}
+int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req,
+ bool exec_req, hwaddr addr, bool lpig,
+ uint16_t prgi, bool is_read, bool is_write)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (!pcie_pri_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) {
+ return iommu_bus->iommu_ops->pri_request_page(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, lpig, prgi,
+ is_read, is_write);
+ }
+
+ return -ENODEV;
+}
+
+int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUPRINotifier *notifier)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) {
+ iommu_bus->iommu_ops->pri_register_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, notifier);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) {
+ iommu_bus->iommu_ops->pri_unregister_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid);
+ }
+}
+
+ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write, IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (result_length == 0) {
+ return -ENOSPC;
+ }
+
+ if (!pcie_ats_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) {
+ return iommu_bus->iommu_ops->ats_request_translation(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, length,
+ no_write, result,
+ result_length, err_count);
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) {
+ iommu_bus->iommu_ops->register_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque, devfn,
+ pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) {
+ iommu_bus->iommu_ops->unregister_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width,
+ uint32_t *min_page_size)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) {
+ iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque,
+ addr_width, min_page_size);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
@@ -2886,16 +3269,30 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
void pci_set_power(PCIDevice *d, bool state)
{
- if (d->has_power == state) {
+ /*
+ * Don't change the enabled state of VFs when powering on/off the device.
+ *
+ * When powering on, VFs must not be enabled immediately but they must
+ * wait until the guest configures SR-IOV.
+ * When powering off, their corresponding PFs will be reset and disable
+ * VFs.
+ */
+ if (!pci_is_vf(d)) {
+ pci_set_enabled(d, state);
+ }
+}
+
+void pci_set_enabled(PCIDevice *d, bool state)
+{
+ if (d->enabled == state) {
return;
}
- d->has_power = state;
+ d->enabled = state;
pci_update_mappings(d);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
- if (!d->has_power) {
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->enabled);
+ if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
}
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 6a4e388..76255c4 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -380,9 +380,12 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename)
sec_bus->map_irq = br->map_irq ? br->map_irq : pci_swizzle_map_irq_fn;
sec_bus->address_space_mem = &br->address_space_mem;
memory_region_init(&br->address_space_mem, OBJECT(br), "pci_bridge_pci", UINT64_MAX);
+ address_space_init(&br->as_mem, &br->address_space_mem,
+ "pci_bridge_pci_mem");
sec_bus->address_space_io = &br->address_space_io;
memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io",
4 * GiB);
+ address_space_init(&br->as_io, &br->address_space_io, "pci_bridge_pci_io");
pci_bridge_region_init(br);
QLIST_INIT(&sec_bus->child);
QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
@@ -399,6 +402,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
PCIBridge *s = PCI_BRIDGE(pci_dev);
assert(QLIST_EMPTY(&s->sec_bus.child));
QLIST_REMOVE(&s->sec_bus, sibling);
+ address_space_destroy(&s->as_mem);
+ address_space_destroy(&s->as_io);
pci_bridge_region_del(s, &s->windows);
pci_bridge_region_cleanup(s, &s->windows);
/* object_unparent() is called automatically during device deletion */
@@ -472,13 +477,12 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset,
return 0;
}
-static Property pci_bridge_properties[] = {
+static const Property pci_bridge_properties[] = {
DEFINE_PROP_BOOL("x-pci-express-writeable-slt-bug", PCIBridge,
pcie_writeable_slt_bug, false),
- DEFINE_PROP_END_OF_LIST(),
};
-static void pci_bridge_class_init(ObjectClass *klass, void *data)
+static void pci_bridge_class_init(ObjectClass *klass, const void *data)
{
AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
DeviceClass *k = DEVICE_CLASS(klass);
@@ -493,7 +497,7 @@ static const TypeInfo pci_bridge_type_info = {
.instance_size = sizeof(PCIBridge),
.class_init = pci_bridge_class_init,
.abstract = true,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_ACPI_DEV_AML_IF },
{ },
},
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index dfe6fe6..7179d99 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
* allowing direct removal of unexposed functions.
*/
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
+ !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
return;
}
@@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
* allowing direct removal of unexposed functions.
*/
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
+ !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
return ~0x0;
}
@@ -217,12 +217,6 @@ const MemoryRegionOps pci_host_data_le_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
-const MemoryRegionOps pci_host_data_be_ops = {
- .read = pci_host_data_read,
- .write = pci_host_data_write,
- .endianness = DEVICE_BIG_ENDIAN,
-};
-
static bool pci_host_needed(void *opaque)
{
PCIHostState *s = opaque;
@@ -240,14 +234,13 @@ const VMStateDescription vmstate_pcihost = {
}
};
-static Property pci_host_properties_common[] = {
+static const Property pci_host_properties_common[] = {
DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState,
mig_enabled, true),
DEFINE_PROP_BOOL(PCI_HOST_BYPASS_IOMMU, PCIHostState, bypass_iommu, false),
- DEFINE_PROP_END_OF_LIST(),
};
-static void pci_host_class_init(ObjectClass *klass, void *data)
+static void pci_host_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
device_class_set_props(dc, pci_host_properties_common);
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 4b2f080..eaeb688 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -86,7 +86,13 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version)
* Specification, Revision 1.1., or subsequent PCI Express Base
* Specification revisions.
*/
- pci_set_long(exp_cap + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER);
+ uint32_t devcap = PCI_EXP_DEVCAP_RBER;
+
+ if (dev->cap_present & QEMU_PCIE_EXT_TAG) {
+ devcap = PCI_EXP_DEVCAP_RBER | PCI_EXP_DEVCAP_EXT_TAG;
+ }
+
+ pci_set_long(exp_cap + PCI_EXP_DEVCAP, devcap);
pci_set_long(exp_cap + PCI_EXP_LNKCAP,
(port << PCI_EXP_LNKCAP_PN_SHIFT) |
@@ -105,46 +111,18 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version)
pci_set_word(cmask + PCI_EXP_LNKSTA, 0);
}
-static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
+/* Includes setting the target speed default */
+static void pcie_cap_fill_lnk(uint8_t *exp_cap, PCIExpLinkWidth width,
+ PCIExpLinkSpeed speed)
{
- PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT);
- uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
-
- /* Skip anything that isn't a PCIESlot */
- if (!s) {
- return;
- }
-
/* Clear and fill LNKCAP from what was configured above */
pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP,
PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
- QEMU_PCI_EXP_LNKCAP_MLW(s->width) |
- QEMU_PCI_EXP_LNKCAP_MLS(s->speed));
-
- /*
- * Link bandwidth notification is required for all root ports and
- * downstream ports supporting links wider than x1 or multiple link
- * speeds.
- */
- if (s->width > QEMU_PCI_EXP_LNK_X1 ||
- s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
- pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
- PCI_EXP_LNKCAP_LBNC);
- }
-
- if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
- /*
- * Hot-plug capable downstream ports and downstream ports supporting
- * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC
- * to 1b. PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which
- * we also hardwire to 1b here. 2.5GT/s hot-plug slots should also
- * technically implement this, but it's not done here for compatibility.
- */
- pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
- PCI_EXP_LNKCAP_DLLLARC);
- /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */
+ QEMU_PCI_EXP_LNKCAP_MLW(width) |
+ QEMU_PCI_EXP_LNKCAP_MLS(speed));
+ if (speed > QEMU_PCI_EXP_LNK_2_5GT) {
/*
* Target Link Speed defaults to the highest link speed supported by
* the component. 2.5GT/s devices are permitted to hardwire to zero.
@@ -152,7 +130,7 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2,
PCI_EXP_LNKCTL2_TLS);
pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2,
- QEMU_PCI_EXP_LNKCAP_MLS(s->speed) &
+ QEMU_PCI_EXP_LNKCAP_MLS(speed) &
PCI_EXP_LNKCTL2_TLS);
}
@@ -161,27 +139,82 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
* actually a reference to the highest bit supported in this register.
* We assume the device supports all link speeds.
*/
- if (s->speed > QEMU_PCI_EXP_LNK_5GT) {
+ if (speed > QEMU_PCI_EXP_LNK_5GT) {
pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U);
pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
PCI_EXP_LNKCAP2_SLS_2_5GB |
PCI_EXP_LNKCAP2_SLS_5_0GB |
PCI_EXP_LNKCAP2_SLS_8_0GB);
- if (s->speed > QEMU_PCI_EXP_LNK_8GT) {
+ if (speed > QEMU_PCI_EXP_LNK_8GT) {
pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
PCI_EXP_LNKCAP2_SLS_16_0GB);
}
- if (s->speed > QEMU_PCI_EXP_LNK_16GT) {
+ if (speed > QEMU_PCI_EXP_LNK_16GT) {
pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
PCI_EXP_LNKCAP2_SLS_32_0GB);
}
- if (s->speed > QEMU_PCI_EXP_LNK_32GT) {
+ if (speed > QEMU_PCI_EXP_LNK_32GT) {
pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
PCI_EXP_LNKCAP2_SLS_64_0GB);
}
}
}
+void pcie_cap_fill_link_ep_usp(PCIDevice *dev, PCIExpLinkWidth width,
+ PCIExpLinkSpeed speed)
+{
+ uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+
+ /*
+ * For an end point or USP need to set the current status as well
+ * as the capabilities.
+ */
+ pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+ QEMU_PCI_EXP_LNKSTA_NLW(width) |
+ QEMU_PCI_EXP_LNKSTA_CLS(speed));
+
+ pcie_cap_fill_lnk(exp_cap, width, speed);
+}
+
+static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
+{
+ PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT);
+ uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+
+ /* Skip anything that isn't a PCIESlot */
+ if (!s) {
+ return;
+ }
+
+ /*
+ * Link bandwidth notification is required for all root ports and
+ * downstream ports supporting links wider than x1 or multiple link
+ * speeds.
+ */
+ if (s->width > QEMU_PCI_EXP_LNK_X1 ||
+ s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+ PCI_EXP_LNKCAP_LBNC);
+ }
+
+ if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+ /*
+ * Hot-plug capable downstream ports and downstream ports supporting
+ * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC
+ * to 1b. PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which
+ * we also hardwire to 1b here. 2.5GT/s hot-plug slots should also
+ * technically implement this, but it's not done here for compatibility.
+ */
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+ PCI_EXP_LNKCAP_DLLLARC);
+ /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */
+ }
+
+ pcie_cap_fill_lnk(exp_cap, s->width, s->speed);
+}
+
int pcie_cap_init(PCIDevice *dev, uint8_t offset,
uint8_t type, uint8_t port,
Error **errp)
@@ -1080,18 +1113,22 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) {
lnksta &= ~PCI_EXP_LNKSTA_NLW;
lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW;
- } else if (!(lnksta & PCI_EXP_LNKSTA_NLW)) {
- lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1);
}
if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) {
lnksta &= ~PCI_EXP_LNKSTA_CLS;
lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS;
- } else if (!(lnksta & PCI_EXP_LNKSTA_CLS)) {
- lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT);
}
}
+ if (!(lnksta & PCI_EXP_LNKSTA_NLW)) {
+ lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1);
+ }
+
+ if (!(lnksta & PCI_EXP_LNKSTA_CLS)) {
+ lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT);
+ }
+
pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta &
@@ -1177,3 +1214,81 @@ void pcie_acs_reset(PCIDevice *dev)
pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
}
}
+
+/* PASID */
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod)
+{
+ static const uint16_t control_reg_rw_mask = 0x07;
+ uint16_t capability_reg;
+
+ assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset,
+ PCI_EXT_CAP_PASID_SIZEOF);
+
+ capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT;
+ capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
+ capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0;
+ pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
+
+ /* Everything is disabled by default */
+ pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
+
+ pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask);
+
+ dev->exp.pasid_cap = offset;
+}
+
+/* PRI */
+void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap,
+ bool prg_response_pasid_req)
+{
+ static const uint16_t control_reg_rw_mask = 0x3;
+ static const uint16_t status_reg_rw1_mask = 0x3;
+ static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff;
+ uint16_t status_reg;
+
+ status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0;
+ status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset,
+ PCI_EXT_CAP_PRI_SIZEOF);
+ /* Disabled by default */
+
+ pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg);
+ pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap);
+
+ pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask);
+ pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask);
+ pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask);
+
+ dev->exp.pri_cap = offset;
+}
+
+bool pcie_pri_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pri_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) &
+ PCI_PRI_CTRL_ENABLE) != 0;
+}
+
+bool pcie_pasid_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
+ PCI_PASID_CTRL_ENABLE) != 0;
+}
+
+bool pcie_ats_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.ats_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) &
+ PCI_ATS_CTRL_ENABLE) != 0;
+}
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index 20ff2b3..f3841a2 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -92,16 +92,6 @@ static PCIESlot *pcie_chassis_find_slot_with_chassis(struct PCIEChassis *c,
return s;
}
-PCIESlot *pcie_chassis_find_slot(uint8_t chassis_number, uint16_t slot)
-{
- struct PCIEChassis *c;
- c = pcie_chassis_find(chassis_number);
- if (!c) {
- return NULL;
- }
- return pcie_chassis_find_slot_with_chassis(c, slot);
-}
-
int pcie_chassis_add_slot(struct PCIESlot *slot)
{
struct PCIEChassis *c;
@@ -121,15 +111,14 @@ void pcie_chassis_del_slot(PCIESlot *s)
QLIST_REMOVE(s, next);
}
-static Property pcie_port_props[] = {
+static const Property pcie_port_props[] = {
DEFINE_PROP_UINT8("port", PCIEPort, port, 0),
DEFINE_PROP_UINT16("aer_log_max", PCIEPort,
parent_obj.parent_obj.exp.aer_log.log_max,
PCIE_AER_LOG_MAX_DEFAULT),
- DEFINE_PROP_END_OF_LIST()
};
-static void pcie_port_class_init(ObjectClass *oc, void *data)
+static void pcie_port_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@@ -199,7 +188,7 @@ int pcie_count_ds_ports(PCIBus *bus)
return dsp_count;
}
-static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler,
+static bool pcie_slot_is_hotpluggable_bus(HotplugHandler *plug_handler,
BusState *bus)
{
PCIESlot *s = PCIE_SLOT(bus->parent);
@@ -214,16 +203,15 @@ static const TypeInfo pcie_port_type_info = {
.class_init = pcie_port_class_init,
};
-static Property pcie_slot_props[] = {
+static const Property pcie_slot_props[] = {
DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0),
DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0),
DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true),
DEFINE_PROP_BOOL("x-do-not-expose-native-hotplug-cap", PCIESlot,
hide_native_hotplug_cap, false),
- DEFINE_PROP_END_OF_LIST()
};
-static void pcie_slot_class_init(ObjectClass *oc, void *data)
+static void pcie_slot_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
@@ -233,7 +221,7 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data)
hc->plug = pcie_cap_slot_plug_cb;
hc->unplug = pcie_cap_slot_unplug_cb;
hc->unplug_request = pcie_cap_slot_unplug_request_cb;
- hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus;
+ hc->is_hotpluggable_bus = pcie_slot_is_hotpluggable_bus;
}
static const TypeInfo pcie_slot_type_info = {
@@ -242,7 +230,7 @@ static const TypeInfo pcie_slot_type_info = {
.instance_size = sizeof(PCIESlot),
.abstract = true,
.class_init = pcie_slot_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ }
}
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index e9b2322..3ad1874 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -15,28 +15,83 @@
#include "hw/pci/pcie.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
-#include "qemu/error-report.h"
#include "qemu/range.h"
#include "qapi/error.h"
#include "trace.h"
-static PCIDevice *register_vf(PCIDevice *pf, int devfn,
- const char *name, uint16_t vf_num);
-static void unregister_vfs(PCIDevice *dev);
+static GHashTable *pfs;
-void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
- const char *vfname, uint16_t vf_dev_id,
- uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride)
+static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
+{
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+ object_unparent(OBJECT(vf));
+ object_unref(OBJECT(vf));
+ }
+ g_free(dev->exp.sriov_pf.vf);
+ dev->exp.sriov_pf.vf = NULL;
+}
+
+static void register_vfs(PCIDevice *dev)
+{
+ uint16_t num_vfs;
+ uint16_t i;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+
+ assert(sriov_cap > 0);
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
+ }
+
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+}
+
+static void unregister_vfs(PCIDevice *dev)
+{
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+ uint16_t i;
+
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
+ }
+
+ pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
+}
+
+static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
+ uint16_t vf_dev_id, uint16_t init_vfs,
+ uint16_t total_vfs, uint16_t vf_offset,
+ uint16_t vf_stride, Error **errp)
{
+ int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
+ if (!pci_is_express(dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV PF");
+ return false;
+ }
+
+ if (pci_is_vf(dev)) {
+ error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time");
+ return false;
+ }
+
+ if (total_vfs &&
+ (uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) {
+ error_setg(errp, "VF addr overflows");
+ return false;
+ }
+
pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
offset, PCI_EXT_CAP_SRIOV_SIZEOF);
dev->exp.sriov_cap = offset;
- dev->exp.sriov_pf.num_vfs = 0;
- dev->exp.sriov_pf.vfname = g_strdup(vfname);
dev->exp.sriov_pf.vf = NULL;
pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
@@ -69,13 +124,74 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+
+ return true;
+}
+
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+ const char *vfname, uint16_t vf_dev_id,
+ uint16_t init_vfs, uint16_t total_vfs,
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
+{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
+
+ if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
+ return false;
+ }
+
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs,
+ total_vfs, vf_offset, vf_stride, errp)) {
+ return false;
+ }
+
+ dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ PCIDevice *vf = pci_new(devfn, vfname);
+ vf->exp.sriov_vf.pf = dev;
+ vf->exp.sriov_vf.vf_number = i;
+
+ if (!qdev_realize(&vf->qdev, bus, errp)) {
+ object_unparent(OBJECT(vf));
+ object_unref(vf);
+ unparent_vfs(dev, i);
+ return false;
+ }
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vf->config, 0xffff);
+ pci_config_set_device_id(vf->config, 0xffff);
+
+ dev->exp.sriov_pf.vf[i] = vf;
+ devfn += vf_stride;
+ }
+
+ return true;
}
void pcie_sriov_pf_exit(PCIDevice *dev)
{
- unregister_vfs(dev);
- g_free((char *)dev->exp.sriov_pf.vfname);
- dev->exp.sriov_pf.vfname = NULL;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+ if (dev->exp.sriov_pf.vf_user_created) {
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
+ uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF);
+ uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID);
+
+ unregister_vfs(dev);
+
+ for (uint16_t i = 0; i < total_vfs; i++) {
+ dev->exp.sriov_pf.vf[i]->exp.sriov_vf.pf = NULL;
+
+ pci_config_set_vendor_id(dev->exp.sriov_pf.vf[i]->config, ven_id);
+ pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id);
+ }
+ } else {
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
+ }
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -108,113 +224,179 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
MemoryRegion *memory)
{
- PCIIORegion *r;
- PCIBus *bus = pci_get_bus(dev);
uint8_t type;
- pcibus_t size = memory_region_size(memory);
- assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
- assert(region_num >= 0);
- assert(region_num < PCI_NUM_REGIONS);
+ assert(dev->exp.sriov_vf.pf);
type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
- if (!is_power_of_2(size)) {
- error_report("%s: PCI region size must be a power"
- " of two - type=0x%x, size=0x%"FMT_PCIBUS,
- __func__, type, size);
- exit(1);
- }
+ return pci_register_bar(dev, region_num, type, memory);
+}
- r = &dev->io_regions[region_num];
- r->memory = memory;
- r->address_space =
- type & PCI_BASE_ADDRESS_SPACE_IO
- ? bus->address_space_io
- : bus->address_space_mem;
- r->size = size;
- r->type = type;
-
- r->addr = pci_bar_address(dev, region_num, r->type, r->size);
- if (r->addr != PCI_BAR_UNMAPPED) {
- memory_region_add_subregion_overlap(r->address_space,
- r->addr, r->memory, 1);
- }
+static gint compare_vf_devfns(gconstpointer a, gconstpointer b)
+{
+ return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
}
-static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
- uint16_t vf_num)
+int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
+ uint16_t offset,
+ Error **errp)
{
- PCIDevice *dev = pci_new(devfn, name);
- dev->exp.sriov_vf.pf = pf;
- dev->exp.sriov_vf.vf_number = vf_num;
- PCIBus *bus = pci_get_bus(pf);
- Error *local_err = NULL;
-
- qdev_realize(&dev->qdev, &bus->qbus, &local_err);
- if (local_err) {
- error_report_err(local_err);
- return NULL;
+ GPtrArray *pf;
+ PCIDevice **vfs;
+ BusState *bus = qdev_get_parent_bus(DEVICE(dev));
+ uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
+ uint16_t size = PCI_EXT_CAP_SRIOV_SIZEOF;
+ uint16_t vf_dev_id;
+ uint16_t vf_offset;
+ uint16_t vf_stride;
+ uint16_t i;
+
+ if (!pfs || !dev->qdev.id) {
+ return 0;
+ }
+
+ pf = g_hash_table_lookup(pfs, dev->qdev.id);
+ if (!pf) {
+ return 0;
+ }
+
+ if (pf->len > UINT16_MAX) {
+ error_setg(errp, "too many VFs");
+ return -1;
}
- /* set vid/did according to sr/iov spec - they are not used */
- pci_config_set_vendor_id(dev->config, 0xffff);
- pci_config_set_device_id(dev->config, 0xffff);
+ g_ptr_array_sort(pf, compare_vf_devfns);
+ vfs = (void *)pf->pdata;
+
+ if (vfs[0]->devfn <= dev->devfn) {
+ error_setg(errp, "a VF function number is less than the PF function number");
+ return -1;
+ }
+
+ vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID);
+ vf_offset = vfs[0]->devfn - dev->devfn;
+ vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn;
+
+ for (i = 0; i < pf->len; i++) {
+ if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) {
+ error_setg(errp, "SR-IOV VF parent bus mismatches with PF");
+ return -1;
+ }
+
+ if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) {
+ error_setg(errp, "SR-IOV VF vendor ID mismatches with PF");
+ return -1;
+ }
+
+ if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) {
+ error_setg(errp, "inconsistent SR-IOV VF device IDs");
+ return -1;
+ }
- return dev;
+ for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
+ if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size ||
+ vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) {
+ error_setg(errp, "inconsistent SR-IOV BARs");
+ return -1;
+ }
+ }
+
+ if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) {
+ error_setg(errp, "inconsistent SR-IOV stride");
+ return -1;
+ }
+ }
+
+ if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len,
+ pf->len, vf_offset, vf_stride, errp)) {
+ return -1;
+ }
+
+ if (!pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI)) {
+ pcie_ari_init(dev, offset + size);
+ size += PCI_ARI_SIZEOF;
+ }
+
+ for (i = 0; i < pf->len; i++) {
+ vfs[i]->exp.sriov_vf.pf = dev;
+ vfs[i]->exp.sriov_vf.vf_number = i;
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(vfs[i]->config, 0xffff);
+ pci_config_set_device_id(vfs[i]->config, 0xffff);
+ }
+
+ dev->exp.sriov_pf.vf = vfs;
+ dev->exp.sriov_pf.vf_user_created = true;
+
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ PCIIORegion *region = &vfs[0]->io_regions[i];
+
+ if (region->size) {
+ pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size);
+ }
+ }
+
+ return size;
}
-static void register_vfs(PCIDevice *dev)
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
{
- uint16_t num_vfs;
- uint16_t i;
- uint16_t sriov_cap = dev->exp.sriov_cap;
- uint16_t vf_offset =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
- uint16_t vf_stride =
- pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- int32_t devfn = dev->devfn + vf_offset;
-
- assert(sriov_cap > 0);
- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
- if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
- return;
+ if (!dev->exp.sriov_pf.vf && dev->qdev.id &&
+ pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
+ error_setg(errp, "attaching user-created SR-IOV VF unsupported");
+ return false;
}
- dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
+ if (dev->sriov_pf) {
+ PCIDevice *pci_pf;
+ GPtrArray *pf;
- trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
- dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
- dev->exp.sriov_pf.vfname, i);
- if (!dev->exp.sriov_pf.vf[i]) {
- num_vfs = i;
- break;
+ if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) {
+ error_setg(errp, "user cannot create SR-IOV VF with this device type");
+ return false;
}
- devfn += vf_stride;
+
+ if (!pci_is_express(dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV VF");
+ return false;
+ }
+
+ if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) {
+ error_setg(errp, "PCI device specified as SR-IOV PF already exists");
+ return false;
+ }
+
+ if (!pfs) {
+ pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
+ }
+
+ pf = g_hash_table_lookup(pfs, dev->sriov_pf);
+ if (!pf) {
+ pf = g_ptr_array_new();
+ g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf);
+ }
+
+ g_ptr_array_add(pf, dev);
}
- dev->exp.sriov_pf.num_vfs = num_vfs;
+
+ return true;
}
-static void unregister_vfs(PCIDevice *dev)
+void pcie_sriov_unregister_device(PCIDevice *dev)
{
- uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
- uint16_t i;
+ if (dev->sriov_pf && pfs) {
+ GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf);
- trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
- Error *err = NULL;
- PCIDevice *vf = dev->exp.sriov_pf.vf[i];
- if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
- error_reportf_err(err, "Failed to unplug: ");
+ if (pf) {
+ g_ptr_array_remove_fast(pf, dev);
+
+ if (!pf->len) {
+ g_hash_table_remove(pfs, dev->sriov_pf);
+ g_ptr_array_free(pf, FALSE);
+ }
}
- object_unparent(OBJECT(vf));
- object_unref(OBJECT(vf));
}
- g_free(dev->exp.sriov_pf.vf);
- dev->exp.sriov_pf.vf = NULL;
- dev->exp.sriov_pf.num_vfs = 0;
}
void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
@@ -235,15 +417,29 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
PCI_FUNC(dev->devfn), off, val, len);
if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
- if (dev->exp.sriov_pf.num_vfs) {
- if (!(val & PCI_SRIOV_CTRL_VFE)) {
- unregister_vfs(dev);
- }
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ register_vfs(dev);
} else {
- if (val & PCI_SRIOV_CTRL_VFE) {
- register_vfs(dev);
- }
+ unregister_vfs(dev);
}
+ } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
+ uint8_t *cfg = dev->config + sriov_cap;
+ uint8_t *wmask = dev->wmask + sriov_cap;
+ uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF);
+ uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI;
+
+ if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) {
+ wmask_val |= PCI_SRIOV_CTRL_VFE;
+ }
+
+ pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val);
+ }
+}
+
+void pcie_sriov_pf_post_load(PCIDevice *dev)
+{
+ if (dev->exp.sriov_cap) {
+ register_vfs(dev);
}
}
@@ -260,6 +456,8 @@ void pcie_sriov_pf_reset(PCIDevice *dev)
unregister_vfs(dev);
pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+ pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_CTRL,
+ PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI);
/*
* Default is to use 4K pages, software can modify it
@@ -294,7 +492,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
uint16_t pcie_sriov_vf_number(PCIDevice *dev)
{
- assert(pci_is_vf(dev));
+ assert(dev->exp.sriov_vf.pf);
return dev->exp.sriov_vf.vf_number;
}
@@ -306,7 +504,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
{
assert(!pci_is_vf(dev));
- if (n < dev->exp.sriov_pf.num_vfs) {
+ if (n < pcie_sriov_num_vfs(dev)) {
return dev->exp.sriov_pf.vf[n];
}
return NULL;
@@ -314,5 +512,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
{
- return dev->exp.sriov_pf.num_vfs;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ uint8_t *cfg = dev->config + sriov_cap;
+
+ return sriov_cap &&
+ (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ?
+ pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0;
}
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 19643aa..02c80d3 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -1,9 +1,15 @@
# See docs/devel/tracing.rst for syntax documentation.
# pci.c
+pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d"
+pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d"
pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
+pci_bad_rom_magic(uint16_t bad_rom_magic, uint16_t good_rom_magic) "Bad ROM magic number: %04"PRIX16". Should be: %04"PRIX16
+pci_bad_pcir_offset(uint16_t pcir_offset) "Bad PCIR offset 0x%"PRIx16" or signature"
+pci_rom_and_pci_ids(char *romfile, uint16_t vendor_id, uint16_t device_id, uint16_t rom_vendor_id, uint16_t rom_device_id) "%s: ROM ID %04"PRIx16":%04"PRIx16" | PCI ID %04"PRIx16":%04"PRIx16
+pci_rom_checksum_change(uint8_t old_checksum, uint8_t new_checksum) "ROM checksum changed from %02"PRIx8" to %02"PRIx8
# pci_host.c
pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x"
@@ -14,7 +20,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask
# hw/pci/pcie_sriov.c
sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
-sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
+sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs"
sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
# pcie.c