diff options
Diffstat (limited to 'hw/pci')
-rw-r--r-- | hw/pci/msi.c | 2 | ||||
-rw-r--r-- | hw/pci/msix.c | 15 | ||||
-rw-r--r-- | hw/pci/pci-hmp-cmds.c | 28 | ||||
-rw-r--r-- | hw/pci/pci-stub.c | 6 | ||||
-rw-r--r-- | hw/pci/pci.c | 541 | ||||
-rw-r--r-- | hw/pci/pci_bridge.c | 12 | ||||
-rw-r--r-- | hw/pci/pci_host.c | 15 | ||||
-rw-r--r-- | hw/pci/pcie.c | 203 | ||||
-rw-r--r-- | hw/pci/pcie_port.c | 26 | ||||
-rw-r--r-- | hw/pci/pcie_sriov.c | 413 | ||||
-rw-r--r-- | hw/pci/trace-events | 8 |
11 files changed, 996 insertions, 273 deletions
diff --git a/hw/pci/msi.c b/hw/pci/msi.c index 8104ac1..b9f5b45 100644 --- a/hw/pci/msi.c +++ b/hw/pci/msi.c @@ -23,7 +23,7 @@ #include "hw/xen/xen.h" #include "qemu/range.h" #include "qapi/error.h" -#include "sysemu/xen.h" +#include "system/xen.h" #include "hw/i386/kvm/xen_evtchn.h" diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 487e498..8c7f670 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -15,11 +15,12 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/pci/pci.h" #include "hw/xen/xen.h" -#include "sysemu/xen.h" +#include "system/xen.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" #include "qemu/range.h" @@ -71,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) return dev->msix_pba + vector / 8; } -static int msix_is_pending(PCIDevice *dev, int vector) +int msix_is_pending(PCIDevice *dev, unsigned int vector) { return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); } @@ -250,7 +251,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, PCIDevice *dev = opaque; if (dev->msix_vector_poll_notifier) { unsigned vector_start = addr * 8; - unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr); + unsigned vector_end = MIN((addr + size) * 8, dev->msix_entries_nr); dev->msix_vector_poll_notifier(dev, vector_start, vector_end); } @@ -260,6 +261,14 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, static void msix_pba_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { + PCIDevice *dev = opaque; + + qemu_log_mask(LOG_GUEST_ERROR, + "PCI [%s:%02x:%02x.%x] attempt to write to MSI-X " + "PBA at 0x%" FMT_PCIBUS ", ignoring.\n", + pci_root_bus_path(dev), pci_dev_bus_num(dev), + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), + addr); } static const MemoryRegionOps msix_pba_mmio_ops = { diff --git a/hw/pci/pci-hmp-cmds.c b/hw/pci/pci-hmp-cmds.c index b09fce9..a5f6483 100644 --- a/hw/pci/pci-hmp-cmds.c +++ b/hw/pci/pci-hmp-cmds.c @@ -20,7 +20,7 @@ #include "monitor/monitor.h" #include "pci-internal.h" #include "qapi/error.h" -#include "qapi/qmp/qdict.h" +#include "qobject/qdict.h" #include "qapi/qapi-commands-pci.h" #include "qemu/cutils.h" @@ -83,15 +83,25 @@ static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev) monitor_printf(mon, " BAR%" PRId64 ": ", region->value->bar); if (!strcmp(region->value->type, "io")) { - monitor_printf(mon, "I/O at 0x%04" PRIx64 - " [0x%04" PRIx64 "].\n", - addr, addr + size - 1); + if (addr != PCI_BAR_UNMAPPED) { + monitor_printf(mon, "I/O at 0x%04" PRIx64 + " [0x%04" PRIx64 "]\n", + addr, addr + size - 1); + } else { + monitor_printf(mon, "I/O (not mapped)\n"); + } } else { - monitor_printf(mon, "%d bit%s memory at 0x%08" PRIx64 - " [0x%08" PRIx64 "].\n", - region->value->mem_type_64 ? 64 : 32, - region->value->prefetch ? " prefetchable" : "", - addr, addr + size - 1); + if (addr != PCI_BAR_UNMAPPED) { + monitor_printf(mon, "%d bit%s memory at 0x%08" PRIx64 + " [0x%08" PRIx64 "]\n", + region->value->mem_type_64 ? 64 : 32, + region->value->prefetch ? " prefetchable" : "", + addr, addr + size - 1); + } else { + monitor_printf(mon, "%d bit%s memory (not mapped)\n", + region->value->mem_type_64 ? 64 : 32, + region->value->prefetch ? " prefetchable" : ""); + } } } diff --git a/hw/pci/pci-stub.c b/hw/pci/pci-stub.c index f050868..3397d0c 100644 --- a/hw/pci/pci-stub.c +++ b/hw/pci/pci-stub.c @@ -46,14 +46,12 @@ void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict) /* kvm-all wants this */ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) { - g_assert(false); - return (MSIMessage){}; + g_assert_not_reached(); } uint16_t pci_requester_id(PCIDevice *dev) { - g_assert(false); - return 0; + g_assert_not_reached(); } /* Required by ahci.c */ diff --git a/hw/pci/pci.c b/hw/pci/pci.c index fab86d0..c70b5ce 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -32,12 +32,13 @@ #include "hw/pci/pci_host.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "migration/cpr.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" #include "net/net.h" -#include "sysemu/numa.h" -#include "sysemu/runstate.h" -#include "sysemu/sysemu.h" +#include "system/numa.h" +#include "system/runstate.h" +#include "system/system.h" #include "hw/loader.h" #include "qemu/error-report.h" #include "qemu/range.h" @@ -46,6 +47,7 @@ #include "hw/pci/msix.h" #include "hw/hotplug.h" #include "hw/boards.h" +#include "hw/nvram/fw_cfg.h" #include "qapi/error.h" #include "qemu/cutils.h" #include "pci-internal.h" @@ -53,13 +55,6 @@ #include "hw/xen/xen.h" #include "hw/i386/kvm/xen_evtchn.h" -//#define DEBUG_PCI -#ifdef DEBUG_PCI -# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) -#else -# define PCI_DPRINTF(format, ...) do { } while (0) -#endif - bool pci_available = true; static char *pcibus_get_dev_path(DeviceState *dev); @@ -67,11 +62,24 @@ static char *pcibus_get_fw_dev_path(DeviceState *dev); static void pcibus_reset_hold(Object *obj, ResetType type); static bool pcie_has_upstream_port(PCIDevice *dev); -static Property pci_props[] = { +static void prop_pci_busnr_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t busnr = pci_dev_bus_num(PCI_DEVICE(obj)); + + visit_type_uint8(v, name, &busnr, errp); +} + +static const PropertyInfo prop_pci_busnr = { + .type = "busnr", + .get = prop_pci_busnr_get, +}; + +static const Property pci_props[] = { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), DEFINE_PROP_STRING("romfile", PCIDevice, romfile), DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, UINT32_MAX), - DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1), + DEFINE_PROP_INT32("rombar", PCIDevice, rom_bar, -1), DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present, QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false), DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present, @@ -85,7 +93,12 @@ static Property pci_props[] = { QEMU_PCIE_ERR_UNC_MASK_BITNR, true), DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present, QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), - DEFINE_PROP_END_OF_LIST() + DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice, + max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE), + DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf), + DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present, + QEMU_PCIE_EXT_TAG_BITNR, true), + { .name = "busnr", .info = &prop_pci_busnr }, }; static const VMStateDescription vmstate_pcibus = { @@ -116,6 +129,12 @@ static GSequence *pci_acpi_index_list(void) return used_acpi_index_list; } +static void pci_set_master(PCIDevice *d, bool enable) +{ + memory_region_set_enabled(&d->bus_master_enable_region, enable); + d->is_master = enable; /* cache the status */ +} + static void pci_init_bus_master(PCIDevice *pci_dev) { AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev); @@ -123,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev) memory_region_init_alias(&pci_dev->bus_master_enable_region, OBJECT(pci_dev), "bus master", dma_as->root, 0, memory_region_size(dma_as->root)); - memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); + pci_set_master(pci_dev, false); memory_region_add_subregion(&pci_dev->bus_master_container_region, 0, &pci_dev->bus_master_enable_region); } @@ -198,11 +217,57 @@ static uint16_t pcibus_numa_node(PCIBus *bus) return NUMA_NODE_UNASSIGNED; } -static void pci_bus_class_init(ObjectClass *klass, void *data) +bool pci_bus_add_fw_cfg_extra_pci_roots(FWCfgState *fw_cfg, + PCIBus *bus, + Error **errp) +{ + Object *obj; + + if (!bus) { + return true; + } + obj = OBJECT(bus); + + return fw_cfg_add_file_from_generator(fw_cfg, obj->parent, + object_get_canonical_path_component(obj), + "etc/extra-pci-roots", errp); +} + +static GByteArray *pci_bus_fw_cfg_gen_data(Object *obj, Error **errp) +{ + PCIBus *bus = PCI_BUS(obj); + GByteArray *byte_array; + uint64_t extra_hosts = 0; + + if (!bus) { + return NULL; + } + + QLIST_FOREACH(bus, &bus->child, sibling) { + /* look for expander root buses */ + if (pci_bus_is_root(bus)) { + extra_hosts++; + } + } + + if (!extra_hosts) { + return NULL; + } + extra_hosts = cpu_to_le64(extra_hosts); + + byte_array = g_byte_array_new(); + g_byte_array_append(byte_array, + (const void *)&extra_hosts, sizeof(extra_hosts)); + + return byte_array; +} + +static void pci_bus_class_init(ObjectClass *klass, const void *data) { BusClass *k = BUS_CLASS(klass); PCIBusClass *pbc = PCI_BUS_CLASS(klass); ResettableClass *rc = RESETTABLE_CLASS(klass); + FWCfgDataGeneratorClass *fwgc = FW_CFG_DATA_GENERATOR_CLASS(klass); k->print_dev = pcibus_dev_print; k->get_dev_path = pcibus_get_dev_path; @@ -214,6 +279,8 @@ static void pci_bus_class_init(ObjectClass *klass, void *data) pbc->bus_num = pcibus_num; pbc->numa_node = pcibus_numa_node; + + fwgc->get_data = pci_bus_fw_cfg_gen_data; } static const TypeInfo pci_bus_info = { @@ -222,6 +289,10 @@ static const TypeInfo pci_bus_info = { .instance_size = sizeof(PCIBus), .class_size = sizeof(PCIBusClass), .class_init = pci_bus_class_init, + .interfaces = (const InterfaceInfo[]) { + { TYPE_FW_CFG_DATA_GENERATOR_INTERFACE }, + { } + } }; static const TypeInfo cxl_interface_info = { @@ -239,7 +310,7 @@ static const TypeInfo conventional_pci_interface_info = { .parent = TYPE_INTERFACE, }; -static void pcie_bus_class_init(ObjectClass *klass, void *data) +static void pcie_bus_class_init(ObjectClass *klass, const void *data) { BusClass *k = BUS_CLASS(klass); @@ -365,6 +436,84 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) attrs, NULL); } +/* + * Register and track a PM capability. If wmask is also enabled for the power + * state field of the pmcsr register, guest writes may change the device PM + * state. BAR access is only enabled while the device is in the D0 state. + * Return the capability offset or negative error code. + */ +int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp) +{ + int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp); + + if (cap < 0) { + return cap; + } + + d->pm_cap = cap; + d->cap_present |= QEMU_PCI_CAP_PM; + + return cap; +} + +static uint8_t pci_pm_state(PCIDevice *d) +{ + uint16_t pmcsr; + + if (!(d->cap_present & QEMU_PCI_CAP_PM)) { + return 0; + } + + pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL); + + return pmcsr & PCI_PM_CTRL_STATE_MASK; +} + +/* + * Update the PM capability state based on the new value stored in config + * space respective to the old, pre-write state provided. If the new value + * is rejected (unsupported or invalid transition) restore the old value. + * Return the resulting PM state. + */ +static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old) +{ + uint16_t pmc; + uint8_t new; + + if (!(d->cap_present & QEMU_PCI_CAP_PM) || + !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) { + return old; + } + + new = pci_pm_state(d); + if (new == old) { + return old; + } + + pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC); + + /* + * Transitions to D1 & D2 are only allowed if supported. Devices may + * only transition to higher D-states or to D0. + */ + if ((!(pmc & PCI_PM_CAP_D1) && new == 1) || + (!(pmc & PCI_PM_CAP_D2) && new == 2) || + (old && new && new < old)) { + pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK); + pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL, + old); + trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d), + PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), + old, new); + return old; + } + + trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn), + PCI_FUNC(d->devfn), old, new); + return new; +} + static void pci_reset_regions(PCIDevice *dev) { int r; @@ -389,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev) static void pci_do_device_reset(PCIDevice *dev) { + if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) { + return; + } + pci_device_deassert_intx(dev); assert(dev->irq_state == 0); @@ -404,6 +557,11 @@ static void pci_do_device_reset(PCIDevice *dev) pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) | pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE)); dev->config[PCI_CACHE_LINE_SIZE] = 0x0; + /* Default PM state is D0 */ + if (dev->cap_present & QEMU_PCI_CAP_PM) { + pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK); + } pci_reset_regions(dev); pci_update_mappings(dev); @@ -657,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size, pci_bridge_update_mappings(PCI_BRIDGE(s)); } - memory_region_set_enabled(&s->bus_master_enable_region, - pci_get_word(s->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + pci_set_master(s, pci_get_word(s->config + PCI_COMMAND) + & PCI_COMMAND_MASTER); g_free(config); return 0; @@ -733,10 +890,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id) return !pci_is_express((PCIDevice *)opaque); } +static int pci_post_load(void *opaque, int version_id) +{ + pcie_sriov_pf_post_load(opaque); + return 0; +} + const VMStateDescription vmstate_pci_device = { .name = "PCIDevice", .version_id = 2, .minimum_version_id = 1, + .post_load = pci_post_load, .fields = (const VMStateField[]) { VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice, @@ -952,13 +1116,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; } - /* - * With SR/IOV and ARI, a device at function 0 need not be a multifunction - * device, as it may just be a VF that ended up with function 0 in - * the legacy PCI interpretation. Avoid failing in such cases: - */ - if (pci_is_vf(dev) && - dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + /* SR/IOV is not handled here. */ + if (pci_is_vf(dev)) { return; } @@ -991,7 +1150,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) } /* function 0 indicates single function, so function > 0 must be NULL */ for (func = 1; func < PCI_FUNC_MAX; ++func) { - if (bus->devices[PCI_DEVFN(slot, func)]) { + PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)]; + if (device && !pci_is_vf(device)) { error_setg(errp, "PCI: %x.0 indicates single function, " "but %x.%x is already populated.", slot, slot, func); @@ -1179,14 +1339,15 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCI_SLOT(devfn), PCI_FUNC(devfn), name, bus->devices[devfn]->name, bus->devices[devfn]->qdev.id); return NULL; - } /* - * Populating function 0 triggers a scan from the guest that - * exposes other non-zero functions. Hence we need to ensure that - * function 0 wasn't added yet. - */ - else if (dev->hotplugged && - !pci_is_vf(pci_dev) && - pci_get_function_0(pci_dev)) { + } + + /* + * Populating function 0 triggers a scan from the guest that + * exposes other non-zero functions. Hence we need to ensure that + * function 0 wasn't added yet. + */ + if (dev->hotplugged && !pci_is_vf(pci_dev) && + pci_get_function_0(pci_dev)) { error_setg(errp, "PCI: slot %d function 0 already occupied by %s," " new func %s cannot be exposed to guest.", PCI_SLOT(pci_get_function_0(pci_dev)->devfn), @@ -1204,6 +1365,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, "bus master container", UINT64_MAX); address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_container_region, pci_dev->name); + pci_dev->bus_master_as.max_bounce_buffer_size = + pci_dev->max_bounce_buffer_size; if (phase_check(PHASE_MACHINE_READY)) { pci_init_bus_master(pci_dev); @@ -1276,6 +1439,7 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_unregister_io_regions(pci_dev); pci_del_option_rom(pci_dev); + pcie_sriov_unregister_device(pci_dev); if (pc->exit) { pc->exit(pci_dev); @@ -1307,7 +1471,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; - assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1318,7 +1481,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2); r = &pci_dev->io_regions[region_num]; - r->addr = PCI_BAR_UNMAPPED; + assert(!r->size); r->size = size; r->type = type; r->memory = memory; @@ -1326,22 +1489,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, ? pci_get_bus(pci_dev)->address_space_io : pci_get_bus(pci_dev)->address_space_mem; - wmask = ~(size - 1); - if (region_num == PCI_ROM_SLOT) { - /* ROM enable bit is writable */ - wmask |= PCI_ROM_ADDRESS_ENABLE; - } + if (pci_is_vf(pci_dev)) { + PCIDevice *pf = pci_dev->exp.sriov_vf.pf; + assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]); - addr = pci_bar(pci_dev, region_num); - pci_set_long(pci_dev->config + addr, type); - - if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && - r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - pci_set_quad(pci_dev->wmask + addr, wmask); - pci_set_quad(pci_dev->cmask + addr, ~0ULL); + r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size); + if (r->addr != PCI_BAR_UNMAPPED) { + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } } else { - pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); - pci_set_long(pci_dev->cmask + addr, 0xffffffff); + r->addr = PCI_BAR_UNMAPPED; + + wmask = ~(size - 1); + if (region_num == PCI_ROM_SLOT) { + /* ROM enable bit is writable */ + wmask |= PCI_ROM_ADDRESS_ENABLE; + } + + addr = pci_bar(pci_dev, region_num); + pci_set_long(pci_dev->config + addr, type); + + if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && + r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(pci_dev->wmask + addr, wmask); + pci_set_quad(pci_dev->cmask + addr, ~0ULL); + } else { + pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(pci_dev->cmask + addr, 0xffffffff); + } } } @@ -1430,7 +1606,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg, pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET); uint16_t vf_stride = pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride; + uint32_t vf_num = d->devfn - (pf->devfn + vf_offset); + + if (vf_num) { + vf_num /= vf_stride; + } if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { new_addr = pci_get_quad(pf->config + bar); @@ -1525,7 +1705,7 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); - if (!d->has_power) { + if (!d->enabled || pci_pm_state(d)) { new_addr = PCI_BAR_UNMAPPED; } @@ -1555,7 +1735,7 @@ static void pci_update_mappings(PCIDevice *d) pci_update_vga(d); } -static inline int pci_irq_disabled(PCIDevice *d) +int pci_irq_disabled(PCIDevice *d) { return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE; } @@ -1591,6 +1771,7 @@ uint32_t pci_default_read_config(PCIDevice *d, void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l) { + uint8_t new_pm_state, old_pm_state = pci_pm_state(d); int i, was_irq_disabled = pci_irq_disabled(d); uint32_t val = val_in; @@ -1603,17 +1784,21 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask); d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */ } + + new_pm_state = pci_pm_update(d, addr, l, old_pm_state); + if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) || ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) || ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) || - range_covers_byte(addr, l, PCI_COMMAND)) + range_covers_byte(addr, l, PCI_COMMAND) || + !!new_pm_state != !!old_pm_state) { pci_update_mappings(d); + } if (ranges_overlap(addr, l, PCI_COMMAND, 2)) { pci_update_irq_disabled(d, was_irq_disabled); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->has_power); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) & + PCI_COMMAND_MASTER) && d->enabled); } msi_write_config(d, addr, val_in, l); @@ -2098,6 +2283,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } + if (!pcie_sriov_register_device(pci_dev, errp)) { + pci_qdev_unrealize(DEVICE(pci_dev)); + return; + } + /* * A PCIe Downstream Port that do not have ARI Forwarding enabled must * associate only Device 0 with the device attached to the bus @@ -2269,12 +2459,12 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Only a valid rom will be patched. */ rom_magic = pci_get_word(ptr); if (rom_magic != 0xaa55) { - PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic); + trace_pci_bad_rom_magic(rom_magic, 0xaa55); return; } pcir_offset = pci_get_word(ptr + 0x18); if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) { - PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset); + trace_pci_bad_pcir_offset(pcir_offset); return; } @@ -2283,8 +2473,8 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) rom_vendor_id = pci_get_word(ptr + pcir_offset + 4); rom_device_id = pci_get_word(ptr + pcir_offset + 6); - PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile, - vendor_id, device_id, rom_vendor_id, rom_device_id); + trace_pci_rom_and_pci_ids(pdev->romfile, vendor_id, device_id, + rom_vendor_id, rom_device_id); checksum = ptr[6]; @@ -2292,7 +2482,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Patch vendor id and checksum (at offset 6 for etherboot roms). */ checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8); checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8); - PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + trace_pci_rom_checksum_change(ptr[6], checksum); ptr[6] = checksum; pci_set_word(ptr + pcir_offset + 4, vendor_id); } @@ -2301,7 +2491,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Patch device id and checksum (at offset 6 for etherboot roms). */ checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8); checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8); - PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + trace_pci_rom_checksum_change(ptr[6], checksum); ptr[6] = checksum; pci_set_word(ptr + pcir_offset + 6, device_id); } @@ -2352,6 +2542,14 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, return; } + if (pci_is_vf(pdev)) { + if (pdev->rom_bar > 0) { + error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF"); + } + + return; + } + if (load_file || pdev->romsize == UINT32_MAX) { path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); if (path == NULL) { @@ -2625,7 +2823,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev) return pci_get_bus(dev)->address_space_io; } -static void pci_device_class_init(ObjectClass *klass, void *data) +static void pci_device_class_init(ObjectClass *klass, const void *data) { DeviceClass *k = DEVICE_CLASS(klass); @@ -2633,9 +2831,13 @@ static void pci_device_class_init(ObjectClass *klass, void *data) k->unrealize = pci_qdev_unrealize; k->bus_type = TYPE_PCI_BUS; device_class_set_props(k, pci_props); + object_class_property_set_description( + klass, "x-max-bounce-buffer-size", + "Maximum buffer size allocated for bounce buffers used for mapped " + "access to indirect DMA memory"); } -static void pci_device_class_base_init(ObjectClass *klass, void *data) +static void pci_device_class_base_init(ObjectClass *klass, const void *data) { if (!object_class_is_abstract(klass)) { ObjectClass *conventional = @@ -2742,6 +2944,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) return &address_space_memory; } +int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n, + IOMMUNotify fn, void *opaque) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) { + iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque, + devfn, n, fn, opaque); + return 0; + } + + return -ENODEV; +} + bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, Error **errp) { @@ -2773,6 +2992,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev) } } +int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, bool lpig, + uint16_t prgi, bool is_read, bool is_write) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (!pcie_pri_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) { + return iommu_bus->iommu_ops->pri_request_page(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, lpig, prgi, + is_read, is_write); + } + + return -ENODEV; +} + +int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUPRINotifier *notifier) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) { + iommu_bus->iommu_ops->pri_register_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, notifier); + return 0; + } + + return -ENODEV; +} + +void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) { + iommu_bus->iommu_ops->pri_unregister_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid); + } +} + +ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (result_length == 0) { + return -ENOSPC; + } + + if (!pcie_ats_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) { + return iommu_bus->iommu_ops->ats_request_translation(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, length, + no_write, result, + result_length, err_count); + } + + return -ENODEV; +} + +int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) { + iommu_bus->iommu_ops->register_iotlb_notifier(bus, + iommu_bus->iommu_opaque, devfn, + pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) { + iommu_bus->iommu_ops->unregister_iotlb_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width, + uint32_t *min_page_size) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) { + iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque, + addr_width, min_page_size); + return 0; + } + + return -ENODEV; +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /* @@ -2886,16 +3269,30 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) void pci_set_power(PCIDevice *d, bool state) { - if (d->has_power == state) { + /* + * Don't change the enabled state of VFs when powering on/off the device. + * + * When powering on, VFs must not be enabled immediately but they must + * wait until the guest configures SR-IOV. + * When powering off, their corresponding PFs will be reset and disable + * VFs. + */ + if (!pci_is_vf(d)) { + pci_set_enabled(d, state); + } +} + +void pci_set_enabled(PCIDevice *d, bool state) +{ + if (d->enabled == state) { return; } - d->has_power = state; + d->enabled = state; pci_update_mappings(d); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->has_power); - if (!d->has_power) { + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->enabled); + if (qdev_is_realized(&d->qdev)) { pci_device_reset(d); } } diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index 6a4e388..76255c4 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -380,9 +380,12 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) sec_bus->map_irq = br->map_irq ? br->map_irq : pci_swizzle_map_irq_fn; sec_bus->address_space_mem = &br->address_space_mem; memory_region_init(&br->address_space_mem, OBJECT(br), "pci_bridge_pci", UINT64_MAX); + address_space_init(&br->as_mem, &br->address_space_mem, + "pci_bridge_pci_mem"); sec_bus->address_space_io = &br->address_space_io; memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 4 * GiB); + address_space_init(&br->as_io, &br->address_space_io, "pci_bridge_pci_io"); pci_bridge_region_init(br); QLIST_INIT(&sec_bus->child); QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); @@ -399,6 +402,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev) PCIBridge *s = PCI_BRIDGE(pci_dev); assert(QLIST_EMPTY(&s->sec_bus.child)); QLIST_REMOVE(&s->sec_bus, sibling); + address_space_destroy(&s->as_mem); + address_space_destroy(&s->as_io); pci_bridge_region_del(s, &s->windows); pci_bridge_region_cleanup(s, &s->windows); /* object_unparent() is called automatically during device deletion */ @@ -472,13 +477,12 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, return 0; } -static Property pci_bridge_properties[] = { +static const Property pci_bridge_properties[] = { DEFINE_PROP_BOOL("x-pci-express-writeable-slt-bug", PCIBridge, pcie_writeable_slt_bug, false), - DEFINE_PROP_END_OF_LIST(), }; -static void pci_bridge_class_init(ObjectClass *klass, void *data) +static void pci_bridge_class_init(ObjectClass *klass, const void *data) { AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass); DeviceClass *k = DEVICE_CLASS(klass); @@ -493,7 +497,7 @@ static const TypeInfo pci_bridge_type_info = { .instance_size = sizeof(PCIBridge), .class_init = pci_bridge_class_init, .abstract = true, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_ACPI_DEV_AML_IF }, { }, }, diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index dfe6fe6..7179d99 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, * allowing direct removal of unexposed functions. */ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || - !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { + !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { return; } @@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, * allowing direct removal of unexposed functions. */ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || - !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { + !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { return ~0x0; } @@ -217,12 +217,6 @@ const MemoryRegionOps pci_host_data_le_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -const MemoryRegionOps pci_host_data_be_ops = { - .read = pci_host_data_read, - .write = pci_host_data_write, - .endianness = DEVICE_BIG_ENDIAN, -}; - static bool pci_host_needed(void *opaque) { PCIHostState *s = opaque; @@ -240,14 +234,13 @@ const VMStateDescription vmstate_pcihost = { } }; -static Property pci_host_properties_common[] = { +static const Property pci_host_properties_common[] = { DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState, mig_enabled, true), DEFINE_PROP_BOOL(PCI_HOST_BYPASS_IOMMU, PCIHostState, bypass_iommu, false), - DEFINE_PROP_END_OF_LIST(), }; -static void pci_host_class_init(ObjectClass *klass, void *data) +static void pci_host_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); device_class_set_props(dc, pci_host_properties_common); diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 4b2f080..eaeb688 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -86,7 +86,13 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) * Specification, Revision 1.1., or subsequent PCI Express Base * Specification revisions. */ - pci_set_long(exp_cap + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER); + uint32_t devcap = PCI_EXP_DEVCAP_RBER; + + if (dev->cap_present & QEMU_PCIE_EXT_TAG) { + devcap = PCI_EXP_DEVCAP_RBER | PCI_EXP_DEVCAP_EXT_TAG; + } + + pci_set_long(exp_cap + PCI_EXP_DEVCAP, devcap); pci_set_long(exp_cap + PCI_EXP_LNKCAP, (port << PCI_EXP_LNKCAP_PN_SHIFT) | @@ -105,46 +111,18 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) pci_set_word(cmask + PCI_EXP_LNKSTA, 0); } -static void pcie_cap_fill_slot_lnk(PCIDevice *dev) +/* Includes setting the target speed default */ +static void pcie_cap_fill_lnk(uint8_t *exp_cap, PCIExpLinkWidth width, + PCIExpLinkSpeed speed) { - PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT); - uint8_t *exp_cap = dev->config + dev->exp.exp_cap; - - /* Skip anything that isn't a PCIESlot */ - if (!s) { - return; - } - /* Clear and fill LNKCAP from what was configured above */ pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, - QEMU_PCI_EXP_LNKCAP_MLW(s->width) | - QEMU_PCI_EXP_LNKCAP_MLS(s->speed)); - - /* - * Link bandwidth notification is required for all root ports and - * downstream ports supporting links wider than x1 or multiple link - * speeds. - */ - if (s->width > QEMU_PCI_EXP_LNK_X1 || - s->speed > QEMU_PCI_EXP_LNK_2_5GT) { - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, - PCI_EXP_LNKCAP_LBNC); - } - - if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) { - /* - * Hot-plug capable downstream ports and downstream ports supporting - * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC - * to 1b. PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which - * we also hardwire to 1b here. 2.5GT/s hot-plug slots should also - * technically implement this, but it's not done here for compatibility. - */ - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, - PCI_EXP_LNKCAP_DLLLARC); - /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */ + QEMU_PCI_EXP_LNKCAP_MLW(width) | + QEMU_PCI_EXP_LNKCAP_MLS(speed)); + if (speed > QEMU_PCI_EXP_LNK_2_5GT) { /* * Target Link Speed defaults to the highest link speed supported by * the component. 2.5GT/s devices are permitted to hardwire to zero. @@ -152,7 +130,7 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS); pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2, - QEMU_PCI_EXP_LNKCAP_MLS(s->speed) & + QEMU_PCI_EXP_LNKCAP_MLS(speed) & PCI_EXP_LNKCTL2_TLS); } @@ -161,27 +139,82 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) * actually a reference to the highest bit supported in this register. * We assume the device supports all link speeds. */ - if (s->speed > QEMU_PCI_EXP_LNK_5GT) { + if (speed > QEMU_PCI_EXP_LNK_5GT) { pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U); pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, PCI_EXP_LNKCAP2_SLS_2_5GB | PCI_EXP_LNKCAP2_SLS_5_0GB | PCI_EXP_LNKCAP2_SLS_8_0GB); - if (s->speed > QEMU_PCI_EXP_LNK_8GT) { + if (speed > QEMU_PCI_EXP_LNK_8GT) { pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, PCI_EXP_LNKCAP2_SLS_16_0GB); } - if (s->speed > QEMU_PCI_EXP_LNK_16GT) { + if (speed > QEMU_PCI_EXP_LNK_16GT) { pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, PCI_EXP_LNKCAP2_SLS_32_0GB); } - if (s->speed > QEMU_PCI_EXP_LNK_32GT) { + if (speed > QEMU_PCI_EXP_LNK_32GT) { pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, PCI_EXP_LNKCAP2_SLS_64_0GB); } } } +void pcie_cap_fill_link_ep_usp(PCIDevice *dev, PCIExpLinkWidth width, + PCIExpLinkSpeed speed) +{ + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + + /* + * For an end point or USP need to set the current status as well + * as the capabilities. + */ + pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + QEMU_PCI_EXP_LNKSTA_NLW(width) | + QEMU_PCI_EXP_LNKSTA_CLS(speed)); + + pcie_cap_fill_lnk(exp_cap, width, speed); +} + +static void pcie_cap_fill_slot_lnk(PCIDevice *dev) +{ + PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + + /* Skip anything that isn't a PCIESlot */ + if (!s) { + return; + } + + /* + * Link bandwidth notification is required for all root ports and + * downstream ports supporting links wider than x1 or multiple link + * speeds. + */ + if (s->width > QEMU_PCI_EXP_LNK_X1 || + s->speed > QEMU_PCI_EXP_LNK_2_5GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_LBNC); + } + + if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) { + /* + * Hot-plug capable downstream ports and downstream ports supporting + * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC + * to 1b. PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which + * we also hardwire to 1b here. 2.5GT/s hot-plug slots should also + * technically implement this, but it's not done here for compatibility. + */ + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_DLLLARC); + /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */ + } + + pcie_cap_fill_lnk(exp_cap, s->width, s->speed); +} + int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port, Error **errp) @@ -1080,18 +1113,22 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) { lnksta &= ~PCI_EXP_LNKSTA_NLW; lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW; - } else if (!(lnksta & PCI_EXP_LNKSTA_NLW)) { - lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1); } if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) { lnksta &= ~PCI_EXP_LNKSTA_CLS; lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS; - } else if (!(lnksta & PCI_EXP_LNKSTA_CLS)) { - lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT); } } + if (!(lnksta & PCI_EXP_LNKSTA_NLW)) { + lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1); + } + + if (!(lnksta & PCI_EXP_LNKSTA_CLS)) { + lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT); + } + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta & @@ -1177,3 +1214,81 @@ void pcie_acs_reset(PCIDevice *dev) pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0); } } + +/* PASID */ +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width, + bool exec_perm, bool priv_mod) +{ + static const uint16_t control_reg_rw_mask = 0x07; + uint16_t capability_reg; + + assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH); + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset, + PCI_EXT_CAP_PASID_SIZEOF); + + capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT; + capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0; + capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0; + pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg); + + /* Everything is disabled by default */ + pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0); + + pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask); + + dev->exp.pasid_cap = offset; +} + +/* PRI */ +void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap, + bool prg_response_pasid_req) +{ + static const uint16_t control_reg_rw_mask = 0x3; + static const uint16_t status_reg_rw1_mask = 0x3; + static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff; + uint16_t status_reg; + + status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0; + status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */ + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset, + PCI_EXT_CAP_PRI_SIZEOF); + /* Disabled by default */ + + pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg); + pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap); + + pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask); + pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask); + pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask); + + dev->exp.pri_cap = offset; +} + +bool pcie_pri_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pri_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) & + PCI_PRI_CTRL_ENABLE) != 0; +} + +bool pcie_pasid_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pasid_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) & + PCI_PASID_CTRL_ENABLE) != 0; +} + +bool pcie_ats_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.ats_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) & + PCI_ATS_CTRL_ENABLE) != 0; +} diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index 20ff2b3..f3841a2 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -92,16 +92,6 @@ static PCIESlot *pcie_chassis_find_slot_with_chassis(struct PCIEChassis *c, return s; } -PCIESlot *pcie_chassis_find_slot(uint8_t chassis_number, uint16_t slot) -{ - struct PCIEChassis *c; - c = pcie_chassis_find(chassis_number); - if (!c) { - return NULL; - } - return pcie_chassis_find_slot_with_chassis(c, slot); -} - int pcie_chassis_add_slot(struct PCIESlot *slot) { struct PCIEChassis *c; @@ -121,15 +111,14 @@ void pcie_chassis_del_slot(PCIESlot *s) QLIST_REMOVE(s, next); } -static Property pcie_port_props[] = { +static const Property pcie_port_props[] = { DEFINE_PROP_UINT8("port", PCIEPort, port, 0), DEFINE_PROP_UINT16("aer_log_max", PCIEPort, parent_obj.parent_obj.exp.aer_log.log_max, PCIE_AER_LOG_MAX_DEFAULT), - DEFINE_PROP_END_OF_LIST() }; -static void pcie_port_class_init(ObjectClass *oc, void *data) +static void pcie_port_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -199,7 +188,7 @@ int pcie_count_ds_ports(PCIBus *bus) return dsp_count; } -static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler, +static bool pcie_slot_is_hotpluggable_bus(HotplugHandler *plug_handler, BusState *bus) { PCIESlot *s = PCIE_SLOT(bus->parent); @@ -214,16 +203,15 @@ static const TypeInfo pcie_port_type_info = { .class_init = pcie_port_class_init, }; -static Property pcie_slot_props[] = { +static const Property pcie_slot_props[] = { DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), DEFINE_PROP_BOOL("x-do-not-expose-native-hotplug-cap", PCIESlot, hide_native_hotplug_cap, false), - DEFINE_PROP_END_OF_LIST() }; -static void pcie_slot_class_init(ObjectClass *oc, void *data) +static void pcie_slot_class_init(ObjectClass *oc, const void *data) { DeviceClass *dc = DEVICE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); @@ -233,7 +221,7 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data) hc->plug = pcie_cap_slot_plug_cb; hc->unplug = pcie_cap_slot_unplug_cb; hc->unplug_request = pcie_cap_slot_unplug_request_cb; - hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus; + hc->is_hotpluggable_bus = pcie_slot_is_hotpluggable_bus; } static const TypeInfo pcie_slot_type_info = { @@ -242,7 +230,7 @@ static const TypeInfo pcie_slot_type_info = { .instance_size = sizeof(PCIESlot), .abstract = true, .class_init = pcie_slot_class_init, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { } } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index e9b2322..3ad1874 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -15,28 +15,83 @@ #include "hw/pci/pcie.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" -#include "qemu/error-report.h" #include "qemu/range.h" #include "qapi/error.h" #include "trace.h" -static PCIDevice *register_vf(PCIDevice *pf, int devfn, - const char *name, uint16_t vf_num); -static void unregister_vfs(PCIDevice *dev); +static GHashTable *pfs; -void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, - const char *vfname, uint16_t vf_dev_id, - uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride) +static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) +{ + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + object_unparent(OBJECT(vf)); + object_unref(OBJECT(vf)); + } + g_free(dev->exp.sriov_pf.vf); + dev->exp.sriov_pf.vf = NULL; +} + +static void register_vfs(PCIDevice *dev) +{ + uint16_t num_vfs; + uint16_t i; + uint16_t sriov_cap = dev->exp.sriov_cap; + + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + + trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + } + + pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0); +} + +static void unregister_vfs(PCIDevice *dev) +{ + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + uint16_t i; + + trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + } + + pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff); +} + +static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, + uint16_t vf_dev_id, uint16_t init_vfs, + uint16_t total_vfs, uint16_t vf_offset, + uint16_t vf_stride, Error **errp) { + int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV PF"); + return false; + } + + if (pci_is_vf(dev)) { + error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time"); + return false; + } + + if (total_vfs && + (uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) { + error_setg(errp, "VF addr overflows"); + return false; + } + pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; - dev->exp.sriov_pf.num_vfs = 0; - dev->exp.sriov_pf.vfname = g_strdup(vfname); dev->exp.sriov_pf.vf = NULL; pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); @@ -69,13 +124,74 @@ void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553); qdev_prop_set_bit(&dev->qdev, "multifunction", true); + + return true; +} + +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, + const char *vfname, uint16_t vf_dev_id, + uint16_t init_vfs, uint16_t total_vfs, + uint16_t vf_offset, uint16_t vf_stride, + Error **errp) +{ + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; + + if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs, + total_vfs, vf_offset, vf_stride, errp)) { + return false; + } + + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); + + for (uint16_t i = 0; i < total_vfs; i++) { + PCIDevice *vf = pci_new(devfn, vfname); + vf->exp.sriov_vf.pf = dev; + vf->exp.sriov_vf.vf_number = i; + + if (!qdev_realize(&vf->qdev, bus, errp)) { + object_unparent(OBJECT(vf)); + object_unref(vf); + unparent_vfs(dev, i); + return false; + } + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vf->config, 0xffff); + pci_config_set_device_id(vf->config, 0xffff); + + dev->exp.sriov_pf.vf[i] = vf; + devfn += vf_stride; + } + + return true; } void pcie_sriov_pf_exit(PCIDevice *dev) { - unregister_vfs(dev); - g_free((char *)dev->exp.sriov_pf.vfname); - dev->exp.sriov_pf.vfname = NULL; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + if (dev->exp.sriov_pf.vf_user_created) { + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF); + uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID); + + unregister_vfs(dev); + + for (uint16_t i = 0; i < total_vfs; i++) { + dev->exp.sriov_pf.vf[i]->exp.sriov_vf.pf = NULL; + + pci_config_set_vendor_id(dev->exp.sriov_pf.vf[i]->config, ven_id); + pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id); + } + } else { + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + } } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -108,113 +224,179 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory) { - PCIIORegion *r; - PCIBus *bus = pci_get_bus(dev); uint8_t type; - pcibus_t size = memory_region_size(memory); - assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ - assert(region_num >= 0); - assert(region_num < PCI_NUM_REGIONS); + assert(dev->exp.sriov_vf.pf); type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; - if (!is_power_of_2(size)) { - error_report("%s: PCI region size must be a power" - " of two - type=0x%x, size=0x%"FMT_PCIBUS, - __func__, type, size); - exit(1); - } + return pci_register_bar(dev, region_num, type, memory); +} - r = &dev->io_regions[region_num]; - r->memory = memory; - r->address_space = - type & PCI_BASE_ADDRESS_SPACE_IO - ? bus->address_space_io - : bus->address_space_mem; - r->size = size; - r->type = type; - - r->addr = pci_bar_address(dev, region_num, r->type, r->size); - if (r->addr != PCI_BAR_UNMAPPED) { - memory_region_add_subregion_overlap(r->address_space, - r->addr, r->memory, 1); - } +static gint compare_vf_devfns(gconstpointer a, gconstpointer b) +{ + return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn; } -static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, - uint16_t vf_num) +int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, + uint16_t offset, + Error **errp) { - PCIDevice *dev = pci_new(devfn, name); - dev->exp.sriov_vf.pf = pf; - dev->exp.sriov_vf.vf_number = vf_num; - PCIBus *bus = pci_get_bus(pf); - Error *local_err = NULL; - - qdev_realize(&dev->qdev, &bus->qbus, &local_err); - if (local_err) { - error_report_err(local_err); - return NULL; + GPtrArray *pf; + PCIDevice **vfs; + BusState *bus = qdev_get_parent_bus(DEVICE(dev)); + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t size = PCI_EXT_CAP_SRIOV_SIZEOF; + uint16_t vf_dev_id; + uint16_t vf_offset; + uint16_t vf_stride; + uint16_t i; + + if (!pfs || !dev->qdev.id) { + return 0; + } + + pf = g_hash_table_lookup(pfs, dev->qdev.id); + if (!pf) { + return 0; + } + + if (pf->len > UINT16_MAX) { + error_setg(errp, "too many VFs"); + return -1; } - /* set vid/did according to sr/iov spec - they are not used */ - pci_config_set_vendor_id(dev->config, 0xffff); - pci_config_set_device_id(dev->config, 0xffff); + g_ptr_array_sort(pf, compare_vf_devfns); + vfs = (void *)pf->pdata; + + if (vfs[0]->devfn <= dev->devfn) { + error_setg(errp, "a VF function number is less than the PF function number"); + return -1; + } + + vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID); + vf_offset = vfs[0]->devfn - dev->devfn; + vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn; + + for (i = 0; i < pf->len; i++) { + if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) { + error_setg(errp, "SR-IOV VF parent bus mismatches with PF"); + return -1; + } + + if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) { + error_setg(errp, "SR-IOV VF vendor ID mismatches with PF"); + return -1; + } + + if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) { + error_setg(errp, "inconsistent SR-IOV VF device IDs"); + return -1; + } - return dev; + for (size_t j = 0; j < PCI_NUM_REGIONS; j++) { + if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size || + vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) { + error_setg(errp, "inconsistent SR-IOV BARs"); + return -1; + } + } + + if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) { + error_setg(errp, "inconsistent SR-IOV stride"); + return -1; + } + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len, + pf->len, vf_offset, vf_stride, errp)) { + return -1; + } + + if (!pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI)) { + pcie_ari_init(dev, offset + size); + size += PCI_ARI_SIZEOF; + } + + for (i = 0; i < pf->len; i++) { + vfs[i]->exp.sriov_vf.pf = dev; + vfs[i]->exp.sriov_vf.vf_number = i; + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vfs[i]->config, 0xffff); + pci_config_set_device_id(vfs[i]->config, 0xffff); + } + + dev->exp.sriov_pf.vf = vfs; + dev->exp.sriov_pf.vf_user_created = true; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + PCIIORegion *region = &vfs[0]->io_regions[i]; + + if (region->size) { + pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size); + } + } + + return size; } -static void register_vfs(PCIDevice *dev) +bool pcie_sriov_register_device(PCIDevice *dev, Error **errp) { - uint16_t num_vfs; - uint16_t i; - uint16_t sriov_cap = dev->exp.sriov_cap; - uint16_t vf_offset = - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); - uint16_t vf_stride = - pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - int32_t devfn = dev->devfn + vf_offset; - - assert(sriov_cap > 0); - num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); - if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { - return; + if (!dev->exp.sriov_pf.vf && dev->qdev.id && + pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; } - dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); + if (dev->sriov_pf) { + PCIDevice *pci_pf; + GPtrArray *pf; - trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { - dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, - dev->exp.sriov_pf.vfname, i); - if (!dev->exp.sriov_pf.vf[i]) { - num_vfs = i; - break; + if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) { + error_setg(errp, "user cannot create SR-IOV VF with this device type"); + return false; } - devfn += vf_stride; + + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV VF"); + return false; + } + + if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) { + error_setg(errp, "PCI device specified as SR-IOV PF already exists"); + return false; + } + + if (!pfs) { + pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); + } + + pf = g_hash_table_lookup(pfs, dev->sriov_pf); + if (!pf) { + pf = g_ptr_array_new(); + g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf); + } + + g_ptr_array_add(pf, dev); } - dev->exp.sriov_pf.num_vfs = num_vfs; + + return true; } -static void unregister_vfs(PCIDevice *dev) +void pcie_sriov_unregister_device(PCIDevice *dev) { - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; - uint16_t i; + if (dev->sriov_pf && pfs) { + GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf); - trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { - Error *err = NULL; - PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) { - error_reportf_err(err, "Failed to unplug: "); + if (pf) { + g_ptr_array_remove_fast(pf, dev); + + if (!pf->len) { + g_hash_table_remove(pfs, dev->sriov_pf); + g_ptr_array_free(pf, FALSE); + } } - object_unparent(OBJECT(vf)); - object_unref(OBJECT(vf)); } - g_free(dev->exp.sriov_pf.vf); - dev->exp.sriov_pf.vf = NULL; - dev->exp.sriov_pf.num_vfs = 0; } void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, @@ -235,15 +417,29 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, PCI_FUNC(dev->devfn), off, val, len); if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { - if (dev->exp.sriov_pf.num_vfs) { - if (!(val & PCI_SRIOV_CTRL_VFE)) { - unregister_vfs(dev); - } + if (val & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); } else { - if (val & PCI_SRIOV_CTRL_VFE) { - register_vfs(dev); - } + unregister_vfs(dev); } + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { + uint8_t *cfg = dev->config + sriov_cap; + uint8_t *wmask = dev->wmask + sriov_cap; + uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF); + uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI; + + if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) { + wmask_val |= PCI_SRIOV_CTRL_VFE; + } + + pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val); + } +} + +void pcie_sriov_pf_post_load(PCIDevice *dev) +{ + if (dev->exp.sriov_cap) { + register_vfs(dev); } } @@ -260,6 +456,8 @@ void pcie_sriov_pf_reset(PCIDevice *dev) unregister_vfs(dev); pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0); + pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_CTRL, + PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI); /* * Default is to use 4K pages, software can modify it @@ -294,7 +492,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) uint16_t pcie_sriov_vf_number(PCIDevice *dev) { - assert(pci_is_vf(dev)); + assert(dev->exp.sriov_vf.pf); return dev->exp.sriov_vf.vf_number; } @@ -306,7 +504,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) { assert(!pci_is_vf(dev)); - if (n < dev->exp.sriov_pf.num_vfs) { + if (n < pcie_sriov_num_vfs(dev)) { return dev->exp.sriov_pf.vf[n]; } return NULL; @@ -314,5 +512,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) uint16_t pcie_sriov_num_vfs(PCIDevice *dev) { - return dev->exp.sriov_pf.num_vfs; + uint16_t sriov_cap = dev->exp.sriov_cap; + uint8_t *cfg = dev->config + sriov_cap; + + return sriov_cap && + (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ? + pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0; } diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 19643aa..02c80d3 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -1,9 +1,15 @@ # See docs/devel/tracing.rst for syntax documentation. # pci.c +pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d" +pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d" pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" +pci_bad_rom_magic(uint16_t bad_rom_magic, uint16_t good_rom_magic) "Bad ROM magic number: %04"PRIX16". Should be: %04"PRIX16 +pci_bad_pcir_offset(uint16_t pcir_offset) "Bad PCIR offset 0x%"PRIx16" or signature" +pci_rom_and_pci_ids(char *romfile, uint16_t vendor_id, uint16_t device_id, uint16_t rom_vendor_id, uint16_t rom_device_id) "%s: ROM ID %04"PRIx16":%04"PRIx16" | PCI ID %04"PRIx16":%04"PRIx16 +pci_rom_checksum_change(uint8_t old_checksum, uint8_t new_checksum) "ROM checksum changed from %02"PRIx8" to %02"PRIx8 # pci_host.c pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x" @@ -14,7 +20,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask # hw/pci/pcie_sriov.c sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs" -sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs" +sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs" sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d" # pcie.c |