about summary refs log tree commit diff
path: root/hw/pci/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/pci/pci.c')
-rw-r--r-- hw/pci/pci.c 442
1 files changed, 381 insertions, 61 deletions
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b6c630c..c70b5ce 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -32,6 +32,7 @@
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "migration/cpr.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
#include "net/net.h"
@@ -54,13 +55,6 @@
#include "hw/xen/xen.h"
#include "hw/i386/kvm/xen_evtchn.h"
-//#define DEBUG_PCI
-#ifdef DEBUG_PCI
-# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__)
-#else
-# define PCI_DPRINTF(format, ...) do { } while (0)
-#endif
-
bool pci_available = true;
static char *pcibus_get_dev_path(DeviceState *dev);
@@ -77,7 +71,7 @@ static void prop_pci_busnr_get(Object *obj, Visitor *v, const char *name,
}
+/* PropertyInfo used by the "busnr" entry of pci_props; only a getter is set. */
static const PropertyInfo prop_pci_busnr = {
- .name = "busnr",
+ .type = "busnr",
.get = prop_pci_busnr_get,
};
@@ -85,7 +79,7 @@ static const Property pci_props[] = {
DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, UINT32_MAX),
- DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1),
+ DEFINE_PROP_INT32("rombar", PCIDevice, rom_bar, -1),
DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present,
QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false),
DEFINE_PROP_BIT("x-pcie-lnksta-dllla", PCIDevice, cap_present,
@@ -101,6 +95,7 @@ static const Property pci_props[] = {
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice,
max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE),
+ DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present,
QEMU_PCIE_EXT_TAG_BITNR, true),
{ .name = "busnr", .info = &prop_pci_busnr },
@@ -134,6 +129,12 @@ static GSequence *pci_acpi_index_list(void)
return used_acpi_index_list;
}
+/*
+ * Enable or disable the device's bus_master_enable_region and record the
+ * same value in d->is_master so it can be checked without touching the
+ * memory region.
+ */
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+ memory_region_set_enabled(&d->bus_master_enable_region, enable);
+ d->is_master = enable; /* cache the status */
+}
+
static void pci_init_bus_master(PCIDevice *pci_dev)
{
AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -141,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
memory_region_init_alias(&pci_dev->bus_master_enable_region,
OBJECT(pci_dev), "bus master",
dma_as->root, 0, memory_region_size(dma_as->root));
- memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+ pci_set_master(pci_dev, false);
memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
&pci_dev->bus_master_enable_region);
}
@@ -261,7 +262,7 @@ static GByteArray *pci_bus_fw_cfg_gen_data(Object *obj, Error **errp)
return byte_array;
}
-static void pci_bus_class_init(ObjectClass *klass, void *data)
+static void pci_bus_class_init(ObjectClass *klass, const void *data)
{
BusClass *k = BUS_CLASS(klass);
PCIBusClass *pbc = PCI_BUS_CLASS(klass);
@@ -288,7 +289,7 @@ static const TypeInfo pci_bus_info = {
.instance_size = sizeof(PCIBus),
.class_size = sizeof(PCIBusClass),
.class_init = pci_bus_class_init,
- .interfaces = (InterfaceInfo[]) {
+ .interfaces = (const InterfaceInfo[]) {
{ TYPE_FW_CFG_DATA_GENERATOR_INTERFACE },
{ }
}
@@ -309,7 +310,7 @@ static const TypeInfo conventional_pci_interface_info = {
.parent = TYPE_INTERFACE,
};
-static void pcie_bus_class_init(ObjectClass *klass, void *data)
+static void pcie_bus_class_init(ObjectClass *klass, const void *data)
{
BusClass *k = BUS_CLASS(klass);
@@ -435,6 +436,84 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
attrs, NULL);
}
+/*
+ * Register and track a PM capability. If wmask is also enabled for the power
+ * state field of the pmcsr register, guest writes may change the device PM
+ * state. BAR access is only enabled while the device is in the D0 state.
+ * Return the capability offset or negative error code.
+ *
+ * @d: device receiving the capability
+ * @offset: passed unchanged to pci_add_capability()
+ * @errp: passed unchanged to pci_add_capability()
+ *
+ * On success the returned offset is stored in d->pm_cap and QEMU_PCI_CAP_PM
+ * is set in d->cap_present. On failure neither field is modified.
+ */
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
+{
+ int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp);
+
+ /* Adding the capability failed: hand the negative value straight back. */
+ if (cap < 0) {
+ return cap;
+ }
+
+ d->pm_cap = cap;
+ d->cap_present |= QEMU_PCI_CAP_PM;
+
+ return cap;
+}
+
+/*
+ * Return the power state field of the PM control register: the word at
+ * d->pm_cap + PCI_PM_CTRL masked with PCI_PM_CTRL_STATE_MASK.
+ * A device without QEMU_PCI_CAP_PM set in cap_present always reports 0.
+ */
+static uint8_t pci_pm_state(PCIDevice *d)
+{
+ uint16_t pmcsr;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
+ return 0;
+ }
+
+ pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
+
+ return pmcsr & PCI_PM_CTRL_STATE_MASK;
+}
+
+/*
+ * Update the PM capability state based on the new value stored in config
+ * space respective to the old, pre-write state provided. If the new value
+ * is rejected (unsupported or invalid transition) restore the old value.
+ * Return the resulting PM state.
+ *
+ * @d: device whose config space has just been written
+ * @addr: first config-space offset covered by the write
+ * @l: length of the write in bytes
+ * @old: PM state sampled before the write was applied
+ */
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
+{
+ uint16_t pmc;
+ uint8_t new;
+
+ /* No PM capability, or the write did not cover the PCI_PM_CTRL byte. */
+ if (!(d->cap_present & QEMU_PCI_CAP_PM) ||
+ !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
+ return old;
+ }
+
+ new = pci_pm_state(d);
+ if (new == old) {
+ return old;
+ }
+
+ pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
+
+ /*
+ * Transitions to D1 & D2 are only allowed if supported. Devices may
+ * only transition to higher D-states or to D0.
+ */
+ if ((!(pmc & PCI_PM_CAP_D1) && new == 1) ||
+ (!(pmc & PCI_PM_CAP_D2) && new == 2) ||
+ (old && new && new < old)) {
+ /* Rejected: clear the state field, then write the old state back. */
+ pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ old);
+ trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
+ PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+ old, new);
+ return old;
+ }
+
+ trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn),
+ PCI_FUNC(d->devfn), old, new);
+ return new;
+}
+
static void pci_reset_regions(PCIDevice *dev)
{
int r;
@@ -459,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev)
static void pci_do_device_reset(PCIDevice *dev)
{
+ if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) {
+ return;
+ }
+
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);
@@ -474,6 +557,11 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+ /* Default PM state is D0 */
+ if (dev->cap_present & QEMU_PCI_CAP_PM) {
+ pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ }
pci_reset_regions(dev);
pci_update_mappings(dev);
@@ -727,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
pci_bridge_update_mappings(PCI_BRIDGE(s));
}
- memory_region_set_enabled(&s->bus_master_enable_region,
- pci_get_word(s->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER);
+ pci_set_master(s, pci_get_word(s->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER);
g_free(config);
return 0;
@@ -803,10 +890,17 @@ static bool migrate_is_not_pcie(void *opaque, int version_id)
return !pci_is_express((PCIDevice *)opaque);
}
+/*
+ * post_load hook of vmstate_pci_device: passes @opaque on to
+ * pcie_sriov_pf_post_load() and always returns 0. @version_id is unused.
+ */
+static int pci_post_load(void *opaque, int version_id)
+{
+ pcie_sriov_pf_post_load(opaque);
+ return 0;
+}
+
const VMStateDescription vmstate_pci_device = {
.name = "PCIDevice",
.version_id = 2,
.minimum_version_id = 1,
+ .post_load = pci_post_load,
.fields = (const VMStateField[]) {
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
@@ -1022,13 +1116,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}
- /*
- * With SR/IOV and ARI, a device at function 0 need not be a multifunction
- * device, as it may just be a VF that ended up with function 0 in
- * the legacy PCI interpretation. Avoid failing in such cases:
- */
- if (pci_is_vf(dev) &&
- dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ /* SR/IOV is not handled here. */
+ if (pci_is_vf(dev)) {
return;
}
@@ -1061,7 +1150,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/* function 0 indicates single function, so function > 0 must be NULL */
for (func = 1; func < PCI_FUNC_MAX; ++func) {
- if (bus->devices[PCI_DEVFN(slot, func)]) {
+ PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
+ if (device && !pci_is_vf(device)) {
error_setg(errp, "PCI: %x.0 indicates single function, "
"but %x.%x is already populated.",
slot, slot, func);
@@ -1349,6 +1439,7 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_unregister_io_regions(pci_dev);
pci_del_option_rom(pci_dev);
+ pcie_sriov_unregister_device(pci_dev);
if (pc->exit) {
pc->exit(pci_dev);
@@ -1380,7 +1471,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
- assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@@ -1391,7 +1481,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
r = &pci_dev->io_regions[region_num];
- r->addr = PCI_BAR_UNMAPPED;
+ assert(!r->size);
r->size = size;
r->type = type;
r->memory = memory;
@@ -1399,22 +1489,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
? pci_get_bus(pci_dev)->address_space_io
: pci_get_bus(pci_dev)->address_space_mem;
- wmask = ~(size - 1);
- if (region_num == PCI_ROM_SLOT) {
- /* ROM enable bit is writable */
- wmask |= PCI_ROM_ADDRESS_ENABLE;
- }
-
- addr = pci_bar(pci_dev, region_num);
- pci_set_long(pci_dev->config + addr, type);
+ if (pci_is_vf(pci_dev)) {
+ PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
+ assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
- if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(pci_dev->wmask + addr, wmask);
- pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
+ if (r->addr != PCI_BAR_UNMAPPED) {
+ memory_region_add_subregion_overlap(r->address_space,
+ r->addr, r->memory, 1);
+ }
} else {
- pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
- pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ r->addr = PCI_BAR_UNMAPPED;
+
+ wmask = ~(size - 1);
+ if (region_num == PCI_ROM_SLOT) {
+ /* ROM enable bit is writable */
+ wmask |= PCI_ROM_ADDRESS_ENABLE;
+ }
+
+ addr = pci_bar(pci_dev, region_num);
+ pci_set_long(pci_dev->config + addr, type);
+
+ if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(pci_dev->wmask + addr, wmask);
+ pci_set_quad(pci_dev->cmask + addr, ~0ULL);
+ } else {
+ pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
+ pci_set_long(pci_dev->cmask + addr, 0xffffffff);
+ }
}
}
@@ -1503,7 +1606,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
uint16_t vf_stride =
pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
- uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
+ uint32_t vf_num = d->devfn - (pf->devfn + vf_offset);
+
+ if (vf_num) {
+ vf_num /= vf_stride;
+ }
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
new_addr = pci_get_quad(pf->config + bar);
@@ -1598,7 +1705,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->has_power) {
+ if (!d->enabled || pci_pm_state(d)) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1628,7 +1735,7 @@ static void pci_update_mappings(PCIDevice *d)
pci_update_vga(d);
}
-static inline int pci_irq_disabled(PCIDevice *d)
+/* Nonzero when PCI_COMMAND_INTX_DISABLE is set in the device's PCI_COMMAND. */
+int pci_irq_disabled(PCIDevice *d)
{
return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE;
}
@@ -1664,6 +1771,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
{
+ uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
int i, was_irq_disabled = pci_irq_disabled(d);
uint32_t val = val_in;
@@ -1676,17 +1784,21 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
}
+
+ new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
+
if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
- range_covers_byte(addr, l, PCI_COMMAND))
+ range_covers_byte(addr, l, PCI_COMMAND) ||
+ !!new_pm_state != !!old_pm_state) {
pci_update_mappings(d);
+ }
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) &
+ PCI_COMMAND_MASTER) && d->enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2171,6 +2283,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
+ if (!pcie_sriov_register_device(pci_dev, errp)) {
+ pci_qdev_unrealize(DEVICE(pci_dev));
+ return;
+ }
+
/*
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
* associate only Device 0 with the device attached to the bus
@@ -2342,12 +2459,12 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Only a valid rom will be patched. */
rom_magic = pci_get_word(ptr);
if (rom_magic != 0xaa55) {
- PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic);
+ trace_pci_bad_rom_magic(rom_magic, 0xaa55);
return;
}
pcir_offset = pci_get_word(ptr + 0x18);
if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) {
- PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset);
+ trace_pci_bad_pcir_offset(pcir_offset);
return;
}
@@ -2356,8 +2473,8 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
rom_vendor_id = pci_get_word(ptr + pcir_offset + 4);
rom_device_id = pci_get_word(ptr + pcir_offset + 6);
- PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile,
- vendor_id, device_id, rom_vendor_id, rom_device_id);
+ trace_pci_rom_and_pci_ids(pdev->romfile, vendor_id, device_id,
+ rom_vendor_id, rom_device_id);
checksum = ptr[6];
@@ -2365,7 +2482,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Patch vendor id and checksum (at offset 6 for etherboot roms). */
checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8);
checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8);
- PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum);
+ trace_pci_rom_checksum_change(ptr[6], checksum);
ptr[6] = checksum;
pci_set_word(ptr + pcir_offset + 4, vendor_id);
}
@@ -2374,7 +2491,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
/* Patch device id and checksum (at offset 6 for etherboot roms). */
checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8);
checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8);
- PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum);
+ trace_pci_rom_checksum_change(ptr[6], checksum);
ptr[6] = checksum;
pci_set_word(ptr + pcir_offset + 6, device_id);
}
@@ -2425,6 +2542,14 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
return;
}
+ if (pci_is_vf(pdev)) {
+ if (pdev->rom_bar > 0) {
+ error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF");
+ }
+
+ return;
+ }
+
if (load_file || pdev->romsize == UINT32_MAX) {
path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
if (path == NULL) {
@@ -2698,7 +2823,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev)
return pci_get_bus(dev)->address_space_io;
}
-static void pci_device_class_init(ObjectClass *klass, void *data)
+static void pci_device_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *k = DEVICE_CLASS(klass);
@@ -2712,7 +2837,7 @@ static void pci_device_class_init(ObjectClass *klass, void *data)
"access to indirect DMA memory");
}
-static void pci_device_class_base_init(ObjectClass *klass, void *data)
+static void pci_device_class_base_init(ObjectClass *klass, const void *data)
{
if (!object_class_is_abstract(klass)) {
ObjectClass *conventional =
@@ -2819,6 +2944,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
return &address_space_memory;
}
+/*
+ * Pass @n, @fn and @opaque to the init_iotlb_notifier op of the IOMMU bus
+ * found for @dev by pci_device_get_iommu_bus_devfn().
+ *
+ * Returns 0 after the op has been called, or -ENODEV when no IOMMU bus was
+ * found or its iommu_ops has no init_iotlb_notifier.
+ */
+int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n,
+ IOMMUNotify fn, void *opaque)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) {
+ iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque,
+ devfn, n, fn, opaque);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
Error **errp)
{
@@ -2850,6 +2992,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
}
}
+/*
+ * Forward a PRI page request for @dev to the pri_request_page op of its
+ * IOMMU bus and return that op's result.
+ *
+ * Fails before reaching the IOMMU with:
+ *   -EPERM   dev->is_master is false, @pasid is not PCI_NO_PASID while
+ *            pcie_pasid_enabled() is false, or pcie_pri_enabled() is false
+ *   -ENODEV  no IOMMU bus was found or it has no pri_request_page op
+ */
+int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req,
+ bool exec_req, hwaddr addr, bool lpig,
+ uint16_t prgi, bool is_read, bool is_write)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (!pcie_pri_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) {
+ return iommu_bus->iommu_ops->pri_request_page(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, lpig, prgi,
+ is_read, is_write);
+ }
+
+ return -ENODEV;
+}
+
+/*
+ * Pass @notifier for (@dev, @pasid) to the pri_register_notifier op of the
+ * device's IOMMU bus.
+ *
+ * Returns 0 after the op has been called, -EPERM when dev->is_master is
+ * false or @pasid is not PCI_NO_PASID while pcie_pasid_enabled() is false,
+ * and -ENODEV when no IOMMU bus was found or it lacks the op.
+ */
+int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUPRINotifier *notifier)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) {
+ iommu_bus->iommu_ops->pri_register_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, notifier);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+/*
+ * Call the pri_unregister_notifier op of the device's IOMMU bus for
+ * (@dev, @pasid). Does nothing when no IOMMU bus was found or it lacks the
+ * op. Unlike the register path, no is_master or PASID check is made here.
+ */
+void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) {
+ iommu_bus->iommu_ops->pri_unregister_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid);
+ }
+}
+
+/*
+ * Forward an ATS translation request for @dev to the
+ * ats_request_translation op of its IOMMU bus and return that op's result.
+ *
+ * Fails before reaching the IOMMU with:
+ *   -EPERM   dev->is_master is false, @pasid is not PCI_NO_PASID while
+ *            pcie_pasid_enabled() is false, or pcie_ats_enabled() is false
+ *   -ENOSPC  @result_length is 0
+ *   -ENODEV  no IOMMU bus was found or it has no ats_request_translation op
+ */
+ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write, IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (result_length == 0) {
+ return -ENOSPC;
+ }
+
+ if (!pcie_ats_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) {
+ return iommu_bus->iommu_ops->ats_request_translation(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, length,
+ no_write, result,
+ result_length, err_count);
+ }
+
+ return -ENODEV;
+}
+
+/*
+ * Pass @n for (@dev, @pasid) to the register_iotlb_notifier op of the
+ * device's IOMMU bus.
+ *
+ * Returns 0 after the op has been called, -EPERM when @pasid is not
+ * PCI_NO_PASID while pcie_pasid_enabled() is false, and -ENODEV when no
+ * IOMMU bus was found or it lacks the op.
+ */
+int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) {
+ iommu_bus->iommu_ops->register_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque, devfn,
+ pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+/*
+ * Pass @n for (@dev, @pasid) to the unregister_iotlb_notifier op of the
+ * device's IOMMU bus.
+ *
+ * Returns 0 after the op has been called, -EPERM when @pasid is not
+ * PCI_NO_PASID while pcie_pasid_enabled() is false, and -ENODEV when no
+ * IOMMU bus was found or it lacks the op.
+ */
+int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) {
+ iommu_bus->iommu_ops->unregister_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+/*
+ * Call the get_iotlb_info op of the device's IOMMU bus with @addr_width
+ * and @min_page_size. The op receives only iommu_opaque and the two
+ * pointers; bus and devfn are not passed.
+ *
+ * Returns 0 after the op has been called, or -ENODEV when no IOMMU bus was
+ * found or it lacks the op (the pointers are then not passed anywhere).
+ */
+int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width,
+ uint32_t *min_page_size)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) {
+ iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque,
+ addr_width, min_page_size);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
@@ -2963,16 +3269,30 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
+/* Apply power @state to @d through pci_set_enabled(); VFs are left as is. */
void pci_set_power(PCIDevice *d, bool state)
{
- if (d->has_power == state) {
+ /*
+ * Don't change the enabled state of VFs when powering on/off the device.
+ *
+ * When powering on, VFs must not be enabled immediately but they must
+ * wait until the guest configures SR-IOV.
+ * When powering off, their corresponding PFs will be reset and disable
+ * VFs.
+ */
+ if (!pci_is_vf(d)) {
+ pci_set_enabled(d, state);
+ }
+}
+
+/*
+ * Set d->enabled to @state. Nothing happens when the value is unchanged.
+ * Otherwise the BAR mappings are recomputed, bus mastering is set to
+ * (PCI_COMMAND_MASTER bit in PCI_COMMAND) && d->enabled, and the device is
+ * reset if it is already realized.
+ */
+void pci_set_enabled(PCIDevice *d, bool state)
+{
+ if (d->enabled == state) {
return;
}
- d->has_power = state;
+ d->enabled = state;
pci_update_mappings(d);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
- if (!d->has_power) {
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->enabled);
+ if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
}