diff options
author | David Woodhouse <dwmw@amazon.co.uk> | 2023-01-13 23:35:46 +0000 |
---|---|---|
committer | David Woodhouse <dwmw@amazon.co.uk> | 2023-03-01 09:09:22 +0000 |
commit | 6096cf7877bc6ee84e6b3b44dfe144bc8b549724 (patch) | |
tree | 15f6b36d90979aa91f68e5df93f159507e7666a0 /hw/pci | |
parent | 4f81baa33ed645fc17a9908236630b8154502ae5 (diff) | |
download | qemu-6096cf7877bc6ee84e6b3b44dfe144bc8b549724.zip qemu-6096cf7877bc6ee84e6b3b44dfe144bc8b549724.tar.gz qemu-6096cf7877bc6ee84e6b3b44dfe144bc8b549724.tar.bz2 |
hw/xen: Support MSI mapping to PIRQ
The way that Xen handles MSI PIRQs is kind of awful.
There is a special MSI message which targets a PIRQ. The vector in the
low bits of data must be zero. The low 8 bits of the PIRQ# are in the
destination ID field, the extended destination ID field is unused, and
instead the high bits of the PIRQ# are in the high 32 bits of the address.
Using the high bits of the address means that we can't intercept and
translate these messages in kvm_send_msi(), because they won't be caught
by the APIC — addresses like 0x1000fee46000 aren't in the APIC's range.
So we catch them in pci_msi_trigger() instead, and deliver the event
channel directly.
That isn't even the worst part. The worst part is that Xen snoops on
writes to devices' MSI vectors while they are *masked*. When a MSI
message is written which looks like it targets a PIRQ, it remembers
the device and vector for later.
When the guest makes a hypercall to bind that PIRQ# (snooped from a
marked MSI vector) to an event channel port, Xen *unmasks* that MSI
vector on the device. Xen guests using PIRQ delivery of MSI don't
ever actually unmask the MSI for themselves.
Now that this is working we can finally enable XENFEAT_hvm_pirqs and
let the guest use it all.
Tested with passthrough igb and emulated e1000e + AHCI.
CPU0 CPU1
0: 65 0 IO-APIC 2-edge timer
1: 0 14 xen-pirq 1-ioapic-edge i8042
4: 0 846 xen-pirq 4-ioapic-edge ttyS0
8: 1 0 xen-pirq 8-ioapic-edge rtc0
9: 0 0 xen-pirq 9-ioapic-level acpi
12: 257 0 xen-pirq 12-ioapic-edge i8042
24: 9600 0 xen-percpu -virq timer0
25: 2758 0 xen-percpu -ipi resched0
26: 0 0 xen-percpu -ipi callfunc0
27: 0 0 xen-percpu -virq debug0
28: 1526 0 xen-percpu -ipi callfuncsingle0
29: 0 0 xen-percpu -ipi spinlock0
30: 0 8608 xen-percpu -virq timer1
31: 0 874 xen-percpu -ipi resched1
32: 0 0 xen-percpu -ipi callfunc1
33: 0 0 xen-percpu -virq debug1
34: 0 1617 xen-percpu -ipi callfuncsingle1
35: 0 0 xen-percpu -ipi spinlock1
36: 8 0 xen-dyn -event xenbus
37: 0 6046 xen-pirq -msi ahci[0000:00:03.0]
38: 1 0 xen-pirq -msi-x ens4
39: 0 73 xen-pirq -msi-x ens4-rx-0
40: 14 0 xen-pirq -msi-x ens4-rx-1
41: 0 32 xen-pirq -msi-x ens4-tx-0
42: 47 0 xen-pirq -msi-x ens4-tx-1
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
Diffstat (limited to 'hw/pci')
-rw-r--r-- | hw/pci/msi.c | 11 | ||||
-rw-r--r-- | hw/pci/msix.c | 9 | ||||
-rw-r--r-- | hw/pci/pci.c | 17 |
3 files changed, 37 insertions, 0 deletions
diff --git a/hw/pci/msi.c b/hw/pci/msi.c index 1cadf15..041b0bd 100644 --- a/hw/pci/msi.c +++ b/hw/pci/msi.c @@ -24,6 +24,8 @@ #include "qemu/range.h" #include "qapi/error.h" +#include "hw/i386/kvm/xen_evtchn.h" + /* PCI_MSI_ADDRESS_LO */ #define PCI_MSI_ADDRESS_LO_MASK (~0x3) @@ -414,6 +416,15 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) fprintf(stderr, "\n"); #endif + if (xen_mode == XEN_EMULATE) { + for (vector = 0; vector < msi_nr_vectors(flags); vector++) { + MSIMessage msg = msi_prepare_message(dev, vector); + + xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data, + msi_is_masked(dev, vector)); + } + } + if (!(flags & PCI_MSI_FLAGS_ENABLE)) { return; } diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 9e70fcd..ab8869d 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -26,6 +26,8 @@ #include "qapi/error.h" #include "trace.h" +#include "hw/i386/kvm/xen_evtchn.h" + /* MSI enable bit and maskall bit are in byte 1 in FLAGS register */ #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1) #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) @@ -124,6 +126,13 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) { bool is_masked = msix_is_masked(dev, vector); + if (xen_mode == XEN_EMULATE) { + MSIMessage msg = msix_prepare_message(dev, vector); + + xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data, + is_masked); + } + if (is_masked == was_masked) { return; } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index bad8e63..10c980b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -49,6 +49,9 @@ #include "qemu/cutils.h" #include "pci-internal.h" +#include "hw/xen/xen.h" +#include "hw/i386/kvm/xen_evtchn.h" + //#define DEBUG_PCI #ifdef DEBUG_PCI # define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) @@ -319,6 +322,17 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) { MemTxAttrs attrs = {}; + /* + * Xen uses the high bits of the address to contain some of the bits + * of the PIRQ#. Therefore we can't just send the write cycle and + * trust that it's caught by the APIC at 0xfee00000 because the + * target of the write might be e.g. 0x0x1000fee46000 for PIRQ#4166. + * So we intercept the delivery here instead of in kvm_send_msi(). + */ + if (xen_mode == XEN_EMULATE && + xen_evtchn_deliver_pirq_msi(msg.address, msg.data)) { + return; + } attrs.requester_id = pci_requester_id(dev); address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, attrs, NULL); @@ -988,6 +1002,9 @@ static void do_pci_unregister_device(PCIDevice *pci_dev) pci_get_bus(pci_dev)->devices[pci_dev->devfn] = NULL; pci_config_free(pci_dev); + if (xen_mode == XEN_EMULATE) { + xen_evtchn_remove_pci_device(pci_dev); + } if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) { memory_region_del_subregion(&pci_dev->bus_master_container_region, &pci_dev->bus_master_enable_region); |