aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- hw/i386/kvm/xen_evtchn.c | 80
-rw-r--r-- target/i386/kvm/xen-emu.c | 34
2 files changed, 100 insertions, 14 deletions
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 6b0bdba..f39b751 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -28,6 +28,8 @@
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/i386/x86.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
#include "hw/irq.h"
#include "xen_evtchn.h"
@@ -101,6 +103,7 @@ struct XenEvtchnState {
uint64_t callback_param;
bool evtchn_in_kernel;
+ uint32_t callback_gsi;
QEMUBH *gsi_bh;
@@ -217,11 +220,41 @@ static void xen_evtchn_register_types(void)
type_init(xen_evtchn_register_types)
+static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
+{ /* Map a PCI_INTX callback param to its routed IRQ; 0 if none. */
+ PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+ uint8_t pin = param & 3; /* INTx pin: param bits 0-1 */
+ uint8_t devfn = (param >> 8) & 0xff; /* param bits 8-15 */
+ uint16_t bus = (param >> 16) & 0xffff; /* param bits 16-31 */
+ uint16_t domain = (param >> 32) & 0xffff; /* param bits 32-47 */
+ PCIDevice *pdev;
+ PCIINTxRoute r;
+
+ if (domain || !pcms) {
+ return 0; /* nonzero domain or no machine state: no GSI */
+ }
+
+ pdev = pci_find_device(pcms->bus, bus, devfn);
+ if (!pdev) {
+ return 0; /* no device at this bus/devfn */
+ }
+
+ r = pci_device_route_intx_to_irq(pdev, pin);
+ if (r.mode != PCI_INTX_ENABLED) {
+ return 0; /* INTx routing is not enabled for this pin */
+ }
+
+ /*
+ * Hm, can we be notified of INTX routing changes? Not without
+ * *owning* the device and being allowed to overwrite its own
+ * ->intx_routing_notifier, AFAICT. So let's not.
+ */
+ return r.irq; /* caller stores this as the GSI; 0 becomes -EINVAL */
+}
+
void xen_evtchn_set_callback_level(int level)
{
XenEvtchnState *s = xen_evtchn_singleton;
- uint32_t param;
-
if (!s) {
return;
}
@@ -260,18 +293,12 @@ void xen_evtchn_set_callback_level(int level)
return;
}
- param = (uint32_t)s->callback_param;
-
- switch (s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) {
- case HVM_PARAM_CALLBACK_TYPE_GSI:
- if (param < IOAPIC_NUM_PINS) {
- qemu_set_irq(s->gsis[param], level);
- if (level) {
- /* Ensure the vCPU polls for deassertion */
- kvm_xen_set_callback_asserted();
- }
+ if (s->callback_gsi && s->callback_gsi < IOAPIC_NUM_PINS) {
+ qemu_set_irq(s->gsis[s->callback_gsi], level);
+ if (level) {
+ /* Ensure the vCPU polls for deassertion */
+ kvm_xen_set_callback_asserted();
}
- break;
}
}
@@ -283,15 +310,22 @@ int xen_evtchn_set_callback_param(uint64_t param)
.u.vector = 0,
};
bool in_kernel = false;
+ uint32_t gsi = 0;
+ int type = param >> CALLBACK_VIA_TYPE_SHIFT;
int ret;
if (!s) {
return -ENOTSUP;
}
+ /*
+ * We need the BQL because set_callback_pci_intx() may call into PCI code,
+ * and because we may need to manipulate the old and new GSI levels.
+ */
+ assert(qemu_mutex_iothread_locked());
qemu_mutex_lock(&s->port_lock);
- switch (param >> CALLBACK_VIA_TYPE_SHIFT) {
+ switch (type) {
case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
xa.u.vector = (uint8_t)param,
@@ -299,10 +333,17 @@ int xen_evtchn_set_callback_param(uint64_t param)
if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
in_kernel = true;
}
+ gsi = 0;
break;
}
+ case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
+ gsi = set_callback_pci_intx(s, param);
+ ret = gsi ? 0 : -EINVAL;
+ break;
+
case HVM_PARAM_CALLBACK_TYPE_GSI:
+ gsi = (uint32_t)param;
ret = 0;
break;
@@ -320,6 +361,17 @@ int xen_evtchn_set_callback_param(uint64_t param)
}
s->callback_param = param;
s->evtchn_in_kernel = in_kernel;
+
+ if (gsi != s->callback_gsi) {
+ struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
+
+ xen_evtchn_set_callback_level(0);
+ s->callback_gsi = gsi;
+
+ if (gsi && vi && vi->evtchn_upcall_pending) {
+ kvm_xen_inject_vcpu_callback_vector(0, type);
+ }
+ }
}
qemu_mutex_unlock(&s->port_lock);
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index b52617d..9e22c9f 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -131,6 +131,38 @@ int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
return ret;
}
+ /* If called a second time, don't repeat the rest of the setup. */
+ if (s->xen_caps) {
+ return 0;
+ }
+
+ /*
+ * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
+ * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
+ *
+ * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
+ * such things to be polled at precisely the right time. We *could* do
+ * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
+ * the moment the IRQ is acked, and see if it should be reasserted.
+ *
+ * But the in-kernel irqchip is deprecated, so we're unlikely to add
+ * that support in the kernel. Insist on using the split irqchip mode
+ * instead.
+ *
+ * This leaves us polling for the level going low in QEMU, which lacks
+ * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
+ * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
+ * the device (for which it has to unmap the device and trap access, for
+ * some period after an IRQ!!). In the Xen case, we do it on exit from
+ * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
+ * Which is kind of icky, but less so than the VFIO one. I may fix them
+ * both later...
+ */
+ if (!kvm_kernel_irqchip_split()) {
+ error_report("kvm: Xen support requires kernel-irqchip=split");
+ return -EINVAL;
+ }
+
s->xen_caps = xen_caps;
return 0;
}
@@ -684,7 +716,9 @@ static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
switch (hp.index) {
case HVM_PARAM_CALLBACK_IRQ:
+ qemu_mutex_lock_iothread();
err = xen_evtchn_set_callback_param(hp.value);
+ qemu_mutex_unlock_iothread();
xen_set_long_mode(exit->u.hcall.longmode);
break;
default: