author    David Woodhouse <dwmw@amazon.co.uk>  2022-12-15 20:35:24 +0000
committer David Woodhouse <dwmw@amazon.co.uk>  2023-03-01 09:06:44 +0000
commit    ddf0fd9ae1fd1ff95489763b37a483adb3cd5907 (patch)
tree      c3295981420b48bab9de241138cea4cad4735a0f /target/i386/kvm
parent    507cb64d6e66d672bfddb275fe746241e0ed8db2 (diff)
hw/xen: Support HVM_PARAM_CALLBACK_TYPE_GSI callback
The GSI callback (and later PCI_INTX) is a level triggered interrupt. It is asserted when an event channel is delivered to vCPU0, and is supposed to be cleared when the vcpu_info->evtchn_upcall_pending field for vCPU0 is cleared again. Thankfully, Xen does *not* assert the GSI if the guest sets its own evtchn_upcall_pending field; we only need to assert the GSI when we have delivered an event for ourselves. So that's the easy part, kind of.

There's a slight complexity in that we need to hold the BQL before we can call qemu_set_irq(), and we definitely can't do that while holding our own port_lock (because we'll need to take that from the qemu-side functions that the PV backend drivers will call). So if we end up wanting to set the IRQ in a context where we *don't* already hold the BQL, defer to a BH.

However, we *do* need to poll for the evtchn_upcall_pending flag being cleared. In an ideal world we would poll that when the EOI happens on the PIC/IOAPIC. That's how it works in the kernel with the VFIO eventfd pairs: one is used to trigger the interrupt, and the other works in the other direction to 'resample' on EOI, and trigger the first eventfd again if the line is still active. However, QEMU doesn't seem to do that. Even VFIO level interrupts seem to be supported by temporarily unmapping the device's BARs from the guest when an interrupt happens, then trapping *all* MMIO to the device and sending the 'resample' event on *every* MMIO access until the IRQ is cleared!

Maybe in future we'll plumb the 'resample' concept through QEMU's irq framework, but for now we'll do what Xen itself does: just check the flag on every vmexit if the upcall GSI is known to be asserted.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
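To make the locking rule above concrete, here is a minimal sketch (not code from this commit) of deferring an IRQ-set to a BH when the BQL is not already held. All names here (xen_cb_irq, xen_cb_level, xen_cb_bh, xen_cb_set_level) are hypothetical illustrations; the real logic lives in the Xen event channel device model.

/* Illustrative sketch only, not part of this commit. Assumes the usual
 * QEMU headers: qemu/osdep.h, qemu/main-loop.h, hw/irq.h, block/aio.h. */
static qemu_irq xen_cb_irq;   /* hypothetical: the callback GSI, already wired */
static int xen_cb_level;      /* hypothetical: last requested line level */

static void xen_cb_bh(void *opaque)
{
    /* Bottom halves run in the main loop with the BQL held, so calling
     * qemu_set_irq() here is safe. */
    qemu_set_irq(xen_cb_irq, xen_cb_level);
}

static void xen_cb_set_level(int level)
{
    xen_cb_level = level;
    if (qemu_mutex_iothread_locked()) {
        /* Already under the BQL: set the line directly. */
        qemu_set_irq(xen_cb_irq, level);
    } else {
        /* Can't take the BQL here (we may hold port_lock); defer to a BH. */
        aio_bh_schedule_oneshot(qemu_get_aio_context(), xen_cb_bh, NULL);
    }
}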
Diffstat (limited to 'target/i386/kvm')
-rw-r--r--  target/i386/kvm/kvm.c       11
-rw-r--r--  target/i386/kvm/xen-emu.c   40
-rw-r--r--  target/i386/kvm/xen-emu.h    1
3 files changed, 52 insertions(+), 0 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5a144ec..3c37955 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4990,6 +4990,17 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
         kvm_rate_limit_on_bus_lock();
     }
 
+    /*
+     * If the callback is asserted as a GSI (or PCI INTx) then check if
+     * vcpu_info->evtchn_upcall_pending has been cleared, and deassert
+     * the callback IRQ if so. Ideally we could hook into the PIC/IOAPIC
+     * EOI and only resample then, exactly how the VFIO eventfd pairs
+     * are designed to work for level triggered interrupts.
+     */
+    if (x86_cpu->env.xen_callback_asserted) {
+        kvm_xen_maybe_deassert_callback(cpu);
+    }
+
     /* We need to protect the apic state against concurrent accesses from
      * different threads in case the userspace irqchip is used. */
     if (!kvm_irqchip_in_kernel()) {
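For comparison, the kernel-side 'resample' mechanism referenced in the comment above is exposed through the KVM_IRQFD ioctl. A hedged sketch of registering such an eventfd pair (error handling elided; vm_fd and gsi are assumed to be supplied by the surrounding VMM code):

#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: register a level-triggered trigger/resample eventfd pair.
 * Writing to trigger_fd asserts the GSI; KVM signals resample_fd on
 * guest EOI, at which point the VMM re-checks its device state and
 * writes to trigger_fd again if the line should remain asserted. */
static int register_resample_irqfd(int vm_fd, uint32_t gsi,
                                   int *trigger_fd, int *resample_fd)
{
    struct kvm_irqfd irqfd = { 0 };

    *trigger_fd = eventfd(0, EFD_CLOEXEC);
    *resample_fd = eventfd(0, EFD_CLOEXEC);

    irqfd.fd = *trigger_fd;
    irqfd.gsi = gsi;
    irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
    irqfd.resamplefd = *resample_fd;

    return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}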
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index 821629f..b52617d 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -320,6 +320,39 @@ void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
     return X86_CPU(cs)->env.xen_vcpu_info_hva;
 }
 
+void kvm_xen_maybe_deassert_callback(CPUState *cs)
+{
+    CPUX86State *env = &X86_CPU(cs)->env;
+    struct vcpu_info *vi = env->xen_vcpu_info_hva;
+    if (!vi) {
+        return;
+    }
+
+    /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
+    if (!vi->evtchn_upcall_pending) {
+        qemu_mutex_lock_iothread();
+        /*
+         * Check again now we have the lock, because it may have been
+         * asserted in the interim. And we don't want to take the lock
+         * every time because this is a fast path.
+         */
+        if (!vi->evtchn_upcall_pending) {
+            X86_CPU(cs)->env.xen_callback_asserted = false;
+            xen_evtchn_set_callback_level(0);
+        }
+        qemu_mutex_unlock_iothread();
+    }
+}
+
+void kvm_xen_set_callback_asserted(void)
+{
+    CPUState *cs = qemu_get_cpu(0);
+
+    if (cs) {
+        X86_CPU(cs)->env.xen_callback_asserted = true;
+    }
+}
+
 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
 {
     CPUState *cs = qemu_get_cpu(vcpu_id);
@@ -352,6 +385,13 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
          */
         qemu_cpu_kick(cs);
         break;
+
+    case HVM_PARAM_CALLBACK_TYPE_GSI:
+    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
+        if (vcpu_id == 0) {
+            xen_evtchn_set_callback_level(1);
+        }
+        break;
     }
 }
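The xen_evtchn_set_callback_level() calls above are consumed by the event channel device model (hw/i386/kvm/xen_evtchn.c). A plausible sketch of that receiving side, tying together the BQL/BH rule from the commit message, might look like the following; the XenEvtchnState fields and BH used here (callback_level, callback_gsi, gsi_bh) are illustrative assumptions, not the actual structure layout:

/* Illustrative sketch of the GSI side; not the real xen_evtchn.c code. */
static void set_callback_level_sketch(XenEvtchnState *s, int level)
{
    s->callback_level = level;   /* remembered so a deferred BH can replay it */
    if (level) {
        /* Arm the per-vmexit polling in kvm_arch_post_run() above. */
        kvm_xen_set_callback_asserted();
    }
    if (qemu_mutex_iothread_locked()) {
        qemu_set_irq(s->callback_gsi, level);
    } else {
        /* Caller may hold port_lock; let a BH set the line under the BQL. */
        qemu_bh_schedule(s->gsi_bh);
    }
}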
diff --git a/target/i386/kvm/xen-emu.h b/target/i386/kvm/xen-emu.h
index 4526056..fe85e0b 100644
--- a/target/i386/kvm/xen-emu.h
+++ b/target/i386/kvm/xen-emu.h
@@ -28,5 +28,6 @@ int kvm_xen_init_vcpu(CPUState *cs);
 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit);
 int kvm_put_xen_state(CPUState *cs);
 int kvm_get_xen_state(CPUState *cs);
+void kvm_xen_maybe_deassert_callback(CPUState *cs);
 
 #endif /* QEMU_I386_KVM_XEN_EMU_H */