aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hw/apic.c3
-rw-r--r--hw/kvm/apic.c34
-rw-r--r--hw/msi.h5
-rw-r--r--hw/msix.c121
-rw-r--r--hw/msix.h6
-rw-r--r--hw/pc.c9
-rw-r--r--hw/pc_piix.c14
-rw-r--r--hw/pci.h8
-rw-r--r--hw/virtio-pci.c126
-rw-r--r--hw/virtio-pci.h6
-rw-r--r--hw/xen.h10
-rw-r--r--hw/xen_apic.c5
-rw-r--r--kvm-all.c236
-rw-r--r--kvm-stub.c23
-rw-r--r--kvm.h18
-rw-r--r--linux-headers/linux/kvm.h38
-rw-r--r--qemu-common.h1
-rwxr-xr-xscripts/kvm/vmxcap13
18 files changed, 624 insertions, 52 deletions
diff --git a/hw/apic.c b/hw/apic.c
index 4eeaf88..5fbf01c 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,7 @@
#include "apic_internal.h"
#include "apic.h"
#include "ioapic.h"
+#include "msi.h"
#include "host-utils.h"
#include "trace.h"
#include "pc.h"
@@ -862,6 +863,8 @@ static void apic_init(APICCommonState *s)
s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s);
local_apics[s->idx] = s;
+
+ msi_supported = true;
}
static void apic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
index ffe7a52..8ba4079 100644
--- a/hw/kvm/apic.c
+++ b/hw/kvm/apic.c
@@ -10,6 +10,7 @@
* See the COPYING file in the top-level directory.
*/
#include "hw/apic_internal.h"
+#include "hw/msi.h"
#include "kvm.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
@@ -145,10 +146,39 @@ static void kvm_apic_external_nmi(APICCommonState *s)
run_on_cpu(s->cpu_env, do_inject_external_nmi, s);
}
+static uint64_t kvm_apic_mem_read(void *opaque, target_phys_addr_t addr,
+ unsigned size)
+{
+ return ~(uint64_t)0;
+}
+
+static void kvm_apic_mem_write(void *opaque, target_phys_addr_t addr,
+ uint64_t data, unsigned size)
+{
+ MSIMessage msg = { .address = addr, .data = data };
+ int ret;
+
+ ret = kvm_irqchip_send_msi(kvm_state, msg);
+ if (ret < 0) {
+ fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n",
+ strerror(-ret));
+ }
+}
+
+static const MemoryRegionOps kvm_apic_io_ops = {
+ .read = kvm_apic_mem_read,
+ .write = kvm_apic_mem_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
static void kvm_apic_init(APICCommonState *s)
{
- memory_region_init_reservation(&s->io_memory, "kvm-apic-msi",
- MSI_SPACE_SIZE);
+ memory_region_init_io(&s->io_memory, &kvm_apic_io_ops, s, "kvm-apic-msi",
+ MSI_SPACE_SIZE);
+
+ if (kvm_has_gsi_routing()) {
+ msi_supported = true;
+ }
}
static void kvm_apic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/msi.h b/hw/msi.h
index 3040bb0..75747ab 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -24,6 +24,11 @@
#include "qemu-common.h"
#include "pci.h"
+struct MSIMessage {
+ uint64_t address;
+ uint32_t data;
+};
+
extern bool msi_supported;
bool msi_enabled(const PCIDevice *dev);
diff --git a/hw/msix.c b/hw/msix.c
index 3835eaa..59c7a83 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -35,6 +35,15 @@
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
#define MSIX_MAX_ENTRIES 32
+static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+{
+ uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
+ MSIMessage msg;
+
+ msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
+ msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
+ return msg;
+}
/* Add MSI-X capability to the config space for the device. */
/* Given a bar and its size, add MSI-X table on top of it
@@ -130,13 +139,34 @@ static bool msix_is_masked(PCIDevice *dev, int vector)
return msix_vector_masked(dev, vector, dev->msix_function_masked);
}
+static void msix_fire_vector_notifier(PCIDevice *dev,
+ unsigned int vector, bool is_masked)
+{
+ MSIMessage msg;
+ int ret;
+
+ if (!dev->msix_vector_use_notifier) {
+ return;
+ }
+ if (is_masked) {
+ dev->msix_vector_release_notifier(dev, vector);
+ } else {
+ msg = msix_get_message(dev, vector);
+ ret = dev->msix_vector_use_notifier(dev, vector, msg);
+ assert(ret >= 0);
+ }
+}
+
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
bool is_masked = msix_is_masked(dev, vector);
+
if (is_masked == was_masked) {
return;
}
+ msix_fire_vector_notifier(dev, vector, is_masked);
+
if (!is_masked && msix_is_pending(dev, vector)) {
msix_clr_pending(dev, vector);
msix_notify(dev, vector);
@@ -222,10 +252,14 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
{
int vector;
+
for (vector = 0; vector < nentries; ++vector) {
unsigned offset =
vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
+ bool was_masked = msix_is_masked(dev, vector);
+
dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ msix_handle_mask_update(dev, vector, was_masked);
}
}
@@ -317,6 +351,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
void msix_load(PCIDevice *dev, QEMUFile *f)
{
unsigned n = dev->msix_entries_nr;
+ unsigned int vector;
if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
return;
@@ -326,6 +361,10 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
msix_update_function_masked(dev);
+
+ for (vector = 0; vector < n; vector++) {
+ msix_handle_mask_update(dev, vector, true);
+ }
}
/* Does device support MSI-X? */
@@ -352,9 +391,7 @@ uint32_t msix_bar_size(PCIDevice *dev)
/* Send an MSI-X message */
void msix_notify(PCIDevice *dev, unsigned vector)
{
- uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
- uint64_t address;
- uint32_t data;
+ MSIMessage msg;
if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
return;
@@ -363,9 +400,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
return;
}
- address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
- data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
- stl_le_phys(address, data);
+ msg = msix_get_message(dev, vector);
+
+ stl_le_phys(msg.address, msg.data);
}
void msix_reset(PCIDevice *dev)
@@ -414,3 +451,75 @@ void msix_unuse_all_vectors(PCIDevice *dev)
return;
msix_free_irq_entries(dev);
}
+
+unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
+{
+ return dev->msix_entries_nr;
+}
+
+static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
+{
+ MSIMessage msg;
+
+ if (msix_is_masked(dev, vector)) {
+ return 0;
+ }
+ msg = msix_get_message(dev, vector);
+ return dev->msix_vector_use_notifier(dev, vector, msg);
+}
+
+static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
+{
+ if (msix_is_masked(dev, vector)) {
+ return;
+ }
+ dev->msix_vector_release_notifier(dev, vector);
+}
+
+int msix_set_vector_notifiers(PCIDevice *dev,
+ MSIVectorUseNotifier use_notifier,
+ MSIVectorReleaseNotifier release_notifier)
+{
+ int vector, ret;
+
+ assert(use_notifier && release_notifier);
+
+ dev->msix_vector_use_notifier = use_notifier;
+ dev->msix_vector_release_notifier = release_notifier;
+
+ if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
+ (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
+ for (vector = 0; vector < dev->msix_entries_nr; vector++) {
+ ret = msix_set_notifier_for_vector(dev, vector);
+ if (ret < 0) {
+ goto undo;
+ }
+ }
+ }
+ return 0;
+
+undo:
+ while (--vector >= 0) {
+ msix_unset_notifier_for_vector(dev, vector);
+ }
+ dev->msix_vector_use_notifier = NULL;
+ dev->msix_vector_release_notifier = NULL;
+ return ret;
+}
+
+void msix_unset_vector_notifiers(PCIDevice *dev)
+{
+ int vector;
+
+ assert(dev->msix_vector_use_notifier &&
+ dev->msix_vector_release_notifier);
+
+ if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
+ (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
+ for (vector = 0; vector < dev->msix_entries_nr; vector++) {
+ msix_unset_notifier_for_vector(dev, vector);
+ }
+ }
+ dev->msix_vector_use_notifier = NULL;
+ dev->msix_vector_release_notifier = NULL;
+}
diff --git a/hw/msix.h b/hw/msix.h
index 5aba22b..50aee82 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -13,6 +13,8 @@ void msix_write_config(PCIDevice *pci_dev, uint32_t address,
int msix_uninit(PCIDevice *d, MemoryRegion *bar);
+unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
+
void msix_save(PCIDevice *dev, QEMUFile *f);
void msix_load(PCIDevice *dev, QEMUFile *f);
@@ -29,4 +31,8 @@ void msix_notify(PCIDevice *dev, unsigned vector);
void msix_reset(PCIDevice *dev);
+int msix_set_vector_notifiers(PCIDevice *dev,
+ MSIVectorUseNotifier use_notifier,
+ MSIVectorReleaseNotifier release_notifier);
+void msix_unset_vector_notifiers(PCIDevice *dev);
#endif
diff --git a/hw/pc.c b/hw/pc.c
index e81a06c..c790bcb 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -912,15 +912,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id)
apic_mapped = 1;
}
- /* KVM does not support MSI yet. */
- if (!kvm_irqchip_in_kernel()) {
- msi_supported = true;
- }
-
- if (xen_msi_support()) {
- msi_supported = true;
- }
-
return dev;
}
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index a7aad4b..f49b0aa 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -56,31 +56,27 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled)
{
#ifdef CONFIG_KVM
KVMState *s = kvm_state;
- int ret, i;
+ int i;
if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
for (i = 0; i < 8; ++i) {
if (i == 2) {
continue;
}
- kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_MASTER, i);
+ kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i);
}
for (i = 8; i < 16; ++i) {
- kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
+ kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8);
}
if (pci_enabled) {
for (i = 0; i < 24; ++i) {
if (i == 0) {
- kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, 2);
+ kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2);
} else if (i != 2) {
- kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, i);
+ kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i);
}
}
}
- ret = kvm_irqchip_commit_routes(s);
- if (ret < 0) {
- hw_error("KVM IRQ routing setup failed");
- }
}
#endif /* CONFIG_KVM */
}
diff --git a/hw/pci.h b/hw/pci.h
index 8d0aa49..c3cacce 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -173,6 +173,10 @@ typedef struct PCIDeviceClass {
const char *romfile;
} PCIDeviceClass;
+typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector,
+ MSIMessage msg);
+typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector);
+
struct PCIDevice {
DeviceState qdev;
/* PCI config space */
@@ -243,6 +247,10 @@ struct PCIDevice {
bool has_rom;
MemoryRegion rom;
uint32_t rom_bar;
+
+ /* MSI-X notifiers */
+ MSIVectorUseNotifier msix_vector_use_notifier;
+ MSIVectorReleaseNotifier msix_vector_release_notifier;
};
void pci_register_bar(PCIDevice *pci_dev, int region_num,
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 79b86f1..d08c159 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -24,6 +24,7 @@
#include "virtio-scsi.h"
#include "pci.h"
#include "qemu-error.h"
+#include "msi.h"
#include "msix.h"
#include "net.h"
#include "loader.h"
@@ -539,6 +540,107 @@ static void virtio_pci_guest_notifier_read(void *opaque)
}
}
+static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
+ unsigned int queue_no,
+ unsigned int vector,
+ MSIMessage msg)
+{
+ VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+ VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+ int fd, ret;
+
+ fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
+
+ if (irqfd->users == 0) {
+ ret = kvm_irqchip_add_msi_route(kvm_state, msg);
+ if (ret < 0) {
+ return ret;
+ }
+ irqfd->virq = ret;
+ }
+ irqfd->users++;
+
+ ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq);
+ if (ret < 0) {
+ if (--irqfd->users == 0) {
+ kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+ }
+ return ret;
+ }
+
+ qemu_set_fd_handler(fd, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
+ unsigned int queue_no,
+ unsigned int vector)
+{
+ VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
+ VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+ int fd, ret;
+
+ fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq));
+
+ ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq);
+ assert(ret == 0);
+
+ if (--irqfd->users == 0) {
+ kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+ }
+
+ qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq);
+}
+
+static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
+ MSIMessage msg)
+{
+ VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+ VirtIODevice *vdev = proxy->vdev;
+ int ret, queue_no;
+
+ for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+ if (!virtio_queue_get_num(vdev, queue_no)) {
+ break;
+ }
+ if (virtio_queue_vector(vdev, queue_no) != vector) {
+ continue;
+ }
+ ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
+ if (ret < 0) {
+ goto undo;
+ }
+ }
+ return 0;
+
+undo:
+ while (--queue_no >= 0) {
+ if (virtio_queue_vector(vdev, queue_no) != vector) {
+ continue;
+ }
+ kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+ }
+ return ret;
+}
+
+static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
+{
+ VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+ VirtIODevice *vdev = proxy->vdev;
+ int queue_no;
+
+ for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+ if (!virtio_queue_get_num(vdev, queue_no)) {
+ break;
+ }
+ if (virtio_queue_vector(vdev, queue_no) != vector) {
+ continue;
+ }
+ kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
+ }
+}
+
static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
{
VirtIOPCIProxy *proxy = opaque;
@@ -555,6 +657,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
} else {
qemu_set_fd_handler(event_notifier_get_fd(notifier),
NULL, NULL, NULL);
+ /* Test and clear notifier before closing it,
+ * in case poll callback didn't have time to run. */
+ virtio_pci_guest_notifier_read(vq);
event_notifier_cleanup(notifier);
}
@@ -573,6 +678,13 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
VirtIODevice *vdev = proxy->vdev;
int r, n;
+ /* Must unset vector notifier while guest notifier is still assigned */
+ if (kvm_irqchip_in_kernel() && !assign) {
+ msix_unset_vector_notifiers(&proxy->pci_dev);
+ g_free(proxy->vector_irqfd);
+ proxy->vector_irqfd = NULL;
+ }
+
for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
if (!virtio_queue_get_num(vdev, n)) {
break;
@@ -584,10 +696,24 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
}
}
+ /* Must set vector notifier after guest notifier has been assigned */
+ if (kvm_irqchip_in_kernel() && assign) {
+ proxy->vector_irqfd =
+ g_malloc0(sizeof(*proxy->vector_irqfd) *
+ msix_nr_vectors_allocated(&proxy->pci_dev));
+ r = msix_set_vector_notifiers(&proxy->pci_dev,
+ kvm_virtio_pci_vector_use,
+ kvm_virtio_pci_vector_release);
+ if (r < 0) {
+ goto assign_error;
+ }
+ }
+
return 0;
assign_error:
/* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
+ assert(assign);
while (--n >= 0) {
virtio_pci_set_guest_notifier(opaque, n, !assign);
}
diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h
index 889e59e..91b791b 100644
--- a/hw/virtio-pci.h
+++ b/hw/virtio-pci.h
@@ -26,6 +26,11 @@
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
typedef struct {
+ int virq;
+ unsigned int users;
+} VirtIOIRQFD;
+
+typedef struct {
PCIDevice pci_dev;
VirtIODevice *vdev;
MemoryRegion bar;
@@ -44,6 +49,7 @@ typedef struct {
VirtIOSCSIConf scsi;
bool ioeventfd_disabled;
bool ioeventfd_started;
+ VirtIOIRQFD *vector_irqfd;
} VirtIOPCIProxy;
void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);
diff --git a/hw/xen.h b/hw/xen.h
index 3ae4cd0..e5926b7 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -57,14 +57,4 @@ void xen_register_framebuffer(struct MemoryRegion *mr);
# define HVM_MAX_VCPUS 32
#endif
-static inline int xen_msi_support(void)
-{
-#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \
- && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420
- return xen_enabled();
-#else
- return 0;
-#endif
-}
-
#endif /* QEMU_HW_XEN_H */
diff --git a/hw/xen_apic.c b/hw/xen_apic.c
index 1725ff6..a9e101f 100644
--- a/hw/xen_apic.c
+++ b/hw/xen_apic.c
@@ -40,6 +40,11 @@ static void xen_apic_init(APICCommonState *s)
{
memory_region_init_io(&s->io_memory, &xen_apic_io_ops, s, "xen-apic-msi",
MSI_SPACE_SIZE);
+
+#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \
+ && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420
+ msi_supported = true;
+#endif
}
static void xen_apic_set_base(APICCommonState *s, uint64_t val)
diff --git a/kvm-all.c b/kvm-all.c
index 9b73ccf..489ee53 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -24,6 +24,7 @@
#include "qemu-barrier.h"
#include "sysemu.h"
#include "hw/hw.h"
+#include "hw/msi.h"
#include "gdbstub.h"
#include "kvm.h"
#include "bswap.h"
@@ -48,6 +49,8 @@
do { } while (0)
#endif
+#define KVM_MSI_HASHTAB_SIZE 256
+
typedef struct KVMSlot
{
target_phys_addr_t start_addr;
@@ -59,6 +62,11 @@ typedef struct KVMSlot
typedef struct kvm_dirty_log KVMDirtyLog;
+typedef struct KVMMSIRoute {
+ struct kvm_irq_routing_entry kroute;
+ QTAILQ_ENTRY(KVMMSIRoute) entry;
+} KVMMSIRoute;
+
struct KVMState
{
KVMSlot slots[32];
@@ -86,7 +94,9 @@ struct KVMState
struct kvm_irq_routing *irq_routes;
int nr_allocated_irq_routes;
uint32_t *used_gsi_bitmap;
- unsigned int max_gsi;
+ unsigned int gsi_count;
+ QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
+ bool direct_msi;
#endif
};
@@ -859,14 +869,17 @@ int kvm_irqchip_set_irq(KVMState *s, int irq, int level)
#ifdef KVM_CAP_IRQ_ROUTING
static void set_gsi(KVMState *s, unsigned int gsi)
{
- assert(gsi < s->max_gsi);
-
s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32);
}
+static void clear_gsi(KVMState *s, unsigned int gsi)
+{
+ s->used_gsi_bitmap[gsi / 32] &= ~(1U << (gsi % 32));
+}
+
static void kvm_init_irq_routing(KVMState *s)
{
- int gsi_count;
+ int gsi_count, i;
gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING);
if (gsi_count > 0) {
@@ -875,7 +888,7 @@ static void kvm_init_irq_routing(KVMState *s)
/* Round up so we can search ints using ffs */
gsi_bits = ALIGN(gsi_count, 32);
s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
- s->max_gsi = gsi_bits;
+ s->gsi_count = gsi_count;
/* Mark any over-allocated bits as already in use */
for (i = gsi_count; i < gsi_bits; i++) {
@@ -886,9 +899,24 @@ static void kvm_init_irq_routing(KVMState *s)
s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
s->nr_allocated_irq_routes = 0;
+ if (!s->direct_msi) {
+ for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) {
+ QTAILQ_INIT(&s->msi_hashtab[i]);
+ }
+ }
+
kvm_arch_init_irq_routing(s);
}
+static void kvm_irqchip_commit_routes(KVMState *s)
+{
+ int ret;
+
+ s->irq_routes->flags = 0;
+ ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
+ assert(ret == 0);
+}
+
static void kvm_add_routing_entry(KVMState *s,
struct kvm_irq_routing_entry *entry)
{
@@ -914,12 +942,16 @@ static void kvm_add_routing_entry(KVMState *s,
new->u = entry->u;
set_gsi(s, entry->gsi);
+
+ kvm_irqchip_commit_routes(s);
}
-void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin)
+void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin)
{
struct kvm_irq_routing_entry e;
+ assert(pin < s->gsi_count);
+
e.gsi = irq;
e.type = KVM_IRQ_ROUTING_IRQCHIP;
e.flags = 0;
@@ -928,10 +960,167 @@ void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin)
kvm_add_routing_entry(s, &e);
}
-int kvm_irqchip_commit_routes(KVMState *s)
+void kvm_irqchip_release_virq(KVMState *s, int virq)
{
- s->irq_routes->flags = 0;
- return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
+ struct kvm_irq_routing_entry *e;
+ int i;
+
+ for (i = 0; i < s->irq_routes->nr; i++) {
+ e = &s->irq_routes->entries[i];
+ if (e->gsi == virq) {
+ s->irq_routes->nr--;
+ *e = s->irq_routes->entries[s->irq_routes->nr];
+ }
+ }
+ clear_gsi(s, virq);
+
+ kvm_irqchip_commit_routes(s);
+}
+
+static unsigned int kvm_hash_msi(uint32_t data)
+{
+ /* This is optimized for IA32 MSI layout. However, no other arch shall
+ * repeat the mistake of not providing a direct MSI injection API. */
+ return data & 0xff;
+}
+
+static void kvm_flush_dynamic_msi_routes(KVMState *s)
+{
+ KVMMSIRoute *route, *next;
+ unsigned int hash;
+
+ for (hash = 0; hash < KVM_MSI_HASHTAB_SIZE; hash++) {
+ QTAILQ_FOREACH_SAFE(route, &s->msi_hashtab[hash], entry, next) {
+ kvm_irqchip_release_virq(s, route->kroute.gsi);
+ QTAILQ_REMOVE(&s->msi_hashtab[hash], route, entry);
+ g_free(route);
+ }
+ }
+}
+
+static int kvm_irqchip_get_virq(KVMState *s)
+{
+ uint32_t *word = s->used_gsi_bitmap;
+ int max_words = ALIGN(s->gsi_count, 32) / 32;
+ int i, bit;
+ bool retry = true;
+
+again:
+ /* Return the lowest unused GSI in the bitmap */
+ for (i = 0; i < max_words; i++) {
+ bit = ffs(~word[i]);
+ if (!bit) {
+ continue;
+ }
+
+ return bit - 1 + i * 32;
+ }
+ if (!s->direct_msi && retry) {
+ retry = false;
+ kvm_flush_dynamic_msi_routes(s);
+ goto again;
+ }
+ return -ENOSPC;
+
+}
+
+static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
+{
+ unsigned int hash = kvm_hash_msi(msg.data);
+ KVMMSIRoute *route;
+
+ QTAILQ_FOREACH(route, &s->msi_hashtab[hash], entry) {
+ if (route->kroute.u.msi.address_lo == (uint32_t)msg.address &&
+ route->kroute.u.msi.address_hi == (msg.address >> 32) &&
+ route->kroute.u.msi.data == msg.data) {
+ return route;
+ }
+ }
+ return NULL;
+}
+
+int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
+{
+ struct kvm_msi msi;
+ KVMMSIRoute *route;
+
+ if (s->direct_msi) {
+ msi.address_lo = (uint32_t)msg.address;
+ msi.address_hi = msg.address >> 32;
+ msi.data = msg.data;
+ msi.flags = 0;
+ memset(msi.pad, 0, sizeof(msi.pad));
+
+ return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi);
+ }
+
+ route = kvm_lookup_msi_route(s, msg);
+ if (!route) {
+ int virq;
+
+ virq = kvm_irqchip_get_virq(s);
+ if (virq < 0) {
+ return virq;
+ }
+
+ route = g_malloc(sizeof(KVMMSIRoute));
+ route->kroute.gsi = virq;
+ route->kroute.type = KVM_IRQ_ROUTING_MSI;
+ route->kroute.flags = 0;
+ route->kroute.u.msi.address_lo = (uint32_t)msg.address;
+ route->kroute.u.msi.address_hi = msg.address >> 32;
+ route->kroute.u.msi.data = msg.data;
+
+ kvm_add_routing_entry(s, &route->kroute);
+
+ QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route,
+ entry);
+ }
+
+ assert(route->kroute.type == KVM_IRQ_ROUTING_MSI);
+
+ return kvm_irqchip_set_irq(s, route->kroute.gsi, 1);
+}
+
+int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
+{
+ struct kvm_irq_routing_entry kroute;
+ int virq;
+
+ if (!kvm_irqchip_in_kernel()) {
+ return -ENOSYS;
+ }
+
+ virq = kvm_irqchip_get_virq(s);
+ if (virq < 0) {
+ return virq;
+ }
+
+ kroute.gsi = virq;
+ kroute.type = KVM_IRQ_ROUTING_MSI;
+ kroute.flags = 0;
+ kroute.u.msi.address_lo = (uint32_t)msg.address;
+ kroute.u.msi.address_hi = msg.address >> 32;
+ kroute.u.msi.data = msg.data;
+
+ kvm_add_routing_entry(s, &kroute);
+
+ return virq;
+}
+
+static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
+{
+ struct kvm_irqfd irqfd = {
+ .fd = fd,
+ .gsi = virq,
+ .flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
+ };
+
+ if (!kvm_irqchip_in_kernel()) {
+ return -ENOSYS;
+ }
+
+ return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd);
}
#else /* !KVM_CAP_IRQ_ROUTING */
@@ -939,8 +1128,33 @@ int kvm_irqchip_commit_routes(KVMState *s)
static void kvm_init_irq_routing(KVMState *s)
{
}
+
+int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
+{
+ abort();
+}
+
+int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
+{
+ abort();
+}
+
+static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
+{
+ abort();
+}
#endif /* !KVM_CAP_IRQ_ROUTING */
+int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq)
+{
+ return kvm_irqchip_assign_irqfd(s, fd, virq, true);
+}
+
+int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq)
+{
+ return kvm_irqchip_assign_irqfd(s, fd, virq, false);
+}
+
static int kvm_irqchip_create(KVMState *s)
{
QemuOptsList *list = qemu_find_opts("machine");
@@ -948,7 +1162,7 @@ static int kvm_irqchip_create(KVMState *s)
if (QTAILQ_EMPTY(&list->head) ||
!qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
- "kernel_irqchip", false) ||
+ "kernel_irqchip", true) ||
!kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
return 0;
}
@@ -1072,6 +1286,8 @@ int kvm_init(void)
s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
#endif
+ s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
+
ret = kvm_arch_init(s);
if (ret < 0) {
goto err;
diff --git a/kvm-stub.c b/kvm-stub.c
index 47c573d..ec9a364 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -12,10 +12,14 @@
#include "qemu-common.h"
#include "hw/hw.h"
+#include "hw/msi.h"
#include "cpu.h"
#include "gdbstub.h"
#include "kvm.h"
+KVMState *kvm_state;
+bool kvm_kernel_irqchip;
+
int kvm_init_vcpu(CPUArchState *env)
{
return -ENOSYS;
@@ -128,3 +132,22 @@ int kvm_on_sigbus(int code, void *addr)
{
return 1;
}
+
+int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
+{
+ return -ENOSYS;
+}
+
+void kvm_irqchip_release_virq(KVMState *s, int virq)
+{
+}
+
+int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq)
+{
+ return -ENOSYS;
+}
+
+int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq)
+{
+ return -ENOSYS;
+}
diff --git a/kvm.h b/kvm.h
index 4ccae8c..9c7b0ea 100644
--- a/kvm.h
+++ b/kvm.h
@@ -44,6 +44,10 @@ typedef struct KVMCapabilityInfo {
#define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP }
#define KVM_CAP_LAST_INFO { NULL, 0 }
+struct KVMState;
+typedef struct KVMState KVMState;
+extern KVMState *kvm_state;
+
/* external API */
int kvm_init(void);
@@ -88,10 +92,6 @@ int kvm_on_sigbus(int code, void *addr);
/* internal API */
-struct KVMState;
-typedef struct KVMState KVMState;
-extern KVMState *kvm_state;
-
int kvm_ioctl(KVMState *s, int type, ...);
int kvm_vm_ioctl(KVMState *s, int type, ...);
@@ -132,9 +132,9 @@ int kvm_arch_on_sigbus(int code, void *addr);
void kvm_arch_init_irq_routing(KVMState *s);
int kvm_irqchip_set_irq(KVMState *s, int irq, int level);
+int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg);
-void kvm_irqchip_add_route(KVMState *s, int gsi, int irqchip, int pin);
-int kvm_irqchip_commit_routes(KVMState *s);
+void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin);
void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
@@ -212,4 +212,10 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign,
uint32_t size);
int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign);
+
+int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
+void kvm_irqchip_release_virq(KVMState *s, int virq);
+
+int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq);
+int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq);
#endif
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index ee7bd9c..c4426ec 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo {
__u8 pad[108];
};
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
+
+struct kvm_ppc_one_page_size {
+ __u32 page_shift; /* Page shift (or 0) */
+ __u32 pte_enc; /* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+ __u32 page_shift; /* Base page shift of segment (or 0) */
+ __u32 slb_enc; /* SLB encoding for BookS */
+ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
+#define KVM_PPC_1T_SEGMENTS 0x00000002
+
+struct kvm_ppc_smmu_info {
+ __u64 flags;
+ __u32 slb_size;
+ __u32 pad;
+ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
#define KVMIO 0xAE
/* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -590,6 +614,8 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_SYNC_REGS 74
#define KVM_CAP_PCI_2_3 75
#define KVM_CAP_KVMCLOCK_CTRL 76
+#define KVM_CAP_SIGNAL_MSI 77
+#define KVM_CAP_PPC_GET_SMMU_INFO 78
#ifdef KVM_CAP_IRQ_ROUTING
@@ -715,6 +741,14 @@ struct kvm_one_reg {
__u64 addr;
};
+struct kvm_msi {
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+ __u32 flags;
+ __u8 pad[16];
+};
+
/*
* ioctls for VM fds
*/
@@ -789,6 +823,10 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_PCI_2_3 */
#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
struct kvm_assigned_pci_dev)
+/* Available with KVM_CAP_SIGNAL_MSI */
+#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
+/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
+#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
/*
* ioctls for vcpu fds
diff --git a/qemu-common.h b/qemu-common.h
index cccfb42..91e0562 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -251,6 +251,7 @@ typedef struct PCIEAERLog PCIEAERLog;
typedef struct PCIEAERErr PCIEAERErr;
typedef struct PCIEPort PCIEPort;
typedef struct PCIESlot PCIESlot;
+typedef struct MSIMessage MSIMessage;
typedef struct SerialState SerialState;
typedef struct IRQState *qemu_irq;
typedef struct PCMCIACardState PCMCIACardState;
diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
index a74ce71..cbe6440 100755
--- a/scripts/kvm/vmxcap
+++ b/scripts/kvm/vmxcap
@@ -22,6 +22,7 @@ MSR_IA32_VMX_TRUE_PINBASED_CTLS = 0x48D
MSR_IA32_VMX_TRUE_PROCBASED_CTLS = 0x48E
MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F
MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490
+MSR_IA32_VMX_VMFUNC = 0x491
class msr(object):
def __init__(self):
@@ -147,6 +148,9 @@ controls = [
6: 'WBINVD exiting',
7: 'Unrestricted guest',
10: 'PAUSE-loop exiting',
+ 11: 'RDRAND exiting',
+ 12: 'Enable INVPCID',
+ 13: 'Enable VM functions',
},
cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2,
),
@@ -193,6 +197,7 @@ controls = [
8: 'Wait-for-SIPI activity state',
(16,24): 'Number of CR3-target values',
(25,27): 'MSR-load/store count recommenation',
+ 28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1',
(32,62): 'MSEG revision identifier',
},
msr = MSR_IA32_VMX_MISC_CTLS,
@@ -208,6 +213,7 @@ controls = [
16: '2MB EPT pages',
17: '1GB EPT pages',
20: 'INVEPT supported',
+ 21: 'EPT accessed and dirty flags',
25: 'Single-context INVEPT',
26: 'All-context INVEPT',
32: 'INVVPID supported',
@@ -218,6 +224,13 @@ controls = [
},
msr = MSR_IA32_VMX_EPT_VPID_CAP,
),
+ Misc(
+ name = 'VM Functions',
+ bits = {
+ 0: 'EPTP Switching',
+ },
+ msr = MSR_IA32_VMX_VMFUNC,
+ ),
]
for c in controls: