diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2012-06-03 07:56:23 +0800 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2012-06-03 07:56:23 +0800 |
commit | 74f4d2279b12a970d499558e4c38421724777827 (patch) | |
tree | 150f8f711770974b31c5f0e086228d725f75ed7d /hw | |
parent | 2eb02f28269dc47b38aa3becbaff1cc6300210da (diff) | |
parent | 7d37d351dffee60fc7048bbfd8573421f15eb724 (diff) | |
download | qemu-74f4d2279b12a970d499558e4c38421724777827.zip qemu-74f4d2279b12a970d499558e4c38421724777827.tar.gz qemu-74f4d2279b12a970d499558e4c38421724777827.tar.bz2 |
Merge remote-tracking branch 'qemu-kvm/uq/master' into staging
* qemu-kvm/uq/master:
virtio/vhost: Add support for KVM in-kernel MSI injection
msix: Add msix_nr_vectors_allocated
kvm: Enable use of kvm_irqchip_in_kernel in hwlib code
kvm: Introduce kvm_irqchip_add/remove_irqfd
kvm: Make kvm_irqchip_commit_routes an internal service
kvm: Publicize kvm_irqchip_release_virq
kvm: Introduce kvm_irqchip_add_msi_route
kvm: Rename kvm_irqchip_add_route to kvm_irqchip_add_irq_route
msix: Introduce vector notifiers
msix: Invoke msix_handle_mask_update on msix_mask_all
msix: Factor out msix_get_message
kvm: update vmxcap for EPT A/D, INVPCID, RDRAND, VMFUNC
kvm: Enable in-kernel irqchip support by default
kvm: Add support for direct MSI injections
kvm: Update kernel headers
kvm: x86: Wire up MSI support for in-kernel irqchip
pc: Enable MSI support at APIC level
kvm: Introduce basic MSI support for in-kernel irqchips
Introduce MSIMessage structure
kvm: Refactor KVMState::max_gsi to gsi_count
Diffstat (limited to 'hw')
-rw-r--r-- | hw/apic.c | 3 | ||||
-rw-r--r-- | hw/kvm/apic.c | 34 | ||||
-rw-r--r-- | hw/msi.h | 5 | ||||
-rw-r--r-- | hw/msix.c | 121 | ||||
-rw-r--r-- | hw/msix.h | 6 | ||||
-rw-r--r-- | hw/pc.c | 9 | ||||
-rw-r--r-- | hw/pc_piix.c | 14 | ||||
-rw-r--r-- | hw/pci.h | 8 | ||||
-rw-r--r-- | hw/virtio-pci.c | 126 | ||||
-rw-r--r-- | hw/virtio-pci.h | 6 | ||||
-rw-r--r-- | hw/xen.h | 10 | ||||
-rw-r--r-- | hw/xen_apic.c | 5 |
12 files changed, 311 insertions, 36 deletions
@@ -19,6 +19,7 @@ #include "apic_internal.h" #include "apic.h" #include "ioapic.h" +#include "msi.h" #include "host-utils.h" #include "trace.h" #include "pc.h" @@ -862,6 +863,8 @@ static void apic_init(APICCommonState *s) s->timer = qemu_new_timer_ns(vm_clock, apic_timer, s); local_apics[s->idx] = s; + + msi_supported = true; } static void apic_class_init(ObjectClass *klass, void *data) diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c index ffe7a52..8ba4079 100644 --- a/hw/kvm/apic.c +++ b/hw/kvm/apic.c @@ -10,6 +10,7 @@ * See the COPYING file in the top-level directory. */ #include "hw/apic_internal.h" +#include "hw/msi.h" #include "kvm.h" static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, @@ -145,10 +146,39 @@ static void kvm_apic_external_nmi(APICCommonState *s) run_on_cpu(s->cpu_env, do_inject_external_nmi, s); } +static uint64_t kvm_apic_mem_read(void *opaque, target_phys_addr_t addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void kvm_apic_mem_write(void *opaque, target_phys_addr_t addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + int ret; + + ret = kvm_irqchip_send_msi(kvm_state, msg); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n", + strerror(-ret)); + } +} + +static const MemoryRegionOps kvm_apic_io_ops = { + .read = kvm_apic_mem_read, + .write = kvm_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + static void kvm_apic_init(APICCommonState *s) { - memory_region_init_reservation(&s->io_memory, "kvm-apic-msi", - MSI_SPACE_SIZE); + memory_region_init_io(&s->io_memory, &kvm_apic_io_ops, s, "kvm-apic-msi", + MSI_SPACE_SIZE); + + if (kvm_has_gsi_routing()) { + msi_supported = true; + } } static void kvm_apic_class_init(ObjectClass *klass, void *data) @@ -24,6 +24,11 @@ #include "qemu-common.h" #include "pci.h" +struct MSIMessage { + uint64_t address; + uint32_t data; +}; + extern bool msi_supported; bool msi_enabled(const PCIDevice *dev); @@ -35,6 +35,15 @@ #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2) #define MSIX_MAX_ENTRIES 32 +static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +{ + uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; + MSIMessage msg; + + msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); + msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); + return msg; +} /* Add MSI-X capability to the config space for the device. */ /* Given a bar and its size, add MSI-X table on top of it @@ -130,13 +139,34 @@ static bool msix_is_masked(PCIDevice *dev, int vector) return msix_vector_masked(dev, vector, dev->msix_function_masked); } +static void msix_fire_vector_notifier(PCIDevice *dev, + unsigned int vector, bool is_masked) +{ + MSIMessage msg; + int ret; + + if (!dev->msix_vector_use_notifier) { + return; + } + if (is_masked) { + dev->msix_vector_release_notifier(dev, vector); + } else { + msg = msix_get_message(dev, vector); + ret = dev->msix_vector_use_notifier(dev, vector, msg); + assert(ret >= 0); + } +} + static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) { bool is_masked = msix_is_masked(dev, vector); + if (is_masked == was_masked) { return; } + msix_fire_vector_notifier(dev, vector, is_masked); + if (!is_masked && msix_is_pending(dev, vector)) { msix_clr_pending(dev, vector); msix_notify(dev, vector); @@ -222,10 +252,14 @@ static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar) static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) { int vector; + for (vector = 0; vector < nentries; ++vector) { unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; + bool was_masked = msix_is_masked(dev, vector); + dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + msix_handle_mask_update(dev, vector, was_masked); } } @@ -317,6 +351,7 @@ void msix_save(PCIDevice *dev, QEMUFile *f) void msix_load(PCIDevice *dev, QEMUFile *f) { unsigned n = dev->msix_entries_nr; + unsigned int vector; if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { return; @@ -326,6 +361,10 @@ void msix_load(PCIDevice *dev, QEMUFile *f) qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); msix_update_function_masked(dev); + + for (vector = 0; vector < n; vector++) { + msix_handle_mask_update(dev, vector, true); + } } /* Does device support MSI-X? */ @@ -352,9 +391,7 @@ uint32_t msix_bar_size(PCIDevice *dev) /* Send an MSI-X message */ void msix_notify(PCIDevice *dev, unsigned vector) { - uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; - uint64_t address; - uint32_t data; + MSIMessage msg; if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) return; @@ -363,9 +400,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) return; } - address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); - data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); - stl_le_phys(address, data); + msg = msix_get_message(dev, vector); + + stl_le_phys(msg.address, msg.data); } void msix_reset(PCIDevice *dev) @@ -414,3 +451,75 @@ void msix_unuse_all_vectors(PCIDevice *dev) return; msix_free_irq_entries(dev); } + +unsigned int msix_nr_vectors_allocated(const PCIDevice *dev) +{ + return dev->msix_entries_nr; +} + +static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector) +{ + MSIMessage msg; + + if (msix_is_masked(dev, vector)) { + return 0; + } + msg = msix_get_message(dev, vector); + return dev->msix_vector_use_notifier(dev, vector, msg); +} + +static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector) +{ + if (msix_is_masked(dev, vector)) { + return; + } + dev->msix_vector_release_notifier(dev, vector); +} + +int msix_set_vector_notifiers(PCIDevice *dev, + MSIVectorUseNotifier use_notifier, + MSIVectorReleaseNotifier release_notifier) +{ + int vector, ret; + + assert(use_notifier && release_notifier); + + dev->msix_vector_use_notifier = use_notifier; + dev->msix_vector_release_notifier = release_notifier; + + if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { + for (vector = 0; vector < dev->msix_entries_nr; vector++) { + ret = msix_set_notifier_for_vector(dev, vector); + if (ret < 0) { + goto undo; + } + } + } + return 0; + +undo: + while (--vector >= 0) { + msix_unset_notifier_for_vector(dev, vector); + } + dev->msix_vector_use_notifier = NULL; + dev->msix_vector_release_notifier = NULL; + return ret; +} + +void msix_unset_vector_notifiers(PCIDevice *dev) +{ + int vector; + + assert(dev->msix_vector_use_notifier && + dev->msix_vector_release_notifier); + + if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & + (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { + for (vector = 0; vector < dev->msix_entries_nr; vector++) { + msix_unset_notifier_for_vector(dev, vector); + } + } + dev->msix_vector_use_notifier = NULL; + dev->msix_vector_release_notifier = NULL; +} @@ -13,6 +13,8 @@ void msix_write_config(PCIDevice *pci_dev, uint32_t address, int msix_uninit(PCIDevice *d, MemoryRegion *bar); +unsigned int msix_nr_vectors_allocated(const PCIDevice *dev); + void msix_save(PCIDevice *dev, QEMUFile *f); void msix_load(PCIDevice *dev, QEMUFile *f); @@ -29,4 +31,8 @@ void msix_notify(PCIDevice *dev, unsigned vector); void msix_reset(PCIDevice *dev); +int msix_set_vector_notifiers(PCIDevice *dev, + MSIVectorUseNotifier use_notifier, + MSIVectorReleaseNotifier release_notifier); +void msix_unset_vector_notifiers(PCIDevice *dev); #endif @@ -912,15 +912,6 @@ static DeviceState *apic_init(void *env, uint8_t apic_id) apic_mapped = 1; } - /* KVM does not support MSI yet. */ - if (!kvm_irqchip_in_kernel()) { - msi_supported = true; - } - - if (xen_msi_support()) { - msi_supported = true; - } - return dev; } diff --git a/hw/pc_piix.c b/hw/pc_piix.c index a7aad4b..f49b0aa 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -56,31 +56,27 @@ static void kvm_piix3_setup_irq_routing(bool pci_enabled) { #ifdef CONFIG_KVM KVMState *s = kvm_state; - int ret, i; + int i; if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { for (i = 0; i < 8; ++i) { if (i == 2) { continue; } - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); } for (i = 8; i < 16; ++i) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); } if (pci_enabled) { for (i = 0; i < 24; ++i) { if (i == 0) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, 2); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2); } else if (i != 2) { - kvm_irqchip_add_route(s, i, KVM_IRQCHIP_IOAPIC, i); + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i); } } } - ret = kvm_irqchip_commit_routes(s); - if (ret < 0) { - hw_error("KVM IRQ routing setup failed"); - } } #endif /* CONFIG_KVM */ } @@ -173,6 +173,10 @@ typedef struct PCIDeviceClass { const char *romfile; } PCIDeviceClass; +typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector, + MSIMessage msg); +typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector); + struct PCIDevice { DeviceState qdev; /* PCI config space */ @@ -243,6 +247,10 @@ struct PCIDevice { bool has_rom; MemoryRegion rom; uint32_t rom_bar; + + /* MSI-X notifiers */ + MSIVectorUseNotifier msix_vector_use_notifier; + MSIVectorReleaseNotifier msix_vector_release_notifier; }; void pci_register_bar(PCIDevice *pci_dev, int region_num, diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 79b86f1..d08c159 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -24,6 +24,7 @@ #include "virtio-scsi.h" #include "pci.h" #include "qemu-error.h" +#include "msi.h" #include "msix.h" #include "net.h" #include "loader.h" @@ -539,6 +540,107 @@ static void virtio_pci_guest_notifier_read(void *opaque) } } +static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, + MSIMessage msg) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int fd, ret; + + fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq)); + + if (irqfd->users == 0) { + ret = kvm_irqchip_add_msi_route(kvm_state, msg); + if (ret < 0) { + return ret; + } + irqfd->virq = ret; + } + irqfd->users++; + + ret = kvm_irqchip_add_irqfd(kvm_state, fd, irqfd->virq); + if (ret < 0) { + if (--irqfd->users == 0) { + kvm_irqchip_release_virq(kvm_state, irqfd->virq); + } + return ret; + } + + qemu_set_fd_handler(fd, NULL, NULL, NULL); + + return 0; +} + +static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector) +{ + VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int fd, ret; + + fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vq)); + + ret = kvm_irqchip_remove_irqfd(kvm_state, fd, irqfd->virq); + assert(ret == 0); + + if (--irqfd->users == 0) { + kvm_irqchip_release_virq(kvm_state, irqfd->virq); + } + + qemu_set_fd_handler(fd, virtio_pci_guest_notifier_read, NULL, vq); +} + +static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector, + MSIMessage msg) +{ + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = proxy->vdev; + int ret, queue_no; + + for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg); + if (ret < 0) { + goto undo; + } + } + return 0; + +undo: + while (--queue_no >= 0) { + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector); + } + return ret; +} + +static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector) +{ + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = proxy->vdev; + int queue_no; + + for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } + if (virtio_queue_vector(vdev, queue_no) != vector) { + continue; + } + kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector); + } +} + static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) { VirtIOPCIProxy *proxy = opaque; @@ -555,6 +657,9 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) } else { qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, NULL); + /* Test and clear notifier before closing it, + * in case poll callback didn't have time to run. */ + virtio_pci_guest_notifier_read(vq); event_notifier_cleanup(notifier); } @@ -573,6 +678,13 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign) VirtIODevice *vdev = proxy->vdev; int r, n; + /* Must unset vector notifier while guest notifier is still assigned */ + if (kvm_irqchip_in_kernel() && !assign) { + msix_unset_vector_notifiers(&proxy->pci_dev); + g_free(proxy->vector_irqfd); + proxy->vector_irqfd = NULL; + } + for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) { if (!virtio_queue_get_num(vdev, n)) { break; @@ -584,10 +696,24 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign) } } + /* Must set vector notifier after guest notifier has been assigned */ + if (kvm_irqchip_in_kernel() && assign) { + proxy->vector_irqfd = + g_malloc0(sizeof(*proxy->vector_irqfd) * + msix_nr_vectors_allocated(&proxy->pci_dev)); + r = msix_set_vector_notifiers(&proxy->pci_dev, + kvm_virtio_pci_vector_use, + kvm_virtio_pci_vector_release); + if (r < 0) { + goto assign_error; + } + } + return 0; assign_error: /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ + assert(assign); while (--n >= 0) { virtio_pci_set_guest_notifier(opaque, n, !assign); } diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h index 889e59e..91b791b 100644 --- a/hw/virtio-pci.h +++ b/hw/virtio-pci.h @@ -26,6 +26,11 @@ #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) typedef struct { + int virq; + unsigned int users; +} VirtIOIRQFD; + +typedef struct { PCIDevice pci_dev; VirtIODevice *vdev; MemoryRegion bar; @@ -44,6 +49,7 @@ typedef struct { VirtIOSCSIConf scsi; bool ioeventfd_disabled; bool ioeventfd_started; + VirtIOIRQFD *vector_irqfd; } VirtIOPCIProxy; void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev); @@ -57,14 +57,4 @@ void xen_register_framebuffer(struct MemoryRegion *mr); # define HVM_MAX_VCPUS 32 #endif -static inline int xen_msi_support(void) -{ -#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \ - && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420 - return xen_enabled(); -#else - return 0; -#endif -} - #endif /* QEMU_HW_XEN_H */ diff --git a/hw/xen_apic.c b/hw/xen_apic.c index 1725ff6..a9e101f 100644 --- a/hw/xen_apic.c +++ b/hw/xen_apic.c @@ -40,6 +40,11 @@ static void xen_apic_init(APICCommonState *s) { memory_region_init_io(&s->io_memory, &xen_apic_io_ops, s, "xen-apic-msi", MSI_SPACE_SIZE); + +#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \ + && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420 + msi_supported = true; +#endif } static void xen_apic_set_base(APICCommonState *s, uint64_t val) |