Diffstat (limited to 'hw')
332 files changed, 17158 insertions(+), 6688 deletions(-)
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 8b001b9..acfa7db 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -201,8 +201,7 @@ void v9fs_path_free(V9fsPath *path)
 }
 
-void G_GNUC_PRINTF(2, 3)
-v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
+void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
 {
     va_list ap;
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 259ad32..65cc45e 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -456,7 +456,8 @@ static inline uint8_t v9fs_request_cancelled(V9fsPDU *pdu)
 void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu);
 void v9fs_path_init(V9fsPath *path);
 void v9fs_path_free(V9fsPath *path);
-void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...);
+void G_GNUC_PRINTF(2, 3) v9fs_path_sprintf(V9fsPath *path, const char *fmt,
+                                           ...);
 void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src);
 size_t v9fs_readdir_response_size(V9fsString *name);
 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
diff --git a/hw/Kconfig b/hw/Kconfig
@@ -42,6 +42,7 @@ source ufs/Kconfig
 source usb/Kconfig
 source virtio/Kconfig
 source vfio/Kconfig
+source vfio-user/Kconfig
 source vmapple/Kconfig
 source xen/Kconfig
 source watchdog/Kconfig
diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c
index b67b4a9..d58ea72 100644
--- a/hw/acpi/acpi-pci-hotplug-stub.c
+++ b/hw/acpi/acpi-pci-hotplug-stub.c
@@ -4,7 +4,7 @@
 
 const VMStateDescription vmstate_acpi_pcihp_pci_status;
 
-void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
+void acpi_pcihp_init(Object *owner, AcpiPciHpState *s,
                      MemoryRegion *address_space_io, uint16_t io_base)
 {
 }
@@ -34,7 +34,7 @@ void acpi_pcihp_reset(AcpiPciHpState *s)
 {
 }
 
-bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus)
+bool acpi_pcihp_is_hotpluggable_bus(AcpiPciHpState *s, BusState *bus)
 {
     return true;
 }
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index f8f93a9..1e685f9 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -160,7 +160,7 @@ void crs_replace_with_free_ranges(GPtrArray *ranges,
  */
 static void crs_range_merge(GPtrArray *range)
 {
-    GPtrArray *tmp = g_ptr_array_new_with_free_func(crs_range_free);
+    g_autoptr(GPtrArray) tmp = g_ptr_array_new_with_free_func(crs_range_free);
     CrsRangeEntry *entry;
     uint64_t range_base, range_limit;
     int i;
@@ -191,7 +191,6 @@ static void crs_range_merge(GPtrArray *range)
         entry = g_ptr_array_index(tmp, i);
         crs_range_insert(range, entry->base, entry->limit);
     }
-    g_ptr_array_free(tmp, true);
 }
 
 static void
@@ -2153,6 +2152,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
     int64_t socket_id = -1, cluster_id = -1, core_id = -1;
     uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0;
     uint32_t pptt_start = table_data->len;
+    uint32_t root_offset;
     int n;
     AcpiTable table = { .sig = "PPTT", .rev = 2,
                         .oem_id = oem_id, .oem_table_id = oem_table_id };
@@ -2160,6 +2160,18 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
     acpi_table_begin(&table, table_data);
 
     /*
+     * Build a root node for all the processor nodes. Otherwise when
+     * building a multi-socket system each socket tree is separated
+     * and will be hard for the OS like Linux to know whether the
+     * system is homogeneous.
+     */
+    root_offset = table_data->len - pptt_start;
+    build_processor_hierarchy_node(table_data,
+        (1 << 0) | /* Physical package */
+        (1 << 4), /* Identical Implementation */
+        0, 0, NULL, 0);
+
+    /*
      * This works with the assumption that cpus[n].props.*_id has been
      * sorted from top to down levels in mc->possible_cpu_arch_ids().
      * Otherwise, the unexpected and duplicated containers will be
@@ -2173,8 +2185,9 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
             core_id = -1;
             socket_offset = table_data->len - pptt_start;
             build_processor_hierarchy_node(table_data,
-                (1 << 0), /* Physical package */
-                0, socket_id, NULL, 0);
+                (1 << 0) | /* Physical package */
+                (1 << 4), /* Identical Implementation */
+                root_offset, socket_id, NULL, 0);
         }
 
         if (mc->smp_props.clusters_supported && mc->smp_props.has_clusters) {
@@ -2184,7 +2197,8 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 core_id = -1;
                 cluster_offset = table_data->len - pptt_start;
                 build_processor_hierarchy_node(table_data,
-                    (0 << 0), /* Not a physical package */
+                    (0 << 0) | /* Not a physical package */
+                    (1 << 4), /* Identical Implementation */
                     socket_offset, cluster_id, NULL, 0);
             }
         } else {
@@ -2202,7 +2216,8 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 core_id = cpus->cpus[n].props.core_id;
                 core_offset = table_data->len - pptt_start;
                 build_processor_hierarchy_node(table_data,
-                    (0 << 0), /* Not a physical package */
+                    (0 << 0) | /* Not a physical package */
+                    (1 << 4), /* Identical Implementation */
                     cluster_offset, core_id, NULL, 0);
             }
 
diff --git a/hw/acpi/bios-linker-loader.c b/hw/acpi/bios-linker-loader.c
index 1080618..c9ffe44 100644
--- a/hw/acpi/bios-linker-loader.c
+++ b/hw/acpi/bios-linker-loader.c
@@ -22,8 +22,6 @@
 #include "hw/acpi/bios-linker-loader.h"
 #include "hw/nvram/fw_cfg.h"
 
-#include "qemu/bswap.h"
-
 /*
  * Linker/loader is a paravirtualized interface that passes commands to guest.
  * The commands can be used to request guest to
diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c
index 9cd7905..75d5b30 100644
--- a/hw/acpi/cxl.c
+++ b/hw/acpi/cxl.c
@@ -22,6 +22,7 @@
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_host.h"
 #include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_host.h"
 #include "hw/mem/memory-device.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/aml-build.h"
@@ -135,55 +136,52 @@ static void cedt_build_chbs(GArray *table_data, PXBCXLDev *cxl)
  * Interleave ways encoding in CXL 2.0 ECN: 3, 6, 12 and 16-way memory
  * interleaving.
  */
-static void cedt_build_cfmws(GArray *table_data, CXLState *cxls)
+static void cedt_build_cfmws(CXLFixedWindow *fw, Aml *cedt)
 {
-    GList *it;
+    GArray *table_data = cedt->buf;
+    int i;
 
-    for (it = cxls->fixed_windows; it; it = it->next) {
-        CXLFixedWindow *fw = it->data;
-        int i;
-
-        /* Type */
-        build_append_int_noprefix(table_data, 1, 1);
+    /* Type */
+    build_append_int_noprefix(table_data, 1, 1);
 
-        /* Reserved */
-        build_append_int_noprefix(table_data, 0, 1);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 1);
 
-        /* Record Length */
-        build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2);
+    /* Record Length */
+    build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2);
 
-        /* Reserved */
-        build_append_int_noprefix(table_data, 0, 4);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 4);
 
-        /* Base HPA */
-        build_append_int_noprefix(table_data, fw->mr.addr, 8);
+    /* Base HPA */
+    build_append_int_noprefix(table_data, fw->mr.addr, 8);
 
-        /* Window Size */
-        build_append_int_noprefix(table_data, fw->size, 8);
+    /* Window Size */
+    build_append_int_noprefix(table_data, fw->size, 8);
 
-        /* Host Bridge Interleave Ways */
-        build_append_int_noprefix(table_data, fw->enc_int_ways, 1);
+    /* Host Bridge Interleave Ways */
+    build_append_int_noprefix(table_data, fw->enc_int_ways, 1);
 
-        /* Host Bridge Interleave Arithmetic */
-        build_append_int_noprefix(table_data, 0, 1);
+    /* Host Bridge Interleave Arithmetic */
+    build_append_int_noprefix(table_data, 0, 1);
 
-        /* Reserved */
-        build_append_int_noprefix(table_data, 0, 2);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 2);
 
-        /* Host Bridge Interleave Granularity */
-        build_append_int_noprefix(table_data, fw->enc_int_gran, 4);
+    /* Host Bridge Interleave Granularity */
+    build_append_int_noprefix(table_data, fw->enc_int_gran, 4);
 
-        /* Window Restrictions */
-        build_append_int_noprefix(table_data, 0x0f, 2); /* No restrictions */
+    /* Window Restrictions */
+    build_append_int_noprefix(table_data, 0x0f, 2);
 
-        /* QTG ID */
-        build_append_int_noprefix(table_data, 0, 2);
+    /* QTG ID */
+    build_append_int_noprefix(table_data, 0, 2);
 
-        /* Host Bridge List (list of UIDs - currently bus_nr) */
-        for (i = 0; i < fw->num_targets; i++) {
-            g_assert(fw->target_hbs[i]);
-            build_append_int_noprefix(table_data, PXB_DEV(fw->target_hbs[i])->bus_nr, 4);
-        }
+    /* Host Bridge List (list of UIDs - currently bus_nr) */
+    for (i = 0; i < fw->num_targets; i++) {
+        g_assert(fw->target_hbs[i]);
+        build_append_int_noprefix(table_data,
+                                  PXB_DEV(fw->target_hbs[i])->bus_nr, 4);
     }
 }
 
@@ -202,6 +200,7 @@ void cxl_build_cedt(GArray *table_offsets, GArray *table_data,
                     BIOSLinker *linker, const char *oem_id,
                     const char *oem_table_id, CXLState *cxl_state)
 {
+    GSList *cfmws_list, *iter;
     Aml *cedt;
     AcpiTable table = { .sig = "CEDT", .rev = 1, .oem_id = oem_id,
                         .oem_table_id = oem_table_id };
@@ -213,7 +212,12 @@ void cxl_build_cedt(GArray *table_offsets, GArray *table_data,
     /* reserve space for CEDT header */
 
     object_child_foreach_recursive(object_get_root(), cxl_foreach_pxb_hb, cedt);
-    cedt_build_cfmws(cedt->buf, cxl_state);
+
+    cfmws_list = cxl_fmws_get_all_sorted();
+    for (iter = cfmws_list; iter; iter = iter->next) {
+        cedt_build_cfmws(CXL_FMW(iter->data), cedt);
+    }
+    g_slist_free(cfmws_list);
 
     /* copy AML table into ACPI tables blob and patch header there */
     g_array_append_vals(table_data, cedt->buf->data, cedt->buf->len);
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 7a62f8d..95682b7 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -12,10 +12,13 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/acpi/acpi.h"
+#include "hw/acpi/pcihp.h"
 #include "hw/acpi/generic_event_device.h"
+#include "hw/pci/pci.h"
 #include "hw/irq.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/mem/nvdimm.h"
+#include "hw/pci/pci_device.h"
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "qemu/error-report.h"
@@ -26,6 +29,7 @@ static const uint32_t ged_supported_events[] = {
     ACPI_GED_PWR_DOWN_EVT,
     ACPI_GED_NVDIMM_HOTPLUG_EVT,
     ACPI_GED_CPU_HOTPLUG_EVT,
+    ACPI_GED_PCI_HOTPLUG_EVT,
 };
 
 /*
@@ -121,6 +125,12 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev,
                            aml_notify(aml_name("\\_SB.NVDR"), aml_int(0x80)));
                 break;
+            case ACPI_GED_PCI_HOTPLUG_EVT:
+                aml_append(if_ctx,
+                           aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF));
+                aml_append(if_ctx, aml_call0("\\_SB.PCI0.PCNT"));
+                aml_append(if_ctx, aml_release(aml_name("\\_SB.PCI0.BLCK")));
+                break;
             default:
                 /*
                  * Please make sure all the events in ged_supported_events[]
@@ -227,6 +237,14 @@ static const MemoryRegionOps ged_regs_ops = {
     },
 };
 
+static void acpi_ged_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+                                        DeviceState *dev, Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp);
+    }
+}
+
 static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev,
                                     DeviceState *dev, Error **errp)
 {
@@ -240,6 +258,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev,
         }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
         acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_plug_cb(hotplug_dev, &s->pcihp_state, dev, errp);
     } else {
         error_setg(errp, "virt: device plug request for unsupported device"
                    " type: %s", object_get_typename(OBJECT(dev)));
@@ -256,6 +276,9 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev,
         acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
         acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_unplug_request_cb(hotplug_dev, &s->pcihp_state,
+                                            dev, errp);
     } else {
         error_setg(errp, "acpi: device unplug request for unsupported device"
                    " type: %s", object_get_typename(OBJECT(dev)));
@@ -271,6 +294,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev,
         acpi_memory_unplug_cb(&s->memhp_state, dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
         acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_unplug_cb(hotplug_dev, &s->pcihp_state, dev, errp);
     } else {
         error_setg(errp, "acpi: device unplug for unsupported device"
                    " type: %s", object_get_typename(OBJECT(dev)));
@@ -299,6 +324,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
         sel = ACPI_GED_NVDIMM_HOTPLUG_EVT;
     } else if (ev & ACPI_CPU_HOTPLUG_STATUS) {
         sel = ACPI_GED_CPU_HOTPLUG_EVT;
+    } else if (ev & ACPI_PCI_HOTPLUG_STATUS) {
+        sel = ACPI_GED_PCI_HOTPLUG_EVT;
     } else {
         /* Unknown event. Return without generating interrupt. */
         warn_report("GED: Unsupported event %d. No irq injected", ev);
@@ -318,6 +345,10 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
 
 static const Property acpi_ged_properties[] = {
     DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
+    DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, AcpiGedState,
+                     pcihp_state.use_acpi_hotplug_bridge, 0),
+    DEFINE_PROP_LINK("bus", AcpiGedState, pcihp_state.root,
+                     TYPE_PCI_BUS, PCIBus *),
 };
 
 static const VMStateDescription vmstate_memhp_state = {
@@ -386,6 +417,25 @@ static const VMStateDescription vmstate_ghes_state = {
     }
 };
 
+static bool pcihp_needed(void *opaque)
+{
+    AcpiGedState *s = opaque;
+    return s->pcihp_state.use_acpi_hotplug_bridge;
+}
+
+static const VMStateDescription vmstate_pcihp_state = {
+    .name = "acpi-ged/pcihp",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = pcihp_needed,
+    .fields = (const VMStateField[]) {
+        VMSTATE_PCI_HOTPLUG(pcihp_state,
+                            AcpiGedState,
+                            NULL, NULL),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_acpi_ged = {
     .name = "acpi-ged",
     .version_id = 1,
@@ -398,6 +448,7 @@ static const VMStateDescription vmstate_acpi_ged = {
         &vmstate_memhp_state,
         &vmstate_cpuhp_state,
         &vmstate_ghes_state,
+        &vmstate_pcihp_state,
         NULL
     }
 };
 
@@ -406,9 +457,13 @@ static void acpi_ged_realize(DeviceState *dev, Error **errp)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     AcpiGedState *s = ACPI_GED(dev);
+    AcpiPciHpState *pcihp_state = &s->pcihp_state;
     uint32_t ged_events;
     int i;
 
+    if (pcihp_state->use_acpi_hotplug_bridge) {
+        s->ged_event_bitmap |= ACPI_GED_PCI_HOTPLUG_EVT;
+    }
     ged_events = ctpop32(s->ged_event_bitmap);
 
     for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) {
@@ -428,6 +483,13 @@ static void acpi_ged_realize(DeviceState *dev, Error **errp)
             cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev),
                                 &s->cpuhp_state, 0);
             break;
+        case ACPI_GED_PCI_HOTPLUG_EVT:
+            memory_region_init(&s->container_pcihp, OBJECT(dev),
+                               ACPI_PCIHP_REGION_NAME, ACPI_PCIHP_SIZE);
+            sysbus_init_mmio(sbd, &s->container_pcihp);
+            acpi_pcihp_init(OBJECT(s), &s->pcihp_state,
+                            &s->container_pcihp, 0);
+            qbus_set_hotplug_handler(BUS(s->pcihp_state.root), OBJECT(dev));
         }
         ged_events--;
     }
@@ -469,20 +531,34 @@ static void acpi_ged_initfn(Object *obj)
     sysbus_init_mmio(sbd, &ged_st->regs);
 }
 
+static void ged_reset_hold(Object *obj, ResetType type)
+{
+    AcpiGedState *s = ACPI_GED(obj);
+
+    if (s->pcihp_state.use_acpi_hotplug_bridge) {
+        acpi_pcihp_reset(&s->pcihp_state);
+    }
+}
+
 static void acpi_ged_class_init(ObjectClass *class, const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(class);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(class);
     AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(class);
+    ResettableClass *rc = RESETTABLE_CLASS(class);
+    AcpiGedClass *gedc = ACPI_GED_CLASS(class);
 
     dc->desc = "ACPI Generic Event Device";
     device_class_set_props(dc, acpi_ged_properties);
     dc->vmsd = &vmstate_acpi_ged;
     dc->realize = acpi_ged_realize;
 
+    hc->pre_plug = acpi_ged_device_pre_plug_cb;
     hc->plug = acpi_ged_device_plug_cb;
     hc->unplug_request = acpi_ged_unplug_request_cb;
     hc->unplug = acpi_ged_unplug_cb;
 
+    resettable_class_set_parent_phases(rc, NULL, ged_reset_hold, NULL,
+                                       &gedc->parent_phases);
     adevc->ospm_status = acpi_ged_ospm_status;
     adevc->send_event = acpi_ged_send_event;
@@ -494,6 +570,7 @@ static const TypeInfo acpi_ged_info = {
     .instance_size = sizeof(AcpiGedState),
     .instance_init = acpi_ged_initfn,
     .class_init = acpi_ged_class_init,
+    .class_size = sizeof(AcpiGedClass),
     .interfaces = (const InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { TYPE_ACPI_DEVICE_IF },
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index c7a735b..2b3b493 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -41,15 +41,6 @@
 #include "hw/mem/pc-dimm.h"
 #include "hw/mem/nvdimm.h"
 
-//#define DEBUG
-
-#ifdef DEBUG
-#define ICH9_DEBUG(fmt, ...) \
-do { printf("%s "fmt, __func__, ## __VA_ARGS__); } while (0)
-#else
-#define ICH9_DEBUG(fmt, ...) do { } while (0)
-#endif
-
 static void ich9_pm_update_sci_fn(ACPIREGS *regs)
 {
     ICH9LPCPMRegs *pm = container_of(regs, ICH9LPCPMRegs, acpi_regs);
@@ -135,8 +126,6 @@ static const MemoryRegionOps ich9_smi_ops = {
 void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base)
 {
-    ICH9_DEBUG("to 0x%x\n", pm_io_base);
-
     assert((pm_io_base & ICH9_PMIO_MASK) == 0);
 
     pm->pm_io_base = pm_io_base;
@@ -333,9 +322,10 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_irq sci_irq)
     }
 
     if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) {
+        object_property_set_link(OBJECT(lpc_pci), "bus",
+                                 OBJECT(pci_get_bus(lpc_pci)), &error_abort);
         acpi_pcihp_init(OBJECT(lpc_pci),
                         &pm->acpi_pci_hotplug,
-                        pci_get_bus(lpc_pci),
                         pci_address_space_io(lpc_pci),
                         ACPI_PCIHP_ADDR_ICH9);
@@ -439,6 +429,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
     object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE,
                                    &pm->pm_io_base, OBJ_PROP_FLAG_READ);
+    object_property_add_link(obj, "bus", TYPE_PCI_BUS,
+                             (Object **)&pm->acpi_pci_hotplug.root,
+                             object_property_allow_set_link,
+                             OBJ_PROP_LINK_STRONG);
     object_property_add(obj, ACPI_PM_PROP_GPE0_BLK, "uint32",
                         ich9_pm_get_gpe0_blk, NULL, NULL, pm);
@@ -570,7 +564,7 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
 bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus)
 {
     ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev);
-    return acpi_pcihp_is_hotpluggbale_bus(&lpc->pm.acpi_pci_hotplug, bus);
+    return acpi_pcihp_is_hotpluggable_bus(&lpc->pm.acpi_pci_hotplug, bus);
 }
 
 void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list)
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 9ba9080..732d613 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -535,7 +535,7 @@ nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr)
 
 #define NVDIMM_QEMU_RSVD_HANDLE_ROOT 0x10000
 
-/* Read FIT data, defined in docs/specs/acpi_nvdimm.txt. */
+/* Read FIT data, defined in docs/specs/acpi_nvdimm.rst. */
 static void nvdimm_dsm_func_read_fit(NVDIMMState *state, NvdimmDsmIn *in,
                                      hwaddr dsm_mem_addr)
 {
diff --git a/hw/acpi/pci-bridge.c b/hw/acpi/pci-bridge.c
index 7baa703..394a919 100644
--- a/hw/acpi/pci-bridge.c
+++ b/hw/acpi/pci-bridge.c
@@ -35,3 +35,57 @@ void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope)
         }
     }
 }
+
+Aml *build_pci_bridge_edsm(void)
+{
+    Aml *method, *ifctx;
+    Aml *zero = aml_int(0);
+    Aml *func = aml_arg(2);
+    Aml *ret = aml_local(0);
+    Aml *aidx = aml_local(1);
+    Aml *params = aml_arg(4);
+
+    method = aml_method("EDSM", 5, AML_SERIALIZED);
+
+    /* get supported functions */
+    ifctx = aml_if(aml_equal(func, zero));
+    {
+        /* 1: have supported functions */
+        /* 7: support for function 7 */
+        const uint8_t caps = 1 | BIT(7);
+        build_append_pci_dsm_func0_common(ifctx, ret);
+        aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero)));
+        aml_append(ifctx, aml_return(ret));
+    }
+    aml_append(method, ifctx);
+
+    /* handle specific functions requests */
+    /*
+     * PCI Firmware Specification 3.1
+     * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
+     *        Operating Systems
+     */
+    ifctx = aml_if(aml_equal(func, aml_int(7)));
+    {
+        Aml *pkg = aml_package(2);
+        aml_append(pkg, zero);
+        /* optional, if not impl. should return null string */
+        aml_append(pkg, aml_string("%s", ""));
+        aml_append(ifctx, aml_store(pkg, ret));
+
+        /*
+         * IASL is fine when initializing Package with computational data,
+         * however it makes guest unhappy /it fails to process such AML/.
+         * So use runtime assignment to set acpi-index after initializer
+         * to make OSPM happy.
+         */
+        aml_append(ifctx,
+            aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx));
+        aml_append(ifctx, aml_store(aidx, aml_index(ret, zero)));
+        aml_append(ifctx, aml_return(ret));
+    }
+    aml_append(method, ifctx);
+
+    return method;
+}
+
diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c
index d511a85..2228f12 100644
--- a/hw/acpi/pci.c
+++ b/hw/acpi/pci.c
@@ -301,3 +301,53 @@ void build_srat_generic_affinity_structures(GArray *table_data)
     object_child_foreach_recursive(object_get_root(), build_acpi_generic_port,
                                    table_data);
 }
+
+Aml *build_pci_host_bridge_osc_method(bool enable_native_pcie_hotplug)
+{
+    Aml *if_ctx;
+    Aml *if_ctx2;
+    Aml *else_ctx;
+    Aml *method;
+    Aml *a_cwd1 = aml_name("CDW1");
+    Aml *a_ctrl = aml_local(0);
+
+    method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
+    aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
+
+    if_ctx = aml_if(aml_equal(
+        aml_arg(0), aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766")));
+    aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
+    aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
+
+    aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl));
+
+    /*
+     * Always allow native PME, AER (no dependencies)
+     * Allow SHPC (PCI bridges can have SHPC controller)
+     * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled.
+     */
+    aml_append(if_ctx, aml_and(a_ctrl,
+        aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl));
+
+    if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1))));
+    /* Unknown revision */
+    aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x08), a_cwd1));
+    aml_append(if_ctx, if_ctx2);
+
+    if_ctx2 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl)));
+    /* Capabilities bits were masked */
+    aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x10), a_cwd1));
+    aml_append(if_ctx, if_ctx2);
+
+    /* Update DWORD3 in the buffer */
+    aml_append(if_ctx, aml_store(a_ctrl, aml_name("CDW3")));
+    aml_append(method, if_ctx);
+
+    else_ctx = aml_else();
+    /* Unrecognized UUID */
+    aml_append(else_ctx, aml_or(a_cwd1, aml_int(4), a_cwd1));
+    aml_append(method, else_ctx);
+
+    aml_append(method, aml_return(aml_arg(3)));
+    return method;
+}
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 5f79c90..4922bbc 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -3,7 +3,7 @@
  *
  * QEMU supports PCI hotplug via ACPI. This module
  * implements the interface between QEMU and the ACPI BIOS.
- * Interface specification - see docs/specs/acpi_pci_hotplug.txt
+ * Interface specification - see docs/specs/acpi_pci_hotplug.rst
  *
  * Copyright (c) 2013, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
  * Copyright (c) 2006 Fabrice Bellard
@@ -26,7 +26,8 @@
 
 #include "qemu/osdep.h"
 #include "hw/acpi/pcihp.h"
-
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/acpi_aml_interface.h"
 #include "hw/pci-host/i440fx.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bridge.h"
@@ -39,9 +40,9 @@
 #include "migration/vmstate.h"
 #include "qapi/error.h"
 #include "qom/qom-qobject.h"
+#include "qobject/qnum.h"
 #include "trace.h"
 
-#define ACPI_PCIHP_SIZE 0x0018
 #define PCI_UP_BASE 0x0000
 #define PCI_DOWN_BASE 0x0004
 #define PCI_EJ_BASE 0x0008
@@ -97,10 +98,10 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
     return info;
 }
 
-static void acpi_set_pci_info(bool has_bridge_hotplug)
+static void acpi_set_pci_info(AcpiPciHpState *s)
 {
     static bool bsel_is_set;
-    Object *host = acpi_get_i386_pci_host();
+    bool has_bridge_hotplug = s->use_acpi_hotplug_bridge;
     PCIBus *bus;
     BSELInfo info = { .bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT,
                       .has_bridge_hotplug = has_bridge_hotplug };
@@ -110,11 +111,8 @@ static void acpi_set_pci_info(bool has_bridge_hotplug)
     }
     bsel_is_set = true;
 
-    if (!host) {
-        return;
-    }
-
-    bus = PCI_HOST_BRIDGE(host)->bus;
+    bus = s->root;
     if (bus) {
         /* Scan all PCI buses. Set property to enable acpi based hotplug. */
         pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &info);
@@ -264,7 +262,7 @@ static void acpi_pcihp_update(AcpiPciHpState *s)
 
 void acpi_pcihp_reset(AcpiPciHpState *s)
 {
-    acpi_set_pci_info(s->use_acpi_hotplug_bridge);
+    acpi_set_pci_info(s);
     acpi_pcihp_update(s);
 }
 
@@ -371,7 +369,7 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
     acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
 }
 
-bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus)
+bool acpi_pcihp_is_hotpluggable_bus(AcpiPciHpState *s, BusState *bus)
 {
     Object *o = OBJECT(bus->parent);
 
@@ -495,13 +493,13 @@ static const MemoryRegionOps acpi_pcihp_io_ops = {
     },
 };
 
-void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
+void acpi_pcihp_init(Object *owner, AcpiPciHpState *s,
                      MemoryRegion *io, uint16_t io_base)
 {
     s->io_len = ACPI_PCIHP_SIZE;
     s->io_base = io_base;
 
-    s->root = root_bus;
+    assert(s->root);
 
     memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
                           "acpi-pci-hotplug", s->io_len);
@@ -513,6 +511,425 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
                                    OBJ_PROP_FLAG_READ);
 }
 
+void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar)
+{
+    Aml *UUID, *ifctx1;
+    uint8_t byte_list[1] = { 0 }; /* nothing supported yet */
+
+    aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar));
+    /*
+     * PCI Firmware Specification 3.1
+     * 4.6. _DSM Definitions for PCI
+     */
+    UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
+    ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID)));
+    {
+        /* call is for unsupported UUID, bail out */
+        aml_append(ifctx1, aml_return(retvar));
+    }
+    aml_append(ctx, ifctx1);
+
+    ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2)));
+    {
+        /* call is for unsupported REV, bail out */
+        aml_append(ifctx1, aml_return(retvar));
+    }
+    aml_append(ctx, ifctx1);
+}
+
+static Aml *aml_pci_pdsm(void)
+{
+    Aml *method, *ifctx, *ifctx1;
+    Aml *ret = aml_local(0);
+    Aml *caps = aml_local(1);
+    Aml *acpi_index = aml_local(2);
+    Aml *zero = aml_int(0);
+    Aml *one = aml_int(1);
+    Aml *not_supp = aml_int(0xFFFFFFFF);
+    Aml *func = aml_arg(2);
+    Aml *params = aml_arg(4);
+    Aml *bnum = aml_derefof(aml_index(params, aml_int(0)));
+    Aml *sunum = aml_derefof(aml_index(params, aml_int(1)));
+
+    method = aml_method("PDSM", 5, AML_SERIALIZED);
+
+    /* get supported functions */
+    ifctx = aml_if(aml_equal(func, zero));
+    {
+        build_append_pci_dsm_func0_common(ifctx, ret);
+
+        aml_append(ifctx, aml_store(zero, caps));
+        aml_append(ifctx,
+            aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
+        /*
+         * advertise function 7 if device has acpi-index
+         * acpi_index values:
+         *            0: not present (default value)
+         *     FFFFFFFF: not supported (old QEMU without PIDX reg)
+         *        other: device's acpi-index
+         */
+        ifctx1 = aml_if(aml_lnot(
+                     aml_or(aml_equal(acpi_index, zero),
+                            aml_equal(acpi_index, not_supp), NULL)
+                 ));
+        {
+            /* have supported functions */
+            aml_append(ifctx1, aml_or(caps, one, caps));
+            /* support for function 7 */
+            aml_append(ifctx1,
+                       aml_or(caps, aml_shiftleft(one, aml_int(7)), caps));
+        }
+        aml_append(ifctx, ifctx1);
+
+        aml_append(ifctx, aml_store(caps, aml_index(ret, zero)));
+        aml_append(ifctx, aml_return(ret));
+    }
+    aml_append(method, ifctx);
+
+    /* handle specific functions requests */
+    /*
+     * PCI Firmware Specification 3.1
+     * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
+     *        Operating Systems
+     */
+    ifctx = aml_if(aml_equal(func, aml_int(7)));
+    {
+        Aml *pkg = aml_package(2);
+
+        aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
+        aml_append(ifctx, aml_store(pkg, ret));
+        /*
+         * Windows calls func=7 without checking if it's available,
+         * as workaround Microsoft has suggested to return invalid for func7
+         * Package, so return 2 elements package but only initialize elements
+         * when acpi_index is supported and leave them uninitialized, which
+         * leads elements to being Uninitialized ObjectType and should trip
+         * Windows into discarding result as an unexpected and prevent setting
+         * bogus 'PCI Label' on the device.
+         */
+        ifctx1 = aml_if(aml_lnot(aml_lor(
+                     aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp)
+                 )));
+        {
+            aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero)));
+            /*
+             * optional, if not impl. should return null string
+             */
+            aml_append(ifctx1, aml_store(aml_string("%s", ""),
+                                         aml_index(ret, one)));
+        }
+        aml_append(ifctx, ifctx1);
+
+        aml_append(ifctx, aml_return(ret));
+    }
+
+    aml_append(method, ifctx);
+    return method;
+}
+
+void build_acpi_pci_hotplug(Aml *table, AmlRegionSpace rs, uint64_t pcihp_addr)
+{
+    Aml *scope;
+    Aml *field;
+    Aml *method;
+
+    scope = aml_scope("_SB.PCI0");
+
+    aml_append(scope,
+        aml_operation_region("PCST", rs, aml_int(pcihp_addr), 0x08));
+    field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+    aml_append(field, aml_named_field("PCIU", 32));
+    aml_append(field, aml_named_field("PCID", 32));
+    aml_append(scope, field);
+
+    aml_append(scope,
+        aml_operation_region("SEJ", rs,
+                             aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04));
+    field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+    aml_append(field, aml_named_field("B0EJ", 32));
+    aml_append(scope, field);
+
+    aml_append(scope,
+        aml_operation_region("BNMR", rs,
+                             aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08));
+    field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+    aml_append(field, aml_named_field("BNUM", 32));
+    aml_append(field, aml_named_field("PIDX", 32));
+    aml_append(scope, field);
+
+    aml_append(scope, aml_mutex("BLCK", 0));
+
+    method = aml_method("PCEJ", 2, AML_NOTSERIALIZED);
+    aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF));
+    aml_append(method, aml_store(aml_arg(0), aml_name("BNUM")));
+    aml_append(method,
+        aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("B0EJ")));
+    aml_append(method, aml_release(aml_name("BLCK")));
+    aml_append(method, aml_return(aml_int(0)));
+    aml_append(scope, method);
+
+    method = aml_method("AIDX", 2, AML_NOTSERIALIZED);
+    aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF));
+    aml_append(method, aml_store(aml_arg(0), aml_name("BNUM")));
+    aml_append(method,
+        aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("PIDX")));
+    aml_append(method, aml_store(aml_name("PIDX"), aml_local(0)));
+    aml_append(method, aml_release(aml_name("BLCK")));
+    aml_append(method, aml_return(aml_local(0)));
+    aml_append(scope, method);
+
+    aml_append(scope, aml_pci_pdsm());
+
+    aml_append(table, scope);
+}
+
+/* Reserve PCIHP resources */
+void build_append_pcihp_resources(Aml *scope /* \\_SB.PCI0 */,
+                                  uint64_t io_addr, uint64_t io_len)
+{
+    Aml *dev, *crs;
+
+    dev = aml_device("PHPR");
+    aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06")));
+    aml_append(dev,
+               aml_name_decl("_UID", aml_string("PCI Hotplug resources")));
+    /* device present, functioning, decoding, not shown in UI */
+    aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
+    crs = aml_resource_template();
+    aml_append(crs, aml_io(AML_DECODE16, io_addr, io_addr, 1, io_len));
+    aml_append(dev, aml_name_decl("_CRS", crs));
+    aml_append(scope, dev);
+}
+
+bool build_append_notification_callback(Aml *parent_scope, const PCIBus *bus)
+{
+    Aml *method;
+    PCIBus *sec;
+    QObject *bsel;
+    int nr_notifiers = 0;
+    GQueue *pcnt_bus_list = g_queue_new();
+
+    QLIST_FOREACH(sec, &bus->child, sibling) {
+        Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn);
+        if (pci_bus_is_root(sec)) {
+            continue;
+        }
+        nr_notifiers = nr_notifiers +
+                       build_append_notification_callback(br_scope, sec);
+        /*
+         * add new child scope to parent
+         * and keep track of bus that have PCNT,
+         * bus list is used later to call children PCNTs from this level PCNT
+         */
+        if (nr_notifiers) {
+            g_queue_push_tail(pcnt_bus_list, sec);
+            aml_append(parent_scope, br_scope);
+        }
+    }
+
+    /*
+     * Append PCNT method to notify about events on local and child buses.
+     * ps: hostbridge might not have hotplug (bsel) enabled but might have
+     * child bridges that do have bsel.
+     */
+    method = aml_method("PCNT", 0, AML_NOTSERIALIZED);
+
+    /* If bus supports hotplug select it and notify about local events */
+    bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL);
+    if (bsel) {
+        uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
+
+        aml_append(method, aml_store(aml_int(bsel_val), aml_name("BNUM")));
+        aml_append(method, aml_call2("DVNT", aml_name("PCIU"),
+                                     aml_int(1))); /* Device Check */
+        aml_append(method, aml_call2("DVNT", aml_name("PCID"),
+                                     aml_int(3))); /* Eject Request */
+        nr_notifiers++;
+    }
+
+    /* Notify about child bus events in any case */
+    while ((sec = g_queue_pop_head(pcnt_bus_list))) {
+        aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn));
+    }
+
+    aml_append(parent_scope, method);
+    qobject_unref(bsel);
+    g_queue_free(pcnt_bus_list);
+    return !!nr_notifiers;
+}
+
+static Aml *aml_pci_device_dsm(void)
+{
+    Aml *method;
+
+    method = aml_method("_DSM", 4, AML_SERIALIZED);
+    {
+        Aml *params = aml_local(0);
+        Aml *pkg = aml_package(2);
+        aml_append(pkg, aml_int(0));
+        aml_append(pkg, aml_int(0));
+        aml_append(method, aml_store(pkg, params));
+        aml_append(method,
+            aml_store(aml_name("BSEL"), aml_index(params, aml_int(0))));
+        aml_append(method,
+            aml_store(aml_name("ASUN"), aml_index(params, aml_int(1))));
+        aml_append(method,
+            aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1),
+                                 aml_arg(2), aml_arg(3), params))
+        );
+    }
+    return method;
+}
+
+static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev)
+{
+    Aml *method;
+
+    g_assert(pdev->acpi_index != 0);
+    method = aml_method("_DSM", 4, AML_SERIALIZED);
+    {
+        Aml *params = aml_local(0);
+        Aml *pkg = aml_package(1);
+        aml_append(pkg, aml_int(pdev->acpi_index));
+        aml_append(method, aml_store(pkg, params));
+        aml_append(method,
+            aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1),
+                                 aml_arg(2), aml_arg(3), params))
+        );
+    }
+    return method;
+}
+
+static void build_append_pcihp_notify_entry(Aml *method, int slot)
+{
+    Aml *if_ctx;
+    int32_t devfn = PCI_DEVFN(slot, 0);
+
+    if_ctx = aml_if(aml_and(aml_arg(0), aml_int(0x1U << slot), NULL));
+    aml_append(if_ctx, aml_notify(aml_name("S%.02X", devfn), aml_arg(1)));
+    aml_append(method, if_ctx);
+}
+
+static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus)
+{
+    const PCIDevice *pdev = bus->devices[devfn];
+
+    if (PCI_FUNC(devfn)) {
+        if (IS_PCI_BRIDGE(pdev)) {
+            /*
+             * Ignore only hotplugged PCI bridges on !0 functions, but
+             * allow describing cold plugged bridges on all functions
+             */
+            if (DEVICE(pdev)->hotplugged) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus)
+{
+    PCIDevice *pdev = bus->devices[devfn];
+    if (pdev) {
+        return is_devfn_ignored_generic(devfn, bus) ||
+               !DEVICE_GET_CLASS(pdev)->hotpluggable ||
+               /* Cold plugged bridges aren't themselves hot-pluggable */
+               (IS_PCI_BRIDGE(pdev) && !DEVICE(pdev)->hotplugged);
+    } else { /* non populated slots */
+        /*
+         * hotplug is supported only for non-multifunction device
+         * so generate device description only for function 0
+         */
+        if (PCI_FUNC(devfn) ||
+            (pci_bus_is_express(bus) && PCI_SLOT(devfn) > 0)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus)
+{
+    int devfn;
+    Aml *dev, *notify_method = NULL, *method;
+    QObject *bsel = object_property_get_qobject(OBJECT(bus),
+                                                ACPI_PCIHP_PROP_BSEL, NULL);
+    uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
+    qobject_unref(bsel);
+
+    aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val)));
+    notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED);
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        int slot = PCI_SLOT(devfn);
+        int adr = slot << 16 | PCI_FUNC(devfn);
+
+        if (is_devfn_ignored_hotplug(devfn, bus)) {
+            continue;
+        }
+
+        if (bus->devices[devfn]) {
+            dev = aml_scope("S%.02X", devfn);
+        } else {
+            dev = aml_device("S%.02X", devfn);
+            aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
+        }
+
+        /*
+         * Can't declare _SUN here for every device as it changes 'slot'
+         * enumeration order in linux kernel, so use another variable for it
+         */
+        aml_append(dev, aml_name_decl("ASUN", aml_int(slot)));
+        aml_append(dev, aml_pci_device_dsm());
+
+        aml_append(dev, aml_name_decl("_SUN", aml_int(slot)));
+        /* add _EJ0 to make slot hotpluggable */
+        method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
+        aml_append(method,
+            aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN"))
+        );
+        aml_append(dev, method);
+
+        build_append_pcihp_notify_entry(notify_method, slot);
+
+        /* device descriptor has been composed, add it into parent context */
+        aml_append(parent_scope, dev);
+    }
+    aml_append(parent_scope, notify_method);
+}
+
+void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
+{
+    int devfn;
+    Aml *dev;
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */
+        int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn);
+        PCIDevice *pdev = bus->devices[devfn];
+
+        if (!pdev || is_devfn_ignored_generic(devfn, bus)) {
+            continue;
+        }
+
+        /* start to compose PCI device descriptor */
+        dev = aml_device("S%.02X", devfn);
+        aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
+
+        call_dev_aml_func(DEVICE(bus->devices[devfn]), dev);
+        /* add _DSM if device has acpi-index set */
+        if (pdev->acpi_index &&
+            !object_property_get_bool(OBJECT(pdev), "hotpluggable",
+                                      &error_abort)) {
+            aml_append(dev, aml_pci_static_endpoint_dsm(pdev));
+        }
+
+        /* device descriptor has been composed, add it into parent context */
+        aml_append(parent_scope, dev);
+    }
+}
+
 const VMStateDescription vmstate_acpi_pcihp_pci_status = {
     .name = "acpi_pcihp_pci_status",
     .version_id = 1,
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index b16d45f..7a18f18 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -406,7 +406,7 @@ static bool piix4_is_hotpluggable_bus(HotplugHandler *hotplug_dev,
                                       BusState *bus)
 {
     PIIX4PMState *s = PIIX4_PM(hotplug_dev);
-    return acpi_pcihp_is_hotpluggbale_bus(&s->acpi_pci_hotplug, bus);
+    return acpi_pcihp_is_hotpluggable_bus(&s->acpi_pci_hotplug, bus);
 }
 
 static void piix4_pm_machine_ready(Notifier *n, void *opaque)
@@ -567,7 +567,8 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
 
     if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge ||
         s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) {
-        acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent,
+        object_property_set_link(OBJECT(s), "bus", OBJECT(bus), &error_abort);
+        acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, parent,
                         ACPI_PCIHP_ADDR_PIIX4);
         qbus_set_hotplug_handler(BUS(pci_get_bus(PCI_DEVICE(s))), OBJECT(s));
     }
@@ -611,6 +612,8 @@ static const Property piix4_pm_properties[] = {
                      acpi_pci_hotplug.use_acpi_hotplug_bridge, true),
     DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCI_ROOTHP, PIIX4PMState,
                      acpi_pci_hotplug.use_acpi_root_pci_hotplug, true),
+    DEFINE_PROP_LINK("bus", PIIX4PMState, acpi_pci_hotplug.root,
+                     TYPE_PCI_BUS, PCIBus *),
     DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
                      acpi_memory_hotplug.is_enabled, true),
     DEFINE_PROP_BOOL("smm-compat", PIIX4PMState, smm_compat, false),
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
index fac3d6d..33c35c8 100644
--- a/hw/acpi/vmgenid.c
+++ b/hw/acpi/vmgenid.c
@@ -38,7 +38,7 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
     guid_le = qemu_uuid_bswap(vms->guid);
     /* The GUID is written at a fixed offset into the fw_cfg file
      * in order to implement the "OVMF SDT Header probe suppressor"
-     * see docs/specs/vmgenid.txt for more details
+     * see docs/specs/vmgenid.rst for more details
      */
     g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data,
                         ARRAY_SIZE(guid_le.data));
@@ -101,7 +101,7 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
      * < 4GB, but write 64 bits anyway.
      * The address that is patched in is offset in order to implement
      * the "OVMF SDT Header probe suppressor"
-     * see docs/specs/vmgenid.txt for more details.
+     * see docs/specs/vmgenid.rst for more details.
      */
     bios_linker_loader_write_pointer(linker,
         VMGENID_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
@@ -153,7 +153,7 @@ static void vmgenid_update_guest(VmGenIdState *vms)
         guid_le = qemu_uuid_bswap(vms->guid);
         /* The GUID is written at a fixed offset into the fw_cfg file
          * in order to implement the "OVMF SDT Header probe suppressor"
-         * see docs/specs/vmgenid.txt for more details.
+         * see docs/specs/vmgenid.rst for more details.
          */
         cpu_physical_memory_write(vmgenid_addr, guid_le.data,
                                   sizeof(guid_le.data));
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index a55b44d..2aa4b5d 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -34,6 +34,8 @@ config ARM_VIRT
     select ACPI_HW_REDUCED
     select ACPI_APEI
     select ACPI_VIOT
+    select ACPI_PCIHP
+    select ACPI_PCI_BRIDGE
     select VIRTIO_MEM_SUPPORTED
     select ACPI_CXL
     select ACPI_HMAT
@@ -95,6 +97,12 @@ config INTEGRATOR
     select PL181 # display
     select SMC91C111
 
+config MAX78000FTHR
+    bool
+    default y
+    depends on TCG && ARM
+    select MAX78000_SOC
+
 config MPS3R
     bool
     default y
@@ -147,7 +155,6 @@ config OMAP
     bool
     select FRAMEBUFFER
     select I2C
-    select NAND
     select PFLASH_CFI01
     select SD
     select SERIAL_MM
@@ -358,6 +365,15 @@ config ALLWINNER_R40
     select USB_EHCI_SYSBUS
     select SD
 
+config MAX78000_SOC
+    bool
+    select ARM_V7M
+    select MAX78000_ICC
+    select MAX78000_UART
+    select MAX78000_GCR
+    select MAX78000_TRNG
+    select MAX78000_AES
+
 config RASPI
     bool
     default y
@@ -533,6 +549,7 @@ config ASPEED_SOC
     select I2C
     select DPS310
     select PCA9552
+    select PCA9554
     select SERIAL_MM
     select SMBUS_EEPROM
     select PCA954X
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
index 0bf7008..c8eda39 100644
--- a/hw/arm/allwinner-r40.c
+++ b/hw/arm/allwinner-r40.c
@@ -20,7 +20,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
-#include "qemu/bswap.h"
 #include "qemu/module.h"
 #include "qemu/units.h"
 #include "hw/boards.h"
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index d0b3336..c31bbe7 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -19,6 +19,7 @@
 #include "hw/i2c/i2c_mux_pca954x.h"
 #include "hw/i2c/smbus_eeprom.h"
 #include "hw/gpio/pca9552.h"
+#include "hw/gpio/pca9554.h"
 #include "hw/nvram/eeprom_at24c.h"
 #include "hw/sensor/tmp105.h"
 #include "hw/misc/led.h"
@@ -197,9 +198,12 @@ struct AspeedMachineState {
 #define FUJI_BMC_HW_STRAP2 0x00000000
 
 /* Bletchley hardware value */
-/* TODO: Leave same as EVB for now. */
-#define BLETCHLEY_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
-#define BLETCHLEY_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
+#define BLETCHLEY_BMC_HW_STRAP1 0x00002000
+#define BLETCHLEY_BMC_HW_STRAP2 0x00000801
+
+/* GB200NVL hardware value */
+#define GB200NVL_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
+#define GB200NVL_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
 
 /* Qualcomm DC-SCM hardware value */
 #define QCOM_DC_SCM_V1_BMC_HW_STRAP1 0x00000000
@@ -465,6 +469,8 @@ static void aspeed_machine_init(MachineState *machine)
         aspeed_board_init_flashes(&bmc->soc->spi[0],
                                   bmc->spi_model ? bmc->spi_model : amc->spi_model,
                                   1, amc->num_cs);
+        aspeed_board_init_flashes(&bmc->soc->spi[1],
+                                  amc->spi2_model, 1, amc->num_cs2);
     }
 
     if (machine->kernel_filename && sc->num_cpus > 1) {
@@ -645,6 +651,12 @@ static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr)
                             TYPE_PCA9552, addr);
 }
 
+static I2CSlave *create_pca9554(AspeedSoCState *soc, int bus_id, int addr)
+{
+    return i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id),
+                                   TYPE_PCA9554, addr);
+}
+
 static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc)
 {
     AspeedSoCState *soc = bmc->soc;
@@ -1003,6 +1015,180 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc)
 }
 
 #define TYPE_TMP421 "tmp421"
+#define TYPE_DS1338 "ds1338"
+
+/* Catalina hardware value */
+#define CATALINA_BMC_HW_STRAP1 0x00002002
+#define CATALINA_BMC_HW_STRAP2 0x00000800
+
+#define CATALINA_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB)
+
+static void catalina_bmc_i2c_init(AspeedMachineState *bmc)
+{
+    /* Reference from v6.16-rc2 aspeed-bmc-facebook-catalina.dts */
+
+    AspeedSoCState *soc = bmc->soc;
+    I2CBus *i2c[16] = {};
+    I2CSlave *i2c_mux;
+
+    /* busses 0-15 are all used. */
+    for (int i = 0; i < ARRAY_SIZE(i2c); i++) {
+        i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+    }
+
+    /* &i2c0 */
+    /* i2c-mux@71 (PCA9546) on i2c0 */
+    i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x71);
+
+    /* i2c-mux@72 (PCA9546) on i2c0 */
+    i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x72);
+
+    /* i2c0mux1ch1 */
+    /* io_expander7 - pca9535@20 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+                            TYPE_PCA9552, 0x20);
+    /* eeprom@50 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+    /* i2c-mux@73 (PCA9546) on i2c0 */
+    i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x73);
+
+    /* i2c-mux@75 (PCA9546) on i2c0 */
+    i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x75);
+
+    /* i2c-mux@76 (PCA9546) on i2c0 */
+    i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x76);
+
+    /* i2c0mux4ch1 */
+    /* io_expander8 - pca9535@21 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+                            TYPE_PCA9552, 0x21);
+    /* eeprom@50 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+    /* i2c-mux@77 (PCA9546) on i2c0 */
+    i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x77);
+
+
+    /* &i2c1 */
+    /* i2c-mux@70 (PCA9548) on i2c1 */
+    i2c_mux = i2c_slave_create_simple(i2c[1], TYPE_PCA9548, 0x70);
+    /* i2c1mux0ch0 */
+    /* ina238@41 - no model */
+    /* ina238@42 - no model */
+    /* ina238@44 - no model */
+    /* i2c1mux0ch1 */
+    /* ina238@41 - no model */
+    /* ina238@43 - no model */
+    /* i2c1mux0ch4 */
+    /* ltc4287@42 - no model */
+    /* ltc4287@43 - no model */
+
+    /* i2c1mux0ch5 */
+    /* eeprom@54 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 5), 0x54, 8 * KiB);
+    /* tpm75@4f */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), TYPE_TMP75, 0x4f);
+
+    /* i2c1mux0ch6 */
+    /* io_expander5 - pca9554@27 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+                            TYPE_PCA9554, 0x27);
+    /* io_expander6 - pca9555@25 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+                            TYPE_PCA9552, 0x25);
+    /* eeprom@51 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x51, 8 * KiB);
+
+    /* i2c1mux0ch7 */
+    /* eeprom@53 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 7), 0x53, 8 * KiB);
+    /* temperature-sensor@4b - tmp75 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 7), TYPE_TMP75, 0x4b);
+
+    /* &i2c2 */
+    /* io_expander0 - pca9555@20 */
+    i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x20);
+    /* io_expander0 - pca9555@21 */
+    i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x21);
+    /* io_expander0 - pca9555@27 */
+    i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x27);
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[2], 0x50, 8 * KiB);
+    /* eeprom@51 */
+    at24c_eeprom_init(i2c[2], 0x51, 8 * KiB);
+
+    /* &i2c5 */
+    /* i2c-mux@70 (PCA9548) on i2c5 */
+    i2c_mux = i2c_slave_create_simple(i2c[5], TYPE_PCA9548, 0x70);
+    /* i2c5mux0ch6 */
+    /* eeprom@52 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x52, 8 * KiB);
+    /* i2c5mux0ch7 */
+    /* ina230@40 - no model */
+    /* ina230@41 - no model */
+    /* ina230@44 - no model */
+    /* ina230@45 - no model */
+
+    /* &i2c6 */
+    /* io_expander3 - pca9555@21 */
+    i2c_slave_create_simple(i2c[6], TYPE_PCA9552, 0x21);
+    /* rtc@6f - nct3018y */
+    i2c_slave_create_simple(i2c[6], TYPE_DS1338, 0x6f);
+
+    /* &i2c9 */
+    /* io_expander4 - pca9555@4f */
+    i2c_slave_create_simple(i2c[9], TYPE_PCA9552, 0x4f);
+    /* temperature-sensor@4b - tpm75 */
+    i2c_slave_create_simple(i2c[9], TYPE_TMP75, 0x4b);
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[9], 0x50, 8 * KiB);
+    /* eeprom@56 */
+    at24c_eeprom_init(i2c[9], 0x56, 8 * KiB);
+
+    /* &i2c10 */
+    /* temperature-sensor@1f - tpm421 */
+    i2c_slave_create_simple(i2c[10], TYPE_TMP421, 0x1f);
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[10], 0x50, 8 * KiB);
+
+    /* &i2c11 */
+    /* ssif-bmc@10 - no model */
+
+    /* &i2c12 */
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[12], 0x50, 8 * KiB);
+
+    /* &i2c13 */
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[13], 0x50, 8 * KiB);
+    /* eeprom@54 */
+    at24c_eeprom_init(i2c[13], 0x54, 256);
+    /* eeprom@55 */
+    at24c_eeprom_init(i2c[13], 0x55, 256);
+    /* eeprom@57 */
+    at24c_eeprom_init(i2c[13], 0x57, 256);
+
+    /* &i2c14 */
+    /* io_expander9 - pca9555@10 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x10);
+    /* io_expander10 - pca9555@11 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x11);
+    /* io_expander11 - pca9555@12 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x12);
+    /* io_expander12 - pca9555@13 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x13);
+    /* io_expander13 - pca9555@14 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x14);
+    /* io_expander14 - pca9555@15 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x15);
+
+    /* &i2c15 */
+    /* temperature-sensor@1f - tmp421 */
+    i2c_slave_create_simple(i2c[15], TYPE_TMP421, 0x1f);
+    /* eeprom@52 */
+    at24c_eeprom_init(i2c[15], 0x52, 8 * KiB);
+}
 
 static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
 {
@@ -1050,6 +1236,45 @@ static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
     i2c_slave_create_simple(i2c[12], TYPE_PCA9552, 0x67);
 }
 
+
+static void gb200nvl_bmc_i2c_init(AspeedMachineState *bmc)
+{
+    AspeedSoCState *soc = bmc->soc;
+    I2CBus *i2c[15] = {};
+    DeviceState *dev;
+    for (int i = 0; i < sizeof(i2c) / sizeof(i2c[0]); i++) {
+        if ((i == 11) || (i == 12) || (i == 13)) {
+            continue;
+        }
+        i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+    }
+
+    /* Bus 5 Expander */
+    create_pca9554(soc, 4, 0x21);
+
+    /* Mux I2c Expanders */
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x71);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x72);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x73);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x75);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x76);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x77);
+
+    /* Bus 10 */
+    dev = DEVICE(create_pca9554(soc, 9, 0x20));
+
+    /* Set FPGA_READY */
+    object_property_set_str(OBJECT(dev), "pin1", "high", &error_fatal);
+
+    create_pca9554(soc, 9, 0x21);
+    at24c_eeprom_init(i2c[9], 0x50, 64 * KiB);
+    at24c_eeprom_init(i2c[9], 0x51, 64 * KiB);
+
+    /* Bus 11 */
+    at24c_eeprom_init_rom(i2c[10], 0x50, 256, gb200nvl_bmc_fruid,
+                          gb200nvl_bmc_fruid_len);
+}
+
 static void fby35_i2c_init(AspeedMachineState *bmc)
 {
     AspeedSoCState *soc = bmc->soc;
@@ -1585,6 +1810,52 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc,
     aspeed_machine_class_init_cpus_defaults(mc);
 }
 
+static void aspeed_machine_catalina_class_init(ObjectClass *oc,
+                                               const void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+    mc->desc = "Facebook Catalina BMC (Cortex-A7)";
+    amc->soc_name = "ast2600-a3";
+    amc->hw_strap1 = CATALINA_BMC_HW_STRAP1;
+    amc->hw_strap2 = CATALINA_BMC_HW_STRAP2;
+    amc->fmc_model = "w25q01jvq";
+    amc->spi_model = NULL;
+    amc->num_cs = 2;
+    amc->macs_mask = ASPEED_MAC2_ON;
+    amc->i2c_init = catalina_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
+    mc->default_ram_size = CATALINA_BMC_RAM_SIZE;
+    aspeed_machine_class_init_cpus_defaults(mc);
+    aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
+#define GB200NVL_BMC_RAM_SIZE ASPEED_RAM_SIZE(1 * GiB)
+
+static void aspeed_machine_gb200nvl_class_init(ObjectClass *oc,
+                                               const void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+    mc->desc = "Nvidia GB200NVL BMC (Cortex-A7)";
+    amc->soc_name = "ast2600-a3";
+    amc->hw_strap1 = GB200NVL_BMC_HW_STRAP1;
+    amc->hw_strap2 = GB200NVL_BMC_HW_STRAP2;
+    amc->fmc_model = "mx66u51235f";
+    amc->spi_model = "mx66u51235f";
+    amc->num_cs = 2;
+
+    amc->spi2_model = "mx66u51235f";
+    amc->num_cs2 = 1;
+    amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
+    amc->i2c_init = gb200nvl_bmc_i2c_init;
+    mc->default_ram_size = GB200NVL_BMC_RAM_SIZE;
+    aspeed_machine_class_init_cpus_defaults(mc);
+    aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
 static void fby35_reset(MachineState *state, ResetType type)
 {
     AspeedMachineState *bmc = ASPEED_MACHINE(state);
@@ -1878,6 +2149,14 @@ static const TypeInfo aspeed_machine_types[] = {
         .parent = TYPE_ASPEED_MACHINE,
         .class_init = aspeed_machine_bletchley_class_init,
     }, {
+        .name = MACHINE_TYPE_NAME("gb200nvl-bmc"),
+        .parent = TYPE_ASPEED_MACHINE,
+        .class_init = aspeed_machine_gb200nvl_class_init,
+    }, {
+        .name = MACHINE_TYPE_NAME("catalina-bmc"),
+        .parent = TYPE_ASPEED_MACHINE,
+        .class_init = aspeed_machine_catalina_class_init,
+    }, {
         .name = MACHINE_TYPE_NAME("fby35-bmc"),
         .parent = MACHINE_TYPE_NAME("ast2600-evb"),
         .class_init = aspeed_machine_fby35_class_init,
diff --git a/hw/arm/aspeed_ast27x0-fc.c b/hw/arm/aspeed_ast27x0-fc.c
index 125a3ad..7087be4 100644
--- a/hw/arm/aspeed_ast27x0-fc.c
+++ b/hw/arm/aspeed_ast27x0-fc.c
@@ -48,7 +48,7 @@ struct Ast2700FCState {
     bool mmio_exec;
 };
 
-#define AST2700FC_BMC_RAM_SIZE (2 * GiB)
+#define AST2700FC_BMC_RAM_SIZE (1 * GiB)
 #define AST2700FC_CM4_DRAM_SIZE (32 * MiB)
 
 #define AST2700FC_HW_STRAP1 0x000000C0
@@ -68,6 +68,7 @@ static void ast2700fc_ca35_init(MachineState *machine)
 
     memory_region_init(&s->ca35_memory, OBJECT(&s->ca35), "ca35-memory",
                        UINT64_MAX);
+    memory_region_add_subregion(get_system_memory(), 0, &s->ca35_memory);
 
     if (!memory_region_init_ram(&s->ca35_dram, OBJECT(&s->ca35), "ca35-dram",
                                 AST2700FC_BMC_RAM_SIZE, &error_abort)) {
@@ -86,6 +87,13 @@ static void ast2700fc_ca35_init(MachineState *machine)
                                 AST2700FC_BMC_RAM_SIZE, &error_abort)) {
         return;
     }
+
+    for (int i = 0; i < sc->macs_num; i++) {
+        if (!qemu_configure_nic_device(DEVICE(&soc->ftgmac100[i]),
+                                       true, NULL)) {
+            break;
+        }
+    }
     if (!object_property_set_int(OBJECT(&s->ca35), "hw-strap1",
                                  AST2700FC_HW_STRAP1, &error_abort)) {
         return;
diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
index 1974a25..6aa3841 100644
--- a/hw/arm/aspeed_ast27x0.c
+++ b/hw/arm/aspeed_ast27x0.c
@@ -23,14 +23,14 @@
 #include "qobject/qlist.h"
 #include "qemu/log.h"
 
-#define AST2700_SOC_IO_SIZE 0x01000000
+#define AST2700_SOC_IO_SIZE 0x00FE0000
 #define AST2700_SOC_IOMEM_SIZE 0x01000000
 #define AST2700_SOC_DPMCU_SIZE 0x00040000
 #define AST2700_SOC_LTPI_SIZE 0x01000000
 
 static const hwaddr aspeed_soc_ast2700_memmap[] = {
-    [ASPEED_DEV_IOMEM] = 0x00000000,
     [ASPEED_DEV_VBOOTROM] = 0x00000000,
+    [ASPEED_DEV_IOMEM] = 0x00020000,
     [ASPEED_DEV_SRAM] = 0x10000000,
     [ASPEED_DEV_DPMCU] = 0x11000000,
     [ASPEED_DEV_IOMEM0] = 0x12000000,
@@ -346,8 +346,9 @@ static void aspeed_ram_capacity_write(void *opaque, hwaddr addr, uint64_t data,
      * If writes the data to the address which is beyond the ram size,
      * it would write the data to the "address % ram_size".
      */
-    result = address_space_write(&s->dram_as, addr % ram_size,
-                                 MEMTXATTRS_UNSPECIFIED, &data, 4);
+    address_space_stl_le(&s->dram_as, addr % ram_size, data,
+                         MEMTXATTRS_UNSPECIFIED, &result);
+
     if (result != MEMTX_OK) {
         qemu_log_mask(LOG_GUEST_ERROR,
                       "%s: DRAM write failed, addr:0x%" HWADDR_PRIx
@@ -360,9 +361,10 @@ static const MemoryRegionOps aspeed_ram_capacity_ops = {
     .read = aspeed_ram_capacity_read,
    .write = aspeed_ram_capacity_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl.min_access_size = 4,
     .valid = {
-        .min_access_size = 1,
-        .max_access_size = 8,
+        .min_access_size = 4,
+        .max_access_size = 4,
     },
 };
diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c
index daa3d32..8bbbdec 100644
--- a/hw/arm/aspeed_eeprom.c
+++ b/hw/arm/aspeed_eeprom.c
@@ -162,6 +162,25 @@ const uint8_t rainier_bmc_fruid[] = {
     0x31, 0x50, 0x46, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
 };
 
+const uint8_t gb200nvl_bmc_fruid[] = {
+    0x01, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0xf3, 0x01, 0x0a, 0x19, 0x1f,
+    0x0f, 0xe6, 0xc6, 0x4e, 0x56, 0x49, 0x44, 0x49, 0x41, 0xc5, 0x50, 0x33,
+    0x38, 0x30, 0x39, 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38,
+    0x30, 0x30, 0x31, 0x35, 0x30, 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33,
+    0x38, 0x30, 0x39, 0x2d, 0x30, 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30,
+    0xc0, 0x01, 0x01, 0xd6, 0x4d, 0x41, 0x43, 0x3a, 0x20, 0x33, 0x43, 0x3a,
+    0x36, 0x44, 0x3a, 0x36, 0x36, 0x3a, 0x31, 0x34, 0x3a, 0x43, 0x38, 0x3a,
+    0x37, 0x41, 0xc1, 0x3b, 0x01, 0x09, 0x19, 0xc6, 0x4e, 0x56, 0x49, 0x44,
+    0x49, 0x41, 0xc9, 0x50, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x42, 0x4d, 0x43,
+    0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x30,
+    0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, 0xc4, 0x41, 0x45, 0x2e, 0x31,
+    0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, 0x30, 0x30, 0x31,
+    0x35, 0x30, 0xc0, 0xc4, 0x76, 0x30, 0x2e, 0x31, 0xc1, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+};
+
 const size_t tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid);
 const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid);
 const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid);
@@ -169,3 +188,5 @@ const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid);
 const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid);
 const size_t rainier_bb_fruid_len = sizeof(rainier_bb_fruid);
 const size_t rainier_bmc_fruid_len = sizeof(rainier_bmc_fruid);
+const size_t gb200nvl_bmc_fruid_len = sizeof(gb200nvl_bmc_fruid);
+
diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h
index f08c16e..3ed9bc1 100644
--- a/hw/arm/aspeed_eeprom.h
+++ b/hw/arm/aspeed_eeprom.h
@@ -26,4 +26,7 @@ extern const size_t rainier_bb_fruid_len;
 extern const uint8_t rainier_bmc_fruid[];
 extern const size_t rainier_bmc_fruid_len;
 
+extern const uint8_t gb200nvl_bmc_fruid[];
+extern const size_t gb200nvl_bmc_fruid_len;
+
 #endif
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index f94b940..d391cd0 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -15,10 +15,12 @@
 #include "hw/arm/boot.h"
 #include "hw/arm/linux-boot-if.h"
 #include "cpu.h"
+#include "exec/tswap.h"
 #include "exec/target_page.h"
 #include "system/kvm.h"
 #include "system/tcg.h"
 #include "system/system.h"
+#include "system/memory.h"
 #include "system/numa.h"
 #include "hw/boards.h"
 #include "system/reset.h"
@@ -28,6 +30,7 @@
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/units.h"
+#include "qemu/bswap.h"
 
 /* Kernel boot protocol is specified in the kernel docs
  * Documentation/arm/Booting and Documentation/arm64/booting.txt
@@ -526,7 +529,7 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
     if (binfo->dtb_filename) {
         char *filename;
-        filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename);
+        filename = qemu_find_file(QEMU_FILE_TYPE_DTB, binfo->dtb_filename);
         if (!filename) {
             fprintf(stderr, "Couldn't open dtb file %s\n", binfo->dtb_filename);
             goto fail;
@@ -743,7 +746,7 @@ static void do_cpu_reset(void *opaque)
         } else {
             if (arm_feature(env, ARM_FEATURE_EL3) &&
                 (info->secure_boot ||
-                 (info->secure_board_setup && cs == first_cpu))) {
+                 (info->secure_board_setup && cpu == info->primary_cpu))) {
                 /* Start this CPU in Secure SVC */
                 target_el = 3;
             }
@@ -751,7 +754,7 @@ static void do_cpu_reset(void *opaque)
 
         arm_emulate_firmware_reset(cs, target_el);
 
-        if (cs == first_cpu) {
+        if (cpu == info->primary_cpu) {
             AddressSpace *as = arm_boot_address_space(cpu, info);
 
             cpu_set_pc(cs, info->loader_start);
@@ -1238,6 +1241,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info)
     info->dtb_filename = ms->dtb;
     info->dtb_limit = 0;
 
+    /* We assume the CPU passed as argument is the primary CPU. */
+    info->primary_cpu = cpu;
+
     /* Load the kernel. */
     if (!info->kernel_filename || info->firmware_loaded) {
         arm_setup_firmware_boot(cpu, info);
@@ -1287,12 +1293,8 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info)
         object_property_set_int(cpuobj, "psci-conduit", info->psci_conduit,
                                 &error_abort);
 
-        /*
-         * Secondary CPUs start in PSCI powered-down state. Like the
-         * code in do_cpu_reset(), we assume first_cpu is the primary
-         * CPU.
-         */
-        if (cs != first_cpu) {
+        /* Secondary CPUs start in PSCI powered-down state. */
+        if (ARM_CPU(cs) != info->primary_cpu) {
             object_property_set_bool(cpuobj, "start-powered-off", true,
                                      &error_abort);
         }
diff --git a/hw/arm/fby35.c b/hw/arm/fby35.c
index e123fa6..c14fc2e 100644
--- a/hw/arm/fby35.c
+++ b/hw/arm/fby35.c
@@ -77,6 +77,7 @@ static void fby35_bmc_init(Fby35State *s)
 
     memory_region_init(&s->bmc_memory, OBJECT(&s->bmc), "bmc-memory",
                        UINT64_MAX);
+    memory_region_add_subregion(get_system_memory(), 0, &s->bmc_memory);
     memory_region_init_ram(&s->bmc_dram, OBJECT(&s->bmc), "bmc-dram",
                            FBY35_BMC_RAM_SIZE, &error_abort);
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 23e662c..866f4d1 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -356,6 +356,10 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                                qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
             sysbus_connect_irq(gicsbd, i + ms->smp.cpus,
                                qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+            sysbus_connect_irq(gicsbd, i + 2 * ms->smp.cpus,
+                               qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
+            sysbus_connect_irq(gicsbd, i + 3 * ms->smp.cpus,
+                               qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
         }
     }
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index 3ae26eb..165c0b7 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -357,6 +357,7 @@ static void highbank_class_init(ObjectClass *oc, const void *data)
     mc->max_cpus = 4;
     mc->ignore_memory_transaction_failures = true;
     mc->default_ram_id = "highbank.dram";
+    mc->deprecation_reason = "no known users left for this machine";
 }
 
 static const TypeInfo highbank_type = {
@@ -381,6 +382,7 @@ static void midway_class_init(ObjectClass *oc, const void *data)
     mc->max_cpus = 4;
     mc->ignore_memory_transaction_failures = true;
     mc->default_ram_id = "highbank.dram";
+    mc->deprecation_reason = "no known users left for this machine";
 }
 
 static const TypeInfo
midway_type = { diff --git a/hw/arm/max78000_soc.c b/hw/arm/max78000_soc.c new file mode 100644 index 0000000..7f1856f --- /dev/null +++ b/hw/arm/max78000_soc.c @@ -0,0 +1,232 @@ +/* + * MAX78000 SOC + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Implementation based on stm32f205 and Max78000 user guide at + * https://www.analog.com/media/en/technical-documentation/user-guides/max78000-user-guide.pdf + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "system/address-spaces.h" +#include "system/system.h" +#include "hw/arm/max78000_soc.h" +#include "hw/qdev-clock.h" +#include "hw/misc/unimp.h" + +static const uint32_t max78000_icc_addr[] = {0x4002a000, 0x4002a800}; +static const uint32_t max78000_uart_addr[] = {0x40042000, 0x40043000, + 0x40044000}; + +static const int max78000_uart_irq[] = {14, 15, 34}; + +static void max78000_soc_initfn(Object *obj) +{ + MAX78000State *s = MAX78000_SOC(obj); + int i; + + object_initialize_child(obj, "armv7m", &s->armv7m, TYPE_ARMV7M); + + object_initialize_child(obj, "gcr", &s->gcr, TYPE_MAX78000_GCR); + + for (i = 0; i < MAX78000_NUM_ICC; i++) { + g_autofree char *name = g_strdup_printf("icc%d", i); + object_initialize_child(obj, name, &s->icc[i], TYPE_MAX78000_ICC); + } + + for (i = 0; i < MAX78000_NUM_UART; i++) { + g_autofree char *name = g_strdup_printf("uart%d", i); + object_initialize_child(obj, name, &s->uart[i], + TYPE_MAX78000_UART); + } + + object_initialize_child(obj, "trng", &s->trng, TYPE_MAX78000_TRNG); + + object_initialize_child(obj, "aes", &s->aes, TYPE_MAX78000_AES); + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); +} + +static void max78000_soc_realize(DeviceState *dev_soc, Error **errp) +{ + MAX78000State *s = MAX78000_SOC(dev_soc); + MemoryRegion *system_memory = get_system_memory(); + DeviceState *dev, *gcrdev, *armv7m; + SysBusDevice *busdev; + Error *err = NULL; + int i; + + if (!clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must be wired up by the board code"); + return; + } + + memory_region_init_rom(&s->flash, OBJECT(dev_soc), "MAX78000.flash", + FLASH_SIZE, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, &s->flash); + + memory_region_init_ram(&s->sram, NULL, "MAX78000.sram", SRAM_SIZE, + &err); + + gcrdev = DEVICE(&s->gcr); + object_property_set_link(OBJECT(gcrdev), "sram", OBJECT(&s->sram), + &err); + + if (err != NULL) { + error_propagate(errp, err); + return; + } + memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram); + + armv7m = DEVICE(&s->armv7m); + + /* + * The MAX78000 user guide's Interrupt Vector Table section + * suggests that there are 120 IRQs in the text, while only listing + * 104 in table 5-1. Implement the more generous of the two. + * This has not been tested in hardware. 
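The num-irq and num-prio-bits properties set just below follow two ARMv7-M conventions that the comment leaves implicit: external IRQ n occupies exception vector 16 + n, and an NVIC configured with num-prio-bits keeps only that many top bits of each 8-bit priority byte. A minimal sketch, with illustrative helper names rather than QEMU API:

    #include <stdint.h>

    /* External interrupt n is architecturally exception number 16 + n,
     * so a 120-IRQ NVIC uses exception numbers up to 135. */
    static inline unsigned armv7m_exception_for_irq(unsigned irq)
    {
        return 16 + irq;
    }

    /* With num-prio-bits = 3, only the top three bits of a priority
     * byte are implemented: writing 0xff reads back as 0xe0. */
    static inline uint8_t nvic_prio_readback(uint8_t written, unsigned prio_bits)
    {
        return written & (uint8_t)(0xff << (8 - prio_bits));
    }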
+ */ + qdev_prop_set_uint32(armv7m, "num-irq", 120); + qdev_prop_set_uint8(armv7m, "num-prio-bits", 3); + qdev_prop_set_string(armv7m, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m4")); + qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk); + object_property_set_link(OBJECT(&s->armv7m), "memory", + OBJECT(system_memory), &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { + return; + } + + for (i = 0; i < MAX78000_NUM_ICC; i++) { + dev = DEVICE(&(s->icc[i])); + sysbus_realize(SYS_BUS_DEVICE(dev), errp); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, max78000_icc_addr[i]); + } + + for (i = 0; i < MAX78000_NUM_UART; i++) { + g_autofree char *link = g_strdup_printf("uart%d", i); + dev = DEVICE(&(s->uart[i])); + qdev_prop_set_chr(dev, "chardev", serial_hd(i)); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->uart[i]), errp)) { + return; + } + + object_property_set_link(OBJECT(gcrdev), link, OBJECT(dev), + &err); + + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, max78000_uart_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, + max78000_uart_irq[i])); + } + + dev = DEVICE(&s->trng); + sysbus_realize(SYS_BUS_DEVICE(dev), errp); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x4004d000); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(armv7m, 4)); + + object_property_set_link(OBJECT(gcrdev), "trng", OBJECT(dev), &err); + + dev = DEVICE(&s->aes); + sysbus_realize(SYS_BUS_DEVICE(dev), errp); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x40007400); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(armv7m, 5)); + + object_property_set_link(OBJECT(gcrdev), "aes", OBJECT(dev), &err); + + dev = DEVICE(&s->gcr); + sysbus_realize(SYS_BUS_DEVICE(dev), errp); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x40000000); + + create_unimplemented_device("systemInterface", 0x40000400, 0x400); + create_unimplemented_device("functionControl", 0x40000800, 0x400); + create_unimplemented_device("watchdogTimer0", 0x40003000, 0x400); + create_unimplemented_device("dynamicVoltScale", 0x40003c00, 0x40); + create_unimplemented_device("SIMO", 0x40004400, 0x400); + create_unimplemented_device("trimSystemInit", 0x40005400, 0x400); + create_unimplemented_device("generalCtrlFunc", 0x40005800, 0x400); + create_unimplemented_device("wakeupTimer", 0x40006400, 0x400); + create_unimplemented_device("powerSequencer", 0x40006800, 0x400); + create_unimplemented_device("miscControl", 0x40006c00, 0x400); + + create_unimplemented_device("gpio0", 0x40008000, 0x1000); + create_unimplemented_device("gpio1", 0x40009000, 0x1000); + + create_unimplemented_device("parallelCamInterface", 0x4000e000, 0x1000); + create_unimplemented_device("CRC", 0x4000f000, 0x1000); + + create_unimplemented_device("timer0", 0x40010000, 0x1000); + create_unimplemented_device("timer1", 0x40011000, 0x1000); + create_unimplemented_device("timer2", 0x40012000, 0x1000); + create_unimplemented_device("timer3", 0x40013000, 0x1000); + + create_unimplemented_device("i2c0", 0x4001d000, 0x1000); + create_unimplemented_device("i2c1", 0x4001e000, 0x1000); + create_unimplemented_device("i2c2", 0x4001f000, 0x1000); + + create_unimplemented_device("standardDMA", 0x40028000, 0x1000); + create_unimplemented_device("flashController0", 0x40029000, 0x400); + + create_unimplemented_device("adc", 0x40034000, 0x1000); + create_unimplemented_device("pulseTrainEngine", 0x4003c000, 0xa0); + create_unimplemented_device("oneWireMaster", 0x4003d000, 0x1000); + 
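The create_unimplemented_device() calls above and below register background MMIO stubs so that guest accesses to unmodelled peripherals are logged instead of faulting. A rough sketch of what such a stub amounts to, reads as zero, writes ignored, every access traced, using illustrative names rather than the actual hw/misc/unimp.c internals:

    #include "qemu/osdep.h"
    #include "qemu/log.h"
    #include "system/memory.h"

    static uint64_t stub_read(void *opaque, hwaddr addr, unsigned size)
    {
        qemu_log_mask(LOG_UNIMP, "%s: unimplemented read 0x%" HWADDR_PRIx
                      " size %u\n", (const char *)opaque, addr, size);
        return 0;  /* reads as zero */
    }

    static void stub_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
    {
        qemu_log_mask(LOG_UNIMP, "%s: unimplemented write 0x%" HWADDR_PRIx
                      " size %u value 0x%" PRIx64 "\n",
                      (const char *)opaque, addr, size, val);
        /* writes ignored */
    }

    static const MemoryRegionOps stub_ops = {
        .read = stub_read,
        .write = stub_write,
        .endianness = DEVICE_NATIVE_ENDIAN,
    };

The real helper additionally maps its region at very low priority, so a properly modelled device added later simply overlays the stub.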
create_unimplemented_device("semaphore", 0x4003e000, 0x1000); + + create_unimplemented_device("spi1", 0x40046000, 0x2000); + create_unimplemented_device("i2s", 0x40060000, 0x1000); + create_unimplemented_device("lowPowerControl", 0x40080000, 0x400); + create_unimplemented_device("gpio2", 0x40080400, 0x200); + create_unimplemented_device("lowPowerWatchdogTimer", 0x40080800, 0x400); + create_unimplemented_device("lowPowerTimer4", 0x40080c00, 0x400); + + create_unimplemented_device("lowPowerTimer5", 0x40081000, 0x400); + create_unimplemented_device("lowPowerUART0", 0x40081400, 0x400); + create_unimplemented_device("lowPowerComparator", 0x40088000, 0x400); + + create_unimplemented_device("spi0", 0x400be000, 0x400); + + /* + * The MAX78000 user guide's base address map lists the CNN TX FIFO as + * beginning at 0x400c0400 and ending at 0x400c0400. Given that CNN_FIFO + * is listed as having data accessible up to offset 0x1000, the user + * guide is likely incorrect. + */ + create_unimplemented_device("cnnTxFIFO", 0x400c0400, 0x2000); + + create_unimplemented_device("cnnGlobalControl", 0x50000000, 0x10000); + create_unimplemented_device("cnnx16quad0", 0x50100000, 0x40000); + create_unimplemented_device("cnnx16quad1", 0x50500000, 0x40000); + create_unimplemented_device("cnnx16quad2", 0x50900000, 0x40000); + create_unimplemented_device("cnnx16quad3", 0x50d00000, 0x40000); + +} + +static void max78000_soc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = max78000_soc_realize; +} + +static const TypeInfo max78000_soc_info = { + .name = TYPE_MAX78000_SOC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MAX78000State), + .instance_init = max78000_soc_initfn, + .class_init = max78000_soc_class_init, +}; + +static void max78000_soc_types(void) +{ + type_register_static(&max78000_soc_info); +} + +type_init(max78000_soc_types) diff --git a/hw/arm/max78000fthr.c b/hw/arm/max78000fthr.c new file mode 100644 index 0000000..c4f6b5b --- /dev/null +++ b/hw/arm/max78000fthr.c @@ -0,0 +1,50 @@ +/* + * MAX78000FTHR Evaluation Board + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" +#include "qemu/error-report.h" +#include "hw/arm/max78000_soc.h" +#include "hw/arm/boot.h" + +/* 60MHz is the default, but other clocks can be selected. 
*/ +#define SYSCLK_FRQ 60000000ULL +static void max78000_init(MachineState *machine) +{ + DeviceState *dev; + Clock *sysclk; + + sysclk = clock_new(OBJECT(machine), "SYSCLK"); + clock_set_hz(sysclk, SYSCLK_FRQ); + + dev = qdev_new(TYPE_MAX78000_SOC); + object_property_add_child(OBJECT(machine), "soc", OBJECT(dev)); + qdev_connect_clock_in(dev, "sysclk", sysclk); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + armv7m_load_kernel(ARM_CPU(first_cpu), + machine->kernel_filename, + 0x00000000, FLASH_SIZE); +} + +static void max78000_machine_init(MachineClass *mc) +{ + static const char * const valid_cpu_types[] = { + ARM_CPU_TYPE_NAME("cortex-m4"), + NULL + }; + + mc->desc = "MAX78000FTHR Board (Cortex-M4 / (Unimplemented) RISC-V)"; + mc->init = max78000_init; + mc->valid_cpu_types = valid_cpu_types; +} + +DEFINE_MACHINE("max78000fthr", max78000_machine_init) diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 5098795..dc683913 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -8,7 +8,7 @@ arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c')) arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c')) arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c')) arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c')) -arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, files('musicpal.c')]) +arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [files('musicpal.c')]) arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c')) arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c')) arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c')) @@ -27,6 +27,7 @@ arm_common_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c')) arm_common_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: files('allwinner-a10.c', 'cubieboard.c')) arm_common_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3.c', 'orangepi.c')) arm_common_ss.add(when: 'CONFIG_ALLWINNER_R40', if_true: files('allwinner-r40.c', 'bananapi_m2u.c')) +arm_common_ss.add(when: 'CONFIG_MAX78000_SOC', if_true: files('max78000_soc.c')) arm_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2836.c', 'raspi.c')) arm_common_ss.add(when: ['CONFIG_RASPI', 'TARGET_AARCH64'], if_true: files('bcm2838.c', 'raspi4b.c')) arm_common_ss.add(when: 'CONFIG_STM32F100_SOC', if_true: files('stm32f100_soc.c')) @@ -71,6 +72,7 @@ arm_ss.add(when: 'CONFIG_XEN', if_true: files( arm_common_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmu-common.c')) arm_common_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c')) arm_common_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4_boards.c')) +arm_common_ss.add(when: 'CONFIG_MAX78000FTHR', if_true: files('max78000fthr.c')) arm_common_ss.add(when: 'CONFIG_NETDUINO2', if_true: files('netduino2.c')) arm_common_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_peripherals.c')) arm_common_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2838_peripherals.c')) @@ -79,7 +81,7 @@ arm_common_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c')) arm_common_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c')) arm_common_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) -arm_common_ss.add(fdt, files('boot.c')) +arm_common_ss.add(files('boot.c')) hw_arch += {'arm': arm_ss} hw_common_arch += {'arm': arm_common_ss} diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c index 58efb41..bd378e3 100644 --- a/hw/arm/mps2.c 
+++ b/hw/arm/mps2.c @@ -224,7 +224,11 @@ static void mps2_common_init(MachineState *machine) switch (mmc->fpga_type) { case FPGA_AN385: case FPGA_AN386: + qdev_prop_set_uint32(armv7m, "num-irq", 32); + break; case FPGA_AN500: + /* The AN500 configures its Cortex-M7 with 16 MPU regions */ + qdev_prop_set_uint32(armv7m, "mpu-ns-regions", 16); qdev_prop_set_uint32(armv7m, "num-irq", 32); break; case FPGA_AN511: diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c index 2f30c49..ecfae32 100644 --- a/hw/arm/npcm7xx.c +++ b/hw/arm/npcm7xx.c @@ -24,7 +24,7 @@ #include "hw/qdev-clock.h" #include "hw/qdev-properties.h" #include "qapi/error.h" -#include "qemu/bswap.h" +#include "exec/tswap.h" #include "qemu/units.h" #include "system/system.h" #include "target/arm/cpu-qom.h" diff --git a/hw/arm/npcm8xx.c b/hw/arm/npcm8xx.c index d7ee306..a276fea 100644 --- a/hw/arm/npcm8xx.c +++ b/hw/arm/npcm8xx.c @@ -67,6 +67,9 @@ /* SDHCI Modules */ #define NPCM8XX_MMC_BA 0xf0842000 +/* PCS Module */ +#define NPCM8XX_PCS_BA 0xf0780000 + /* PSPI Modules */ #define NPCM8XX_PSPI_BA 0xf0201000 @@ -85,6 +88,10 @@ enum NPCM8xxInterrupt { NPCM8XX_ADC_IRQ = 0, NPCM8XX_PECI_IRQ = 6, NPCM8XX_KCS_HIB_IRQ = 9, + NPCM8XX_GMAC1_IRQ = 14, + NPCM8XX_GMAC2_IRQ, + NPCM8XX_GMAC3_IRQ, + NPCM8XX_GMAC4_IRQ, NPCM8XX_MMC_IRQ = 26, NPCM8XX_PSPI_IRQ = 28, NPCM8XX_TIMER0_IRQ = 32, /* Timer Module 0 */ @@ -260,6 +267,14 @@ static const hwaddr npcm8xx_smbus_addr[] = { 0xfff0a000, }; +/* Register base address for each GMAC Module */ +static const hwaddr npcm8xx_gmac_addr[] = { + 0xf0802000, + 0xf0804000, + 0xf0806000, + 0xf0808000, +}; + /* Register base address for each USB host EHCI registers */ static const hwaddr npcm8xx_ehci_addr[] = { 0xf0828100, @@ -350,6 +365,7 @@ static struct arm_boot_info npcm8xx_binfo = { .secure_boot = false, .board_id = -1, .board_setup_addr = NPCM8XX_BOARD_SETUP_ADDR, + .psci_conduit = QEMU_PSCI_CONDUIT_SMC, }; void npcm8xx_load_kernel(MachineState *machine, NPCM8xxState *soc) @@ -444,6 +460,11 @@ static void npcm8xx_init(Object *obj) object_initialize_child(obj, "mft[*]", &s->mft[i], TYPE_NPCM7XX_MFT); } + for (i = 0; i < ARRAY_SIZE(s->gmac); i++) { + object_initialize_child(obj, "gmac[*]", &s->gmac[i], TYPE_NPCM_GMAC); + } + object_initialize_child(obj, "pcs", &s->pcs, TYPE_NPCM_PCS); + object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI); object_initialize_child(obj, "pspi", &s->pspi, TYPE_NPCM_PSPI); } @@ -669,6 +690,35 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp) } /* + * GMAC Modules. Cannot fail. + */ + QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_gmac_addr) != ARRAY_SIZE(s->gmac)); + for (i = 0; i < ARRAY_SIZE(s->gmac); i++) { + SysBusDevice *sbd = SYS_BUS_DEVICE(&s->gmac[i]); + + /* This is used to make sure that the NIC can create the device */ + qemu_configure_nic_device(DEVICE(sbd), false, NULL); + + /* + * The device exists regardless of whether it's connected to a QEMU + * netdev backend. So always instantiate it even if there is no + * backend. + */ + sysbus_realize(sbd, &error_abort); + sysbus_mmio_map(sbd, 0, npcm8xx_gmac_addr[i]); + /* + * N.B. The values for the second argument sysbus_connect_irq are + * chosen to match the registration order in npcm7xx_emc_realize. + */ + sysbus_connect_irq(sbd, 0, npcm8xx_irq(s, NPCM8XX_GMAC1_IRQ + i)); + } + /* + * GMAC Physical Coding Sublayer(PCS) Module. Cannot fail. + */ + sysbus_realize(SYS_BUS_DEVICE(&s->pcs), &error_abort); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcs), 0, NPCM8XX_PCS_BA); + + /* * Flash Interface Unit (FIU). 
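Two details of the GMAC hunk above are easy to miss. First, only NPCM8XX_GMAC1_IRQ carries an initializer, so the enum auto-increments GMAC2 through GMAC4 to the consecutive values 15 to 17 that the "+ i" arithmetic depends on. Second, qemu_configure_nic_device() must run before realize so each instance can be bound to a netdev backend; the false argument is read here as "do not claim the default -nic", which should be treated as an assumption. A condensed restatement with hypothetical names:

    enum {
        MY_GMAC1_IRQ = 14,
        MY_GMAC2_IRQ,   /* 15: no initializer, auto-increments */
        MY_GMAC3_IRQ,   /* 16 */
        MY_GMAC4_IRQ,   /* 17 */
    };

    static const hwaddr my_gmac_addr[] = {
        0xf0802000, 0xf0804000, 0xf0806000, 0xf0808000,
    };

    /* Inside a hypothetical SoC realize function: */
    for (i = 0; i < ARRAY_SIZE(s->gmac); i++) {
        SysBusDevice *sbd = SYS_BUS_DEVICE(&s->gmac[i]);

        qemu_configure_nic_device(DEVICE(sbd), false, NULL); /* before realize */
        sysbus_realize(sbd, &error_abort);
        sysbus_mmio_map(sbd, 0, my_gmac_addr[i]);
        sysbus_connect_irq(sbd, 0, my_soc_irq(s, MY_GMAC1_IRQ + i));
    }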
Can fail if incorrect number of chip selects * specified, but this is a programming error. */ @@ -741,12 +791,7 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp) create_unimplemented_device("npcm8xx.ahbpci", 0xf0400000, 1 * MiB); create_unimplemented_device("npcm8xx.dap", 0xf0500000, 960 * KiB); create_unimplemented_device("npcm8xx.mcphy", 0xf05f0000, 64 * KiB); - create_unimplemented_device("npcm8xx.pcs", 0xf0780000, 256 * KiB); create_unimplemented_device("npcm8xx.tsgen", 0xf07fc000, 8 * KiB); - create_unimplemented_device("npcm8xx.gmac1", 0xf0802000, 8 * KiB); - create_unimplemented_device("npcm8xx.gmac2", 0xf0804000, 8 * KiB); - create_unimplemented_device("npcm8xx.gmac3", 0xf0806000, 8 * KiB); - create_unimplemented_device("npcm8xx.gmac4", 0xf0808000, 8 * KiB); create_unimplemented_device("npcm8xx.copctl", 0xf080c000, 4 * KiB); create_unimplemented_device("npcm8xx.tipctl", 0xf080d000, 4 * KiB); create_unimplemented_device("npcm8xx.rst", 0xf080e000, 4 * KiB); diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c index 91d7e3f..74458fb 100644 --- a/hw/arm/omap1.c +++ b/hw/arm/omap1.c @@ -144,7 +144,7 @@ static inline void omap_timer_update(struct omap_mpu_timer_s *timer) int64_t expires; if (timer->enable && timer->st && timer->rate) { - timer->val = timer->reset_val; /* Should skip this on clk enable */ + timer->val = timer->reset_val; /* Should skip this on clk enable */ expires = muldiv64((uint64_t) timer->val << (timer->ptv + 1), NANOSECONDS_PER_SECOND, timer->rate); @@ -212,13 +212,13 @@ static uint64_t omap_mpu_timer_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* CNTL_TIMER */ + case 0x00: /* CNTL_TIMER */ return (s->enable << 5) | (s->ptv << 2) | (s->ar << 1) | s->st; - case 0x04: /* LOAD_TIM */ + case 0x04: /* LOAD_TIM */ break; - case 0x08: /* READ_TIM */ + case 0x08: /* READ_TIM */ return omap_timer_read(s); } @@ -237,7 +237,7 @@ static void omap_mpu_timer_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* CNTL_TIMER */ + case 0x00: /* CNTL_TIMER */ omap_timer_sync(s); s->enable = (value >> 5) & 1; s->ptv = (value >> 2) & 7; @@ -246,11 +246,11 @@ static void omap_mpu_timer_write(void *opaque, hwaddr addr, omap_timer_update(s); return; - case 0x04: /* LOAD_TIM */ + case 0x04: /* LOAD_TIM */ s->reset_val = value; return; - case 0x08: /* READ_TIM */ + case 0x08: /* READ_TIM */ OMAP_RO_REG(addr); break; @@ -318,14 +318,14 @@ static uint64_t omap_wd_timer_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* CNTL_TIMER */ + case 0x00: /* CNTL_TIMER */ return (s->timer.ptv << 9) | (s->timer.ar << 8) | (s->timer.st << 7) | (s->free << 1); - case 0x04: /* READ_TIMER */ + case 0x04: /* READ_TIMER */ return omap_timer_read(&s->timer); - case 0x08: /* TIMER_MODE */ + case 0x08: /* TIMER_MODE */ return s->mode << 15; } @@ -344,7 +344,7 @@ static void omap_wd_timer_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* CNTL_TIMER */ + case 0x00: /* CNTL_TIMER */ omap_timer_sync(&s->timer); s->timer.ptv = (value >> 9) & 7; s->timer.ar = (value >> 8) & 1; @@ -353,11 +353,11 @@ static void omap_wd_timer_write(void *opaque, hwaddr addr, omap_timer_update(&s->timer); break; - case 0x04: /* LOAD_TIMER */ + case 0x04: /* LOAD_TIMER */ s->timer.reset_val = value & 0xffff; break; - case 0x08: /* TIMER_MODE */ + case 0x08: /* TIMER_MODE */ if (!s->mode && ((value >> 15) & 1)) omap_clk_get(s->timer.clk); s->mode |= (value >> 15) & 1; @@ -442,13 +442,13 @@ static uint64_t omap_os_timer_read(void *opaque, hwaddr addr, } switch (offset) { - 
case 0x00: /* TVR */ + case 0x00: /* TVR */ return s->timer.reset_val; - case 0x04: /* TCR */ + case 0x04: /* TCR */ return omap_timer_read(&s->timer); - case 0x08: /* CR */ + case 0x08: /* CR */ return (s->timer.ar << 3) | (s->timer.it_ena << 2) | s->timer.st; default: @@ -470,15 +470,15 @@ static void omap_os_timer_write(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* TVR */ + case 0x00: /* TVR */ s->timer.reset_val = value & 0x00ffffff; break; - case 0x04: /* TCR */ + case 0x04: /* TCR */ OMAP_RO_REG(addr); break; - case 0x08: /* CR */ + case 0x08: /* CR */ s->timer.ar = (value >> 3) & 1; s->timer.it_ena = (value >> 2) & 1; if (s->timer.st != (value & 1) || (value & 2)) { @@ -543,34 +543,34 @@ static uint64_t omap_ulpd_pm_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x14: /* IT_STATUS */ + case 0x14: /* IT_STATUS */ ret = s->ulpd_pm_regs[addr >> 2]; s->ulpd_pm_regs[addr >> 2] = 0; qemu_irq_lower(qdev_get_gpio_in(s->ih[1], OMAP_INT_GAUGE_32K)); return ret; - case 0x18: /* Reserved */ - case 0x1c: /* Reserved */ - case 0x20: /* Reserved */ - case 0x28: /* Reserved */ - case 0x2c: /* Reserved */ + case 0x18: /* Reserved */ + case 0x1c: /* Reserved */ + case 0x20: /* Reserved */ + case 0x28: /* Reserved */ + case 0x2c: /* Reserved */ OMAP_BAD_REG(addr); /* fall through */ - case 0x00: /* COUNTER_32_LSB */ - case 0x04: /* COUNTER_32_MSB */ - case 0x08: /* COUNTER_HIGH_FREQ_LSB */ - case 0x0c: /* COUNTER_HIGH_FREQ_MSB */ - case 0x10: /* GAUGING_CTRL */ - case 0x24: /* SETUP_ANALOG_CELL3_ULPD1 */ - case 0x30: /* CLOCK_CTRL */ - case 0x34: /* SOFT_REQ */ - case 0x38: /* COUNTER_32_FIQ */ - case 0x3c: /* DPLL_CTRL */ - case 0x40: /* STATUS_REQ */ + case 0x00: /* COUNTER_32_LSB */ + case 0x04: /* COUNTER_32_MSB */ + case 0x08: /* COUNTER_HIGH_FREQ_LSB */ + case 0x0c: /* COUNTER_HIGH_FREQ_MSB */ + case 0x10: /* GAUGING_CTRL */ + case 0x24: /* SETUP_ANALOG_CELL3_ULPD1 */ + case 0x30: /* CLOCK_CTRL */ + case 0x34: /* SOFT_REQ */ + case 0x38: /* COUNTER_32_FIQ */ + case 0x3c: /* DPLL_CTRL */ + case 0x40: /* STATUS_REQ */ /* XXX: check clk::usecount state for every clock */ - case 0x48: /* LOCL_TIME */ - case 0x4c: /* APLL_CTRL */ - case 0x50: /* POWER_CTRL */ + case 0x48: /* LOCL_TIME */ + case 0x4c: /* APLL_CTRL */ + case 0x50: /* POWER_CTRL */ return s->ulpd_pm_regs[addr >> 2]; } @@ -581,22 +581,22 @@ static uint64_t omap_ulpd_pm_read(void *opaque, hwaddr addr, static inline void omap_ulpd_clk_update(struct omap_mpu_state_s *s, uint16_t diff, uint16_t value) { - if (diff & (1 << 4)) /* USB_MCLK_EN */ + if (diff & (1 << 4)) /* USB_MCLK_EN */ omap_clk_onoff(omap_findclk(s, "usb_clk0"), (value >> 4) & 1); - if (diff & (1 << 5)) /* DIS_USB_PVCI_CLK */ + if (diff & (1 << 5)) /* DIS_USB_PVCI_CLK */ omap_clk_onoff(omap_findclk(s, "usb_w2fc_ck"), (~value >> 5) & 1); } static inline void omap_ulpd_req_update(struct omap_mpu_state_s *s, uint16_t diff, uint16_t value) { - if (diff & (1 << 0)) /* SOFT_DPLL_REQ */ + if (diff & (1 << 0)) /* SOFT_DPLL_REQ */ omap_clk_canidle(omap_findclk(s, "dpll4"), (~value >> 0) & 1); - if (diff & (1 << 1)) /* SOFT_COM_REQ */ + if (diff & (1 << 1)) /* SOFT_COM_REQ */ omap_clk_canidle(omap_findclk(s, "com_mclk_out"), (~value >> 1) & 1); - if (diff & (1 << 2)) /* SOFT_SDW_REQ */ + if (diff & (1 << 2)) /* SOFT_SDW_REQ */ omap_clk_canidle(omap_findclk(s, "bt_mclk_out"), (~value >> 2) & 1); - if (diff & (1 << 3)) /* SOFT_USB_REQ */ + if (diff & (1 << 3)) /* SOFT_USB_REQ */ omap_clk_canidle(omap_findclk(s, "usb_clk0"), (~value >> 3) & 1); } @@ -615,16 
+615,16 @@ static void omap_ulpd_pm_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* COUNTER_32_LSB */ - case 0x04: /* COUNTER_32_MSB */ - case 0x08: /* COUNTER_HIGH_FREQ_LSB */ - case 0x0c: /* COUNTER_HIGH_FREQ_MSB */ - case 0x14: /* IT_STATUS */ - case 0x40: /* STATUS_REQ */ + case 0x00: /* COUNTER_32_LSB */ + case 0x04: /* COUNTER_32_MSB */ + case 0x08: /* COUNTER_HIGH_FREQ_LSB */ + case 0x0c: /* COUNTER_HIGH_FREQ_MSB */ + case 0x14: /* IT_STATUS */ + case 0x40: /* STATUS_REQ */ OMAP_RO_REG(addr); break; - case 0x10: /* GAUGING_CTRL */ + case 0x10: /* GAUGING_CTRL */ /* Bits 0 and 1 seem to be confused in the OMAP 310 TRM */ if ((s->ulpd_pm_regs[addr >> 2] ^ value) & 1) { now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); @@ -638,50 +638,50 @@ static void omap_ulpd_pm_write(void *opaque, hwaddr addr, ticks = muldiv64(now, 32768, NANOSECONDS_PER_SECOND); s->ulpd_pm_regs[0x00 >> 2] = (ticks >> 0) & 0xffff; s->ulpd_pm_regs[0x04 >> 2] = (ticks >> 16) & 0xffff; - if (ticks >> 32) /* OVERFLOW_32K */ + if (ticks >> 32) /* OVERFLOW_32K */ s->ulpd_pm_regs[0x14 >> 2] |= 1 << 2; /* High frequency ticks */ ticks = muldiv64(now, 12000000, NANOSECONDS_PER_SECOND); s->ulpd_pm_regs[0x08 >> 2] = (ticks >> 0) & 0xffff; s->ulpd_pm_regs[0x0c >> 2] = (ticks >> 16) & 0xffff; - if (ticks >> 32) /* OVERFLOW_HI_FREQ */ + if (ticks >> 32) /* OVERFLOW_HI_FREQ */ s->ulpd_pm_regs[0x14 >> 2] |= 1 << 1; - s->ulpd_pm_regs[0x14 >> 2] |= 1 << 0; /* IT_GAUGING */ + s->ulpd_pm_regs[0x14 >> 2] |= 1 << 0; /* IT_GAUGING */ qemu_irq_raise(qdev_get_gpio_in(s->ih[1], OMAP_INT_GAUGE_32K)); } } s->ulpd_pm_regs[addr >> 2] = value; break; - case 0x18: /* Reserved */ - case 0x1c: /* Reserved */ - case 0x20: /* Reserved */ - case 0x28: /* Reserved */ - case 0x2c: /* Reserved */ + case 0x18: /* Reserved */ + case 0x1c: /* Reserved */ + case 0x20: /* Reserved */ + case 0x28: /* Reserved */ + case 0x2c: /* Reserved */ OMAP_BAD_REG(addr); /* fall through */ - case 0x24: /* SETUP_ANALOG_CELL3_ULPD1 */ - case 0x38: /* COUNTER_32_FIQ */ - case 0x48: /* LOCL_TIME */ - case 0x50: /* POWER_CTRL */ + case 0x24: /* SETUP_ANALOG_CELL3_ULPD1 */ + case 0x38: /* COUNTER_32_FIQ */ + case 0x48: /* LOCL_TIME */ + case 0x50: /* POWER_CTRL */ s->ulpd_pm_regs[addr >> 2] = value; break; - case 0x30: /* CLOCK_CTRL */ + case 0x30: /* CLOCK_CTRL */ diff = s->ulpd_pm_regs[addr >> 2] ^ value; s->ulpd_pm_regs[addr >> 2] = value & 0x3f; omap_ulpd_clk_update(s, diff, value); break; - case 0x34: /* SOFT_REQ */ + case 0x34: /* SOFT_REQ */ diff = s->ulpd_pm_regs[addr >> 2] ^ value; s->ulpd_pm_regs[addr >> 2] = value & 0x1f; omap_ulpd_req_update(s, diff, value); break; - case 0x3c: /* DPLL_CTRL */ + case 0x3c: /* DPLL_CTRL */ /* XXX: OMAP310 TRM claims bit 3 is PLL_ENABLE, and bit 4 is * omitted altogether, probably a typo. 
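The GAUGING_CTRL handler above turns a nanosecond timestamp into 32 kHz and 12 MHz tick counts with muldiv64() and spreads each count over two 16-bit counter registers, with bit 32 feeding an overflow status flag. A standalone sketch of that arithmetic; muldiv64() computes a * b / c through a wider intermediate, approximated here with the GCC/Clang __int128 extension:

    #include <stdint.h>

    #define NS_PER_SECOND 1000000000ULL

    /* Same contract as QEMU's muldiv64(): (a * b) / c without overflow. */
    static uint64_t muldiv64_sketch(uint64_t a, uint32_t b, uint32_t c)
    {
        return (uint64_t)(((unsigned __int128)a * b) / c);
    }

    /* Split a tick count the way the ULPD COUNTER_* registers do. */
    static void gauge(uint64_t now_ns, uint32_t hz,
                      uint16_t *lsb, uint16_t *msb, int *overflow)
    {
        uint64_t ticks = muldiv64_sketch(now_ns, hz, NS_PER_SECOND);

        *lsb = ticks & 0xffff;          /* COUNTER_*_LSB */
        *msb = (ticks >> 16) & 0xffff;  /* COUNTER_*_MSB */
        *overflow = (ticks >> 32) != 0; /* OVERFLOW_* status bit */
    }

Calling gauge(now, 32768, ...) reproduces the 32 kHz counter and gauge(now, 12000000, ...) the high-frequency one.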
*/ /* This register has identical semantics with DPLL(1:3) control @@ -689,11 +689,11 @@ static void omap_ulpd_pm_write(void *opaque, hwaddr addr, diff = s->ulpd_pm_regs[addr >> 2] & value; s->ulpd_pm_regs[addr >> 2] = value & 0x2fff; if (diff & (0x3ff << 2)) { - if (value & (1 << 4)) { /* PLL_ENABLE */ - div = ((value >> 5) & 3) + 1; /* PLL_DIV */ - mult = MIN((value >> 7) & 0x1f, 1); /* PLL_MULT */ + if (value & (1 << 4)) { /* PLL_ENABLE */ + div = ((value >> 5) & 3) + 1; /* PLL_DIV */ + mult = MIN((value >> 7) & 0x1f, 1); /* PLL_MULT */ } else { - div = bypass_div[((value >> 2) & 3)]; /* BYPASS_DIV */ + div = bypass_div[((value >> 2) & 3)]; /* BYPASS_DIV */ mult = 1; } omap_clk_setrate(omap_findclk(s, "dpll4"), div, mult); @@ -708,10 +708,10 @@ static void omap_ulpd_pm_write(void *opaque, hwaddr addr, s->ulpd_pm_regs[addr >> 2] |= 2; break; - case 0x4c: /* APLL_CTRL */ + case 0x4c: /* APLL_CTRL */ diff = s->ulpd_pm_regs[addr >> 2] & value; s->ulpd_pm_regs[addr >> 2] = value & 0xf; - if (diff & (1 << 0)) /* APLL_NDPLL_SWITCH */ + if (diff & (1 << 0)) /* APLL_NDPLL_SWITCH */ omap_clk_reparent(omap_findclk(s, "ck_48m"), omap_findclk(s, (value & (1 << 0)) ? "apll" : "dpll4")); break; @@ -775,43 +775,43 @@ static uint64_t omap_pin_cfg_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* FUNC_MUX_CTRL_0 */ - case 0x04: /* FUNC_MUX_CTRL_1 */ - case 0x08: /* FUNC_MUX_CTRL_2 */ + case 0x00: /* FUNC_MUX_CTRL_0 */ + case 0x04: /* FUNC_MUX_CTRL_1 */ + case 0x08: /* FUNC_MUX_CTRL_2 */ return s->func_mux_ctrl[addr >> 2]; - case 0x0c: /* COMP_MODE_CTRL_0 */ + case 0x0c: /* COMP_MODE_CTRL_0 */ return s->comp_mode_ctrl[0]; - case 0x10: /* FUNC_MUX_CTRL_3 */ - case 0x14: /* FUNC_MUX_CTRL_4 */ - case 0x18: /* FUNC_MUX_CTRL_5 */ - case 0x1c: /* FUNC_MUX_CTRL_6 */ - case 0x20: /* FUNC_MUX_CTRL_7 */ - case 0x24: /* FUNC_MUX_CTRL_8 */ - case 0x28: /* FUNC_MUX_CTRL_9 */ - case 0x2c: /* FUNC_MUX_CTRL_A */ - case 0x30: /* FUNC_MUX_CTRL_B */ - case 0x34: /* FUNC_MUX_CTRL_C */ - case 0x38: /* FUNC_MUX_CTRL_D */ + case 0x10: /* FUNC_MUX_CTRL_3 */ + case 0x14: /* FUNC_MUX_CTRL_4 */ + case 0x18: /* FUNC_MUX_CTRL_5 */ + case 0x1c: /* FUNC_MUX_CTRL_6 */ + case 0x20: /* FUNC_MUX_CTRL_7 */ + case 0x24: /* FUNC_MUX_CTRL_8 */ + case 0x28: /* FUNC_MUX_CTRL_9 */ + case 0x2c: /* FUNC_MUX_CTRL_A */ + case 0x30: /* FUNC_MUX_CTRL_B */ + case 0x34: /* FUNC_MUX_CTRL_C */ + case 0x38: /* FUNC_MUX_CTRL_D */ return s->func_mux_ctrl[(addr >> 2) - 1]; - case 0x40: /* PULL_DWN_CTRL_0 */ - case 0x44: /* PULL_DWN_CTRL_1 */ - case 0x48: /* PULL_DWN_CTRL_2 */ - case 0x4c: /* PULL_DWN_CTRL_3 */ + case 0x40: /* PULL_DWN_CTRL_0 */ + case 0x44: /* PULL_DWN_CTRL_1 */ + case 0x48: /* PULL_DWN_CTRL_2 */ + case 0x4c: /* PULL_DWN_CTRL_3 */ return s->pull_dwn_ctrl[(addr & 0xf) >> 2]; - case 0x50: /* GATE_INH_CTRL_0 */ + case 0x50: /* GATE_INH_CTRL_0 */ return s->gate_inh_ctrl[0]; - case 0x60: /* VOLTAGE_CTRL_0 */ + case 0x60: /* VOLTAGE_CTRL_0 */ return s->voltage_ctrl[0]; - case 0x70: /* TEST_DBG_CTRL_0 */ + case 0x70: /* TEST_DBG_CTRL_0 */ return s->test_dbg_ctrl[0]; - case 0x80: /* MOD_CONF_CTRL_0 */ + case 0x80: /* MOD_CONF_CTRL_0 */ return s->mod_conf_ctrl[0]; } @@ -823,10 +823,10 @@ static inline void omap_pin_funcmux0_update(struct omap_mpu_state_s *s, uint32_t diff, uint32_t value) { if (s->compat1509) { - if (diff & (1 << 9)) /* BLUETOOTH */ + if (diff & (1 << 9)) /* BLUETOOTH */ omap_clk_onoff(omap_findclk(s, "bt_mclk_out"), (~value >> 9) & 1); - if (diff & (1 << 7)) /* USB.CLKO */ + if (diff & (1 << 7)) /* USB.CLKO */ 
omap_clk_onoff(omap_findclk(s, "usb.clko"), (value >> 7) & 1); } @@ -856,23 +856,23 @@ static inline void omap_pin_modconf1_update(struct omap_mpu_state_s *s, omap_findclk(s, ((value >> 31) & 1) ? "ck_48m" : "armper_ck")); } - if (diff & (1 << 30)) /* CONF_MOD_UART2_CLK_MODE_R */ + if (diff & (1 << 30)) /* CONF_MOD_UART2_CLK_MODE_R */ omap_clk_reparent(omap_findclk(s, "uart2_ck"), omap_findclk(s, ((value >> 30) & 1) ? "ck_48m" : "armper_ck")); - if (diff & (1 << 29)) /* CONF_MOD_UART1_CLK_MODE_R */ + if (diff & (1 << 29)) /* CONF_MOD_UART1_CLK_MODE_R */ omap_clk_reparent(omap_findclk(s, "uart1_ck"), omap_findclk(s, ((value >> 29) & 1) ? "ck_48m" : "armper_ck")); - if (diff & (1 << 23)) /* CONF_MOD_MMC_SD_CLK_REQ_R */ + if (diff & (1 << 23)) /* CONF_MOD_MMC_SD_CLK_REQ_R */ omap_clk_reparent(omap_findclk(s, "mmc_ck"), omap_findclk(s, ((value >> 23) & 1) ? "ck_48m" : "armper_ck")); - if (diff & (1 << 12)) /* CONF_MOD_COM_MCLK_12_48_S */ + if (diff & (1 << 12)) /* CONF_MOD_COM_MCLK_12_48_S */ omap_clk_reparent(omap_findclk(s, "com_mclk_out"), omap_findclk(s, ((value >> 12) & 1) ? "ck_48m" : "armper_ck")); - if (diff & (1 << 9)) /* CONF_MOD_USB_HOST_HHC_UHO */ + if (diff & (1 << 9)) /* CONF_MOD_USB_HOST_HHC_UHO */ omap_clk_onoff(omap_findclk(s, "usb_hhc_ck"), (value >> 9) & 1); } @@ -888,63 +888,63 @@ static void omap_pin_cfg_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* FUNC_MUX_CTRL_0 */ + case 0x00: /* FUNC_MUX_CTRL_0 */ diff = s->func_mux_ctrl[addr >> 2] ^ value; s->func_mux_ctrl[addr >> 2] = value; omap_pin_funcmux0_update(s, diff, value); return; - case 0x04: /* FUNC_MUX_CTRL_1 */ + case 0x04: /* FUNC_MUX_CTRL_1 */ diff = s->func_mux_ctrl[addr >> 2] ^ value; s->func_mux_ctrl[addr >> 2] = value; omap_pin_funcmux1_update(s, diff, value); return; - case 0x08: /* FUNC_MUX_CTRL_2 */ + case 0x08: /* FUNC_MUX_CTRL_2 */ s->func_mux_ctrl[addr >> 2] = value; return; - case 0x0c: /* COMP_MODE_CTRL_0 */ + case 0x0c: /* COMP_MODE_CTRL_0 */ s->comp_mode_ctrl[0] = value; s->compat1509 = (value != 0x0000eaef); omap_pin_funcmux0_update(s, ~0, s->func_mux_ctrl[0]); omap_pin_funcmux1_update(s, ~0, s->func_mux_ctrl[1]); return; - case 0x10: /* FUNC_MUX_CTRL_3 */ - case 0x14: /* FUNC_MUX_CTRL_4 */ - case 0x18: /* FUNC_MUX_CTRL_5 */ - case 0x1c: /* FUNC_MUX_CTRL_6 */ - case 0x20: /* FUNC_MUX_CTRL_7 */ - case 0x24: /* FUNC_MUX_CTRL_8 */ - case 0x28: /* FUNC_MUX_CTRL_9 */ - case 0x2c: /* FUNC_MUX_CTRL_A */ - case 0x30: /* FUNC_MUX_CTRL_B */ - case 0x34: /* FUNC_MUX_CTRL_C */ - case 0x38: /* FUNC_MUX_CTRL_D */ + case 0x10: /* FUNC_MUX_CTRL_3 */ + case 0x14: /* FUNC_MUX_CTRL_4 */ + case 0x18: /* FUNC_MUX_CTRL_5 */ + case 0x1c: /* FUNC_MUX_CTRL_6 */ + case 0x20: /* FUNC_MUX_CTRL_7 */ + case 0x24: /* FUNC_MUX_CTRL_8 */ + case 0x28: /* FUNC_MUX_CTRL_9 */ + case 0x2c: /* FUNC_MUX_CTRL_A */ + case 0x30: /* FUNC_MUX_CTRL_B */ + case 0x34: /* FUNC_MUX_CTRL_C */ + case 0x38: /* FUNC_MUX_CTRL_D */ s->func_mux_ctrl[(addr >> 2) - 1] = value; return; - case 0x40: /* PULL_DWN_CTRL_0 */ - case 0x44: /* PULL_DWN_CTRL_1 */ - case 0x48: /* PULL_DWN_CTRL_2 */ - case 0x4c: /* PULL_DWN_CTRL_3 */ + case 0x40: /* PULL_DWN_CTRL_0 */ + case 0x44: /* PULL_DWN_CTRL_1 */ + case 0x48: /* PULL_DWN_CTRL_2 */ + case 0x4c: /* PULL_DWN_CTRL_3 */ s->pull_dwn_ctrl[(addr & 0xf) >> 2] = value; return; - case 0x50: /* GATE_INH_CTRL_0 */ + case 0x50: /* GATE_INH_CTRL_0 */ s->gate_inh_ctrl[0] = value; return; - case 0x60: /* VOLTAGE_CTRL_0 */ + case 0x60: /* VOLTAGE_CTRL_0 */ s->voltage_ctrl[0] = value; return; - case 0x70: /* 
TEST_DBG_CTRL_0 */ + case 0x70: /* TEST_DBG_CTRL_0 */ s->test_dbg_ctrl[0] = value; return; - case 0x80: /* MOD_CONF_CTRL_0 */ + case 0x80: /* MOD_CONF_CTRL_0 */ diff = s->mod_conf_ctrl[0] ^ value; s->mod_conf_ctrl[0] = value; omap_pin_modconf1_update(s, diff, value); @@ -998,17 +998,17 @@ static uint64_t omap_id_read(void *opaque, hwaddr addr, } switch (addr) { - case 0xfffe1800: /* DIE_ID_LSB */ + case 0xfffe1800: /* DIE_ID_LSB */ return 0xc9581f0e; - case 0xfffe1804: /* DIE_ID_MSB */ + case 0xfffe1804: /* DIE_ID_MSB */ return 0xa8858bfa; - case 0xfffe2000: /* PRODUCT_ID_LSB */ + case 0xfffe2000: /* PRODUCT_ID_LSB */ return 0x00aaaafc; - case 0xfffe2004: /* PRODUCT_ID_MSB */ + case 0xfffe2004: /* PRODUCT_ID_MSB */ return 0xcafeb574; - case 0xfffed400: /* JTAG_ID_LSB */ + case 0xfffed400: /* JTAG_ID_LSB */ switch (s->mpu_model) { case omap310: return 0x03310315; @@ -1019,7 +1019,7 @@ static uint64_t omap_id_read(void *opaque, hwaddr addr, } break; - case 0xfffed404: /* JTAG_ID_MSB */ + case 0xfffed404: /* JTAG_ID_MSB */ switch (s->mpu_model) { case omap310: return 0xfb57402f; @@ -1080,22 +1080,22 @@ static uint64_t omap_mpui_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* CTRL */ + case 0x00: /* CTRL */ return s->mpui_ctrl; - case 0x04: /* DEBUG_ADDR */ + case 0x04: /* DEBUG_ADDR */ return 0x01ffffff; - case 0x08: /* DEBUG_DATA */ + case 0x08: /* DEBUG_DATA */ return 0xffffffff; - case 0x0c: /* DEBUG_FLAG */ + case 0x0c: /* DEBUG_FLAG */ return 0x00000800; - case 0x10: /* STATUS */ + case 0x10: /* STATUS */ return 0x00000000; /* Not in OMAP310 */ - case 0x14: /* DSP_STATUS */ - case 0x18: /* DSP_BOOT_CONFIG */ + case 0x14: /* DSP_STATUS */ + case 0x18: /* DSP_BOOT_CONFIG */ return 0x00000000; - case 0x1c: /* DSP_MPUI_CONFIG */ + case 0x1c: /* DSP_MPUI_CONFIG */ return 0x0000ffff; } @@ -1114,20 +1114,20 @@ static void omap_mpui_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* CTRL */ + case 0x00: /* CTRL */ s->mpui_ctrl = value & 0x007fffff; break; - case 0x04: /* DEBUG_ADDR */ - case 0x08: /* DEBUG_DATA */ - case 0x0c: /* DEBUG_FLAG */ - case 0x10: /* STATUS */ + case 0x04: /* DEBUG_ADDR */ + case 0x08: /* DEBUG_DATA */ + case 0x0c: /* DEBUG_FLAG */ + case 0x10: /* STATUS */ /* Not in OMAP310 */ - case 0x14: /* DSP_STATUS */ + case 0x14: /* DSP_STATUS */ OMAP_RO_REG(addr); break; - case 0x18: /* DSP_BOOT_CONFIG */ - case 0x1c: /* DSP_MPUI_CONFIG */ + case 0x18: /* DSP_BOOT_CONFIG */ + case 0x1c: /* DSP_MPUI_CONFIG */ break; default: @@ -1178,19 +1178,19 @@ static uint64_t omap_tipb_bridge_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* TIPB_CNTL */ + case 0x00: /* TIPB_CNTL */ return s->control; - case 0x04: /* TIPB_BUS_ALLOC */ + case 0x04: /* TIPB_BUS_ALLOC */ return s->alloc; - case 0x08: /* MPU_TIPB_CNTL */ + case 0x08: /* MPU_TIPB_CNTL */ return s->buffer; - case 0x0c: /* ENHANCED_TIPB_CNTL */ + case 0x0c: /* ENHANCED_TIPB_CNTL */ return s->enh_control; - case 0x10: /* ADDRESS_DBG */ - case 0x14: /* DATA_DEBUG_LOW */ - case 0x18: /* DATA_DEBUG_HIGH */ + case 0x10: /* ADDRESS_DBG */ + case 0x14: /* DATA_DEBUG_LOW */ + case 0x18: /* DATA_DEBUG_HIGH */ return 0xffff; - case 0x1c: /* DEBUG_CNTR_SIG */ + case 0x1c: /* DEBUG_CNTR_SIG */ return 0x00f8; } @@ -1209,27 +1209,27 @@ static void omap_tipb_bridge_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* TIPB_CNTL */ + case 0x00: /* TIPB_CNTL */ s->control = value & 0xffff; break; - case 0x04: /* TIPB_BUS_ALLOC */ + case 0x04: /* TIPB_BUS_ALLOC */ s->alloc = value & 
0x003f; break; - case 0x08: /* MPU_TIPB_CNTL */ + case 0x08: /* MPU_TIPB_CNTL */ s->buffer = value & 0x0003; break; - case 0x0c: /* ENHANCED_TIPB_CNTL */ + case 0x0c: /* ENHANCED_TIPB_CNTL */ s->width_intr = !(value & 2); s->enh_control = value & 0x000f; break; - case 0x10: /* ADDRESS_DBG */ - case 0x14: /* DATA_DEBUG_LOW */ - case 0x18: /* DATA_DEBUG_HIGH */ - case 0x1c: /* DEBUG_CNTR_SIG */ + case 0x10: /* ADDRESS_DBG */ + case 0x14: /* DATA_DEBUG_LOW */ + case 0x18: /* DATA_DEBUG_HIGH */ + case 0x1c: /* DEBUG_CNTR_SIG */ OMAP_RO_REG(addr); break; @@ -1280,23 +1280,23 @@ static uint64_t omap_tcmi_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* IMIF_PRIO */ - case 0x04: /* EMIFS_PRIO */ - case 0x08: /* EMIFF_PRIO */ - case 0x0c: /* EMIFS_CONFIG */ - case 0x10: /* EMIFS_CS0_CONFIG */ - case 0x14: /* EMIFS_CS1_CONFIG */ - case 0x18: /* EMIFS_CS2_CONFIG */ - case 0x1c: /* EMIFS_CS3_CONFIG */ - case 0x24: /* EMIFF_MRS */ - case 0x28: /* TIMEOUT1 */ - case 0x2c: /* TIMEOUT2 */ - case 0x30: /* TIMEOUT3 */ - case 0x3c: /* EMIFF_SDRAM_CONFIG_2 */ - case 0x40: /* EMIFS_CFG_DYN_WAIT */ + case 0x00: /* IMIF_PRIO */ + case 0x04: /* EMIFS_PRIO */ + case 0x08: /* EMIFF_PRIO */ + case 0x0c: /* EMIFS_CONFIG */ + case 0x10: /* EMIFS_CS0_CONFIG */ + case 0x14: /* EMIFS_CS1_CONFIG */ + case 0x18: /* EMIFS_CS2_CONFIG */ + case 0x1c: /* EMIFS_CS3_CONFIG */ + case 0x24: /* EMIFF_MRS */ + case 0x28: /* TIMEOUT1 */ + case 0x2c: /* TIMEOUT2 */ + case 0x30: /* TIMEOUT3 */ + case 0x3c: /* EMIFF_SDRAM_CONFIG_2 */ + case 0x40: /* EMIFS_CFG_DYN_WAIT */ return s->tcmi_regs[addr >> 2]; - case 0x20: /* EMIFF_SDRAM_CONFIG */ + case 0x20: /* EMIFF_SDRAM_CONFIG */ ret = s->tcmi_regs[addr >> 2]; s->tcmi_regs[addr >> 2] &= ~1; /* XXX: Clear SLRF on SDRAM access */ /* XXX: We can try using the VGA_DIRTY flag for this */ @@ -1318,23 +1318,23 @@ static void omap_tcmi_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* IMIF_PRIO */ - case 0x04: /* EMIFS_PRIO */ - case 0x08: /* EMIFF_PRIO */ - case 0x10: /* EMIFS_CS0_CONFIG */ - case 0x14: /* EMIFS_CS1_CONFIG */ - case 0x18: /* EMIFS_CS2_CONFIG */ - case 0x1c: /* EMIFS_CS3_CONFIG */ - case 0x20: /* EMIFF_SDRAM_CONFIG */ - case 0x24: /* EMIFF_MRS */ - case 0x28: /* TIMEOUT1 */ - case 0x2c: /* TIMEOUT2 */ - case 0x30: /* TIMEOUT3 */ - case 0x3c: /* EMIFF_SDRAM_CONFIG_2 */ - case 0x40: /* EMIFS_CFG_DYN_WAIT */ + case 0x00: /* IMIF_PRIO */ + case 0x04: /* EMIFS_PRIO */ + case 0x08: /* EMIFF_PRIO */ + case 0x10: /* EMIFS_CS0_CONFIG */ + case 0x14: /* EMIFS_CS1_CONFIG */ + case 0x18: /* EMIFS_CS2_CONFIG */ + case 0x1c: /* EMIFS_CS3_CONFIG */ + case 0x20: /* EMIFF_SDRAM_CONFIG */ + case 0x24: /* EMIFF_MRS */ + case 0x28: /* TIMEOUT1 */ + case 0x2c: /* TIMEOUT2 */ + case 0x30: /* TIMEOUT3 */ + case 0x3c: /* EMIFF_SDRAM_CONFIG_2 */ + case 0x40: /* EMIFS_CFG_DYN_WAIT */ s->tcmi_regs[addr >> 2] = value; break; - case 0x0c: /* EMIFS_CONFIG */ + case 0x0c: /* EMIFS_CONFIG */ s->tcmi_regs[addr >> 2] = (value & 0xf) | (1 << 4); break; @@ -1393,7 +1393,7 @@ static uint64_t omap_dpll_read(void *opaque, hwaddr addr, return omap_badwidth_read16(opaque, addr); } - if (addr == 0x00) /* CTL_REG */ + if (addr == 0x00) /* CTL_REG */ return s->mode; OMAP_BAD_REG(addr); @@ -1413,16 +1413,16 @@ static void omap_dpll_write(void *opaque, hwaddr addr, return; } - if (addr == 0x00) { /* CTL_REG */ + if (addr == 0x00) { /* CTL_REG */ /* See omap_ulpd_pm_write() too */ diff = s->mode & value; s->mode = value & 0x2fff; if (diff & (0x3ff << 2)) { - if (value & (1 << 4)) { /* 
PLL_ENABLE */ - div = ((value >> 5) & 3) + 1; /* PLL_DIV */ - mult = MIN((value >> 7) & 0x1f, 1); /* PLL_MULT */ + if (value & (1 << 4)) { /* PLL_ENABLE */ + div = ((value >> 5) & 3) + 1; /* PLL_DIV */ + mult = MIN((value >> 7) & 0x1f, 1); /* PLL_MULT */ } else { - div = bypass_div[((value >> 2) & 3)]; /* BYPASS_DIV */ + div = bypass_div[((value >> 2) & 3)]; /* BYPASS_DIV */ mult = 1; } omap_clk_setrate(s->dpll, div, mult); @@ -1474,31 +1474,31 @@ static uint64_t omap_clkm_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x00: /* ARM_CKCTL */ + case 0x00: /* ARM_CKCTL */ return s->clkm.arm_ckctl; - case 0x04: /* ARM_IDLECT1 */ + case 0x04: /* ARM_IDLECT1 */ return s->clkm.arm_idlect1; - case 0x08: /* ARM_IDLECT2 */ + case 0x08: /* ARM_IDLECT2 */ return s->clkm.arm_idlect2; - case 0x0c: /* ARM_EWUPCT */ + case 0x0c: /* ARM_EWUPCT */ return s->clkm.arm_ewupct; - case 0x10: /* ARM_RSTCT1 */ + case 0x10: /* ARM_RSTCT1 */ return s->clkm.arm_rstct1; - case 0x14: /* ARM_RSTCT2 */ + case 0x14: /* ARM_RSTCT2 */ return s->clkm.arm_rstct2; - case 0x18: /* ARM_SYSST */ + case 0x18: /* ARM_SYSST */ return (s->clkm.clocking_scheme << 11) | s->clkm.cold_start; - case 0x1c: /* ARM_CKOUT1 */ + case 0x1c: /* ARM_CKOUT1 */ return s->clkm.arm_ckout1; - case 0x20: /* ARM_CKOUT2 */ + case 0x20: /* ARM_CKOUT2 */ break; } @@ -1511,7 +1511,7 @@ static inline void omap_clkm_ckctl_update(struct omap_mpu_state_s *s, { omap_clk clk; - if (diff & (1 << 14)) { /* ARM_INTHCK_SEL */ + if (diff & (1 << 14)) { /* ARM_INTHCK_SEL */ if (value & (1 << 14)) /* Reserved */; else { @@ -1519,7 +1519,7 @@ static inline void omap_clkm_ckctl_update(struct omap_mpu_state_s *s, omap_clk_reparent(clk, omap_findclk(s, "tc_ck")); } } - if (diff & (1 << 12)) { /* ARM_TIMXO */ + if (diff & (1 << 12)) { /* ARM_TIMXO */ clk = omap_findclk(s, "armtim_ck"); if (value & (1 << 12)) omap_clk_reparent(clk, omap_findclk(s, "clkin")); @@ -1527,27 +1527,27 @@ static inline void omap_clkm_ckctl_update(struct omap_mpu_state_s *s, omap_clk_reparent(clk, omap_findclk(s, "ck_gen1")); } /* XXX: en_dspck */ - if (diff & (3 << 10)) { /* DSPMMUDIV */ + if (diff & (3 << 10)) { /* DSPMMUDIV */ clk = omap_findclk(s, "dspmmu_ck"); omap_clk_setrate(clk, 1 << ((value >> 10) & 3), 1); } - if (diff & (3 << 8)) { /* TCDIV */ + if (diff & (3 << 8)) { /* TCDIV */ clk = omap_findclk(s, "tc_ck"); omap_clk_setrate(clk, 1 << ((value >> 8) & 3), 1); } - if (diff & (3 << 6)) { /* DSPDIV */ + if (diff & (3 << 6)) { /* DSPDIV */ clk = omap_findclk(s, "dsp_ck"); omap_clk_setrate(clk, 1 << ((value >> 6) & 3), 1); } - if (diff & (3 << 4)) { /* ARMDIV */ + if (diff & (3 << 4)) { /* ARMDIV */ clk = omap_findclk(s, "arm_ck"); omap_clk_setrate(clk, 1 << ((value >> 4) & 3), 1); } - if (diff & (3 << 2)) { /* LCDDIV */ + if (diff & (3 << 2)) { /* LCDDIV */ clk = omap_findclk(s, "lcd_ck"); omap_clk_setrate(clk, 1 << ((value >> 2) & 3), 1); } - if (diff & (3 << 0)) { /* PERDIV */ + if (diff & (3 << 0)) { /* PERDIV */ clk = omap_findclk(s, "armper_ck"); omap_clk_setrate(clk, 1 << ((value >> 0) & 3), 1); } @@ -1566,25 +1566,25 @@ static inline void omap_clkm_idlect1_update(struct omap_mpu_state_s *s, qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } -#define SET_CANIDLE(clock, bit) \ - if (diff & (1 << bit)) { \ - clk = omap_findclk(s, clock); \ - omap_clk_canidle(clk, (value >> bit) & 1); \ +#define SET_CANIDLE(clock, bit) \ + if (diff & (1 << bit)) { \ + clk = omap_findclk(s, clock); \ + omap_clk_canidle(clk, (value >> bit) & 1); \ } - SET_CANIDLE("mpuwd_ck", 0) /* 
IDLWDT_ARM */ - SET_CANIDLE("armxor_ck", 1) /* IDLXORP_ARM */ - SET_CANIDLE("mpuper_ck", 2) /* IDLPER_ARM */ - SET_CANIDLE("lcd_ck", 3) /* IDLLCD_ARM */ - SET_CANIDLE("lb_ck", 4) /* IDLLB_ARM */ - SET_CANIDLE("hsab_ck", 5) /* IDLHSAB_ARM */ - SET_CANIDLE("tipb_ck", 6) /* IDLIF_ARM */ - SET_CANIDLE("dma_ck", 6) /* IDLIF_ARM */ - SET_CANIDLE("tc_ck", 6) /* IDLIF_ARM */ - SET_CANIDLE("dpll1", 7) /* IDLDPLL_ARM */ - SET_CANIDLE("dpll2", 7) /* IDLDPLL_ARM */ - SET_CANIDLE("dpll3", 7) /* IDLDPLL_ARM */ - SET_CANIDLE("mpui_ck", 8) /* IDLAPI_ARM */ - SET_CANIDLE("armtim_ck", 9) /* IDLTIM_ARM */ + SET_CANIDLE("mpuwd_ck", 0) /* IDLWDT_ARM */ + SET_CANIDLE("armxor_ck", 1) /* IDLXORP_ARM */ + SET_CANIDLE("mpuper_ck", 2) /* IDLPER_ARM */ + SET_CANIDLE("lcd_ck", 3) /* IDLLCD_ARM */ + SET_CANIDLE("lb_ck", 4) /* IDLLB_ARM */ + SET_CANIDLE("hsab_ck", 5) /* IDLHSAB_ARM */ + SET_CANIDLE("tipb_ck", 6) /* IDLIF_ARM */ + SET_CANIDLE("dma_ck", 6) /* IDLIF_ARM */ + SET_CANIDLE("tc_ck", 6) /* IDLIF_ARM */ + SET_CANIDLE("dpll1", 7) /* IDLDPLL_ARM */ + SET_CANIDLE("dpll2", 7) /* IDLDPLL_ARM */ + SET_CANIDLE("dpll3", 7) /* IDLDPLL_ARM */ + SET_CANIDLE("mpui_ck", 8) /* IDLAPI_ARM */ + SET_CANIDLE("armtim_ck", 9) /* IDLTIM_ARM */ } static inline void omap_clkm_idlect2_update(struct omap_mpu_state_s *s, @@ -1592,22 +1592,22 @@ static inline void omap_clkm_idlect2_update(struct omap_mpu_state_s *s, { omap_clk clk; -#define SET_ONOFF(clock, bit) \ - if (diff & (1 << bit)) { \ - clk = omap_findclk(s, clock); \ - omap_clk_onoff(clk, (value >> bit) & 1); \ +#define SET_ONOFF(clock, bit) \ + if (diff & (1 << bit)) { \ + clk = omap_findclk(s, clock); \ + omap_clk_onoff(clk, (value >> bit) & 1); \ } - SET_ONOFF("mpuwd_ck", 0) /* EN_WDTCK */ - SET_ONOFF("armxor_ck", 1) /* EN_XORPCK */ - SET_ONOFF("mpuper_ck", 2) /* EN_PERCK */ - SET_ONOFF("lcd_ck", 3) /* EN_LCDCK */ - SET_ONOFF("lb_ck", 4) /* EN_LBCK */ - SET_ONOFF("hsab_ck", 5) /* EN_HSABCK */ - SET_ONOFF("mpui_ck", 6) /* EN_APICK */ - SET_ONOFF("armtim_ck", 7) /* EN_TIMCK */ - SET_CANIDLE("dma_ck", 8) /* DMACK_REQ */ - SET_ONOFF("arm_gpio_ck", 9) /* EN_GPIOCK */ - SET_ONOFF("lbfree_ck", 10) /* EN_LBFREECK */ + SET_ONOFF("mpuwd_ck", 0) /* EN_WDTCK */ + SET_ONOFF("armxor_ck", 1) /* EN_XORPCK */ + SET_ONOFF("mpuper_ck", 2) /* EN_PERCK */ + SET_ONOFF("lcd_ck", 3) /* EN_LCDCK */ + SET_ONOFF("lb_ck", 4) /* EN_LBCK */ + SET_ONOFF("hsab_ck", 5) /* EN_HSABCK */ + SET_ONOFF("mpui_ck", 6) /* EN_APICK */ + SET_ONOFF("armtim_ck", 7) /* EN_TIMCK */ + SET_CANIDLE("dma_ck", 8) /* DMACK_REQ */ + SET_ONOFF("arm_gpio_ck", 9) /* EN_GPIOCK */ + SET_ONOFF("lbfree_ck", 10) /* EN_LBFREECK */ } static inline void omap_clkm_ckout1_update(struct omap_mpu_state_s *s, @@ -1615,7 +1615,7 @@ static inline void omap_clkm_ckout1_update(struct omap_mpu_state_s *s, { omap_clk clk; - if (diff & (3 << 4)) { /* TCLKOUT */ + if (diff & (3 << 4)) { /* TCLKOUT */ clk = omap_findclk(s, "tclk_out"); switch ((value >> 4) & 3) { case 1: @@ -1630,7 +1630,7 @@ static inline void omap_clkm_ckout1_update(struct omap_mpu_state_s *s, omap_clk_onoff(clk, 0); } } - if (diff & (3 << 2)) { /* DCLKOUT */ + if (diff & (3 << 2)) { /* DCLKOUT */ clk = omap_findclk(s, "dclk_out"); switch ((value >> 2) & 3) { case 0: @@ -1647,7 +1647,7 @@ static inline void omap_clkm_ckout1_update(struct omap_mpu_state_s *s, break; } } - if (diff & (3 << 0)) { /* ACLKOUT */ + if (diff & (3 << 0)) { /* ACLKOUT */ clk = omap_findclk(s, "aclk_out"); switch ((value >> 0) & 3) { case 1: @@ -1685,51 +1685,51 @@ static void omap_clkm_write(void *opaque, hwaddr 
addr, } switch (addr) { - case 0x00: /* ARM_CKCTL */ + case 0x00: /* ARM_CKCTL */ diff = s->clkm.arm_ckctl ^ value; s->clkm.arm_ckctl = value & 0x7fff; omap_clkm_ckctl_update(s, diff, value); return; - case 0x04: /* ARM_IDLECT1 */ + case 0x04: /* ARM_IDLECT1 */ diff = s->clkm.arm_idlect1 ^ value; s->clkm.arm_idlect1 = value & 0x0fff; omap_clkm_idlect1_update(s, diff, value); return; - case 0x08: /* ARM_IDLECT2 */ + case 0x08: /* ARM_IDLECT2 */ diff = s->clkm.arm_idlect2 ^ value; s->clkm.arm_idlect2 = value & 0x07ff; omap_clkm_idlect2_update(s, diff, value); return; - case 0x0c: /* ARM_EWUPCT */ + case 0x0c: /* ARM_EWUPCT */ s->clkm.arm_ewupct = value & 0x003f; return; - case 0x10: /* ARM_RSTCT1 */ + case 0x10: /* ARM_RSTCT1 */ diff = s->clkm.arm_rstct1 ^ value; s->clkm.arm_rstct1 = value & 0x0007; if (value & 9) { qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); s->clkm.cold_start = 0xa; } - if (diff & ~value & 4) { /* DSP_RST */ + if (diff & ~value & 4) { /* DSP_RST */ omap_mpui_reset(s); omap_tipb_bridge_reset(s->private_tipb); omap_tipb_bridge_reset(s->public_tipb); } - if (diff & 2) { /* DSP_EN */ + if (diff & 2) { /* DSP_EN */ clk = omap_findclk(s, "dsp_ck"); omap_clk_canidle(clk, (~value >> 1) & 1); } return; - case 0x14: /* ARM_RSTCT2 */ + case 0x14: /* ARM_RSTCT2 */ s->clkm.arm_rstct2 = value & 0x0001; return; - case 0x18: /* ARM_SYSST */ + case 0x18: /* ARM_SYSST */ if ((s->clkm.clocking_scheme ^ (value >> 11)) & 7) { s->clkm.clocking_scheme = (value >> 11) & 7; trace_omap1_pwl_clocking_scheme( @@ -1738,13 +1738,13 @@ static void omap_clkm_write(void *opaque, hwaddr addr, s->clkm.cold_start &= value & 0x3f; return; - case 0x1c: /* ARM_CKOUT1 */ + case 0x1c: /* ARM_CKOUT1 */ diff = s->clkm.arm_ckout1 ^ value; s->clkm.arm_ckout1 = value & 0x003f; omap_clkm_ckout1_update(s, diff, value); return; - case 0x20: /* ARM_CKOUT2 */ + case 0x20: /* ARM_CKOUT2 */ default: OMAP_BAD_REG(addr); } @@ -1767,16 +1767,16 @@ static uint64_t omap_clkdsp_read(void *opaque, hwaddr addr, } switch (addr) { - case 0x04: /* DSP_IDLECT1 */ + case 0x04: /* DSP_IDLECT1 */ return s->clkm.dsp_idlect1; - case 0x08: /* DSP_IDLECT2 */ + case 0x08: /* DSP_IDLECT2 */ return s->clkm.dsp_idlect2; - case 0x14: /* DSP_RSTCT2 */ + case 0x14: /* DSP_RSTCT2 */ return s->clkm.dsp_rstct2; - case 0x18: /* DSP_SYSST */ + case 0x18: /* DSP_SYSST */ return (s->clkm.clocking_scheme << 11) | s->clkm.cold_start | (cpu->halted << 6); /* Quite useless... 
*/ } @@ -1790,7 +1790,7 @@ static inline void omap_clkdsp_idlect1_update(struct omap_mpu_state_s *s, { omap_clk clk; - SET_CANIDLE("dspxor_ck", 1); /* IDLXORP_DSP */ + SET_CANIDLE("dspxor_ck", 1); /* IDLXORP_DSP */ } static inline void omap_clkdsp_idlect2_update(struct omap_mpu_state_s *s, @@ -1798,7 +1798,7 @@ static inline void omap_clkdsp_idlect2_update(struct omap_mpu_state_s *s, { omap_clk clk; - SET_ONOFF("dspxor_ck", 1); /* EN_XORPCK */ + SET_ONOFF("dspxor_ck", 1); /* EN_XORPCK */ } static void omap_clkdsp_write(void *opaque, hwaddr addr, @@ -1813,23 +1813,23 @@ static void omap_clkdsp_write(void *opaque, hwaddr addr, } switch (addr) { - case 0x04: /* DSP_IDLECT1 */ + case 0x04: /* DSP_IDLECT1 */ diff = s->clkm.dsp_idlect1 ^ value; s->clkm.dsp_idlect1 = value & 0x01f7; omap_clkdsp_idlect1_update(s, diff, value); break; - case 0x08: /* DSP_IDLECT2 */ + case 0x08: /* DSP_IDLECT2 */ s->clkm.dsp_idlect2 = value & 0x0037; diff = s->clkm.dsp_idlect1 ^ value; omap_clkdsp_idlect2_update(s, diff, value); break; - case 0x14: /* DSP_RSTCT2 */ + case 0x14: /* DSP_RSTCT2 */ s->clkm.dsp_rstct2 = value & 0x0001; break; - case 0x18: /* DSP_SYSST */ + case 0x18: /* DSP_SYSST */ s->clkm.cold_start &= value & 0x3f; break; @@ -1928,8 +1928,8 @@ static void omap_mpuio_set(void *opaque, int line, int level) qemu_irq_raise(s->irq); /* TODO: wakeup */ } - if ((s->event & (1 << 0)) && /* SET_GPIO_EVENT_MODE */ - (s->event >> 1) == line) /* PIN_SELECT */ + if ((s->event & (1 << 0)) && /* SET_GPIO_EVENT_MODE */ + (s->event >> 1) == line) /* PIN_SELECT */ s->latch = s->inputs; } } @@ -1959,47 +1959,47 @@ static uint64_t omap_mpuio_read(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* INPUT_LATCH */ + case 0x00: /* INPUT_LATCH */ return s->inputs; - case 0x04: /* OUTPUT_REG */ + case 0x04: /* OUTPUT_REG */ return s->outputs; - case 0x08: /* IO_CNTL */ + case 0x08: /* IO_CNTL */ return s->dir; - case 0x10: /* KBR_LATCH */ + case 0x10: /* KBR_LATCH */ return s->row_latch; - case 0x14: /* KBC_REG */ + case 0x14: /* KBC_REG */ return s->cols; - case 0x18: /* GPIO_EVENT_MODE_REG */ + case 0x18: /* GPIO_EVENT_MODE_REG */ return s->event; - case 0x1c: /* GPIO_INT_EDGE_REG */ + case 0x1c: /* GPIO_INT_EDGE_REG */ return s->edge; - case 0x20: /* KBD_INT */ + case 0x20: /* KBD_INT */ return (~s->row_latch & 0x1f) && !s->kbd_mask; - case 0x24: /* GPIO_INT */ + case 0x24: /* GPIO_INT */ ret = s->ints; s->ints &= s->mask; if (ret) qemu_irq_lower(s->irq); return ret; - case 0x28: /* KBD_MASKIT */ + case 0x28: /* KBD_MASKIT */ return s->kbd_mask; - case 0x2c: /* GPIO_MASKIT */ + case 0x2c: /* GPIO_MASKIT */ return s->mask; - case 0x30: /* GPIO_DEBOUNCING_REG */ + case 0x30: /* GPIO_DEBOUNCING_REG */ return s->debounce; - case 0x34: /* GPIO_LATCH_REG */ + case 0x34: /* GPIO_LATCH_REG */ return s->latch; } @@ -2021,7 +2021,7 @@ static void omap_mpuio_write(void *opaque, hwaddr addr, } switch (offset) { - case 0x04: /* OUTPUT_REG */ + case 0x04: /* OUTPUT_REG */ diff = (s->outputs ^ value) & ~s->dir; s->outputs = value; while ((ln = ctz32(diff)) != 32) { @@ -2031,7 +2031,7 @@ static void omap_mpuio_write(void *opaque, hwaddr addr, } break; - case 0x08: /* IO_CNTL */ + case 0x08: /* IO_CNTL */ diff = s->outputs & (s->dir ^ value); s->dir = value; @@ -2043,37 +2043,37 @@ static void omap_mpuio_write(void *opaque, hwaddr addr, } break; - case 0x14: /* KBC_REG */ + case 0x14: /* KBC_REG */ s->cols = value; omap_mpuio_kbd_update(s); break; - case 0x18: /* GPIO_EVENT_MODE_REG */ + case 0x18: /* GPIO_EVENT_MODE_REG */ 
s->event = value & 0x1f; break; - case 0x1c: /* GPIO_INT_EDGE_REG */ + case 0x1c: /* GPIO_INT_EDGE_REG */ s->edge = value; break; - case 0x28: /* KBD_MASKIT */ + case 0x28: /* KBD_MASKIT */ s->kbd_mask = value & 1; omap_mpuio_kbd_update(s); break; - case 0x2c: /* GPIO_MASKIT */ + case 0x2c: /* GPIO_MASKIT */ s->mask = value; break; - case 0x30: /* GPIO_DEBOUNCING_REG */ + case 0x30: /* GPIO_DEBOUNCING_REG */ s->debounce = value & 0x1ff; break; - case 0x00: /* INPUT_LATCH */ - case 0x10: /* KBR_LATCH */ - case 0x20: /* KBD_INT */ - case 0x24: /* GPIO_INT */ - case 0x34: /* GPIO_LATCH_REG */ + case 0x00: /* INPUT_LATCH */ + case 0x10: /* KBR_LATCH */ + case 0x20: /* KBD_INT */ + case 0x24: /* GPIO_INT */ + case 0x34: /* GPIO_LATCH_REG */ OMAP_RO_REG(addr); return; @@ -2176,24 +2176,24 @@ struct omap_uwire_s { static void omap_uwire_transfer_start(struct omap_uwire_s *s) { - int chipselect = (s->control >> 10) & 3; /* INDEX */ + int chipselect = (s->control >> 10) & 3; /* INDEX */ - if ((s->control >> 5) & 0x1f) { /* NB_BITS_WR */ + if ((s->control >> 5) & 0x1f) { /* NB_BITS_WR */ if (s->control & (1 << 12)) { /* CS_CMD */ qemu_log_mask(LOG_UNIMP, "uWireSlave TX CS:%d data:0x%04x\n", chipselect, s->txbuf >> (16 - ((s->control >> 5) & 0x1f))); } - s->control &= ~(1 << 14); /* CSRB */ + s->control &= ~(1 << 14); /* CSRB */ /* TODO: depending on s->setup[4] bits [1:0] assert an IRQ or * a DRQ. When is the level IRQ supposed to be reset? */ } - if ((s->control >> 0) & 0x1f) { /* NB_BITS_RD */ + if ((s->control >> 0) & 0x1f) { /* NB_BITS_RD */ if (s->control & (1 << 12)) { /* CS_CMD */ qemu_log_mask(LOG_UNIMP, "uWireSlave RX CS:%d\n", chipselect); } - s->control |= 1 << 15; /* RDRB */ + s->control |= 1 << 15; /* RDRB */ /* TODO: depending on s->setup[4] bits [1:0] assert an IRQ or * a DRQ. When is the level IRQ supposed to be reset? 
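The uWire handlers above pack seven fields into the 16-bit control register, so a small decoder makes the bit layout explicit. Field names follow the comments in the code; this is a reading aid, not part of the device model:

    #include <stdint.h>

    typedef struct {
        unsigned nb_bits_rd;  /* [4:0]   bits to read back */
        unsigned nb_bits_wr;  /* [9:5]   bits to transmit */
        unsigned index;       /* [11:10] chip select */
        unsigned cs_cmd;      /* [12]    CS_CMD */
        unsigned start;       /* [13]    START, write-only trigger */
        unsigned csrb;        /* [14]    CSRB, transmit busy */
        unsigned rdrb;        /* [15]    RDRB, read data ready */
    } UwireCsr;

    static UwireCsr uwire_decode_csr(uint16_t control)
    {
        return (UwireCsr) {
            .nb_bits_rd = control & 0x1f,
            .nb_bits_wr = (control >> 5) & 0x1f,
            .index      = (control >> 10) & 3,
            .cs_cmd     = (control >> 12) & 1,
            .start      = (control >> 13) & 1,
            .csrb       = (control >> 14) & 1,
            .rdrb       = (control >> 15) & 1,
        };
    }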
*/ } @@ -2209,22 +2209,22 @@ static uint64_t omap_uwire_read(void *opaque, hwaddr addr, unsigned size) } switch (offset) { - case 0x00: /* RDR */ - s->control &= ~(1 << 15); /* RDRB */ + case 0x00: /* RDR */ + s->control &= ~(1 << 15); /* RDRB */ return s->rxbuf; - case 0x04: /* CSR */ + case 0x04: /* CSR */ return s->control; - case 0x08: /* SR1 */ + case 0x08: /* SR1 */ return s->setup[0]; - case 0x0c: /* SR2 */ + case 0x0c: /* SR2 */ return s->setup[1]; - case 0x10: /* SR3 */ + case 0x10: /* SR3 */ return s->setup[2]; - case 0x14: /* SR4 */ + case 0x14: /* SR4 */ return s->setup[3]; - case 0x18: /* SR5 */ + case 0x18: /* SR5 */ return s->setup[4]; } @@ -2244,39 +2244,39 @@ static void omap_uwire_write(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* TDR */ - s->txbuf = value; /* TD */ - if ((s->setup[4] & (1 << 2)) && /* AUTO_TX_EN */ - ((s->setup[4] & (1 << 3)) || /* CS_TOGGLE_TX_EN */ - (s->control & (1 << 12)))) { /* CS_CMD */ - s->control |= 1 << 14; /* CSRB */ + case 0x00: /* TDR */ + s->txbuf = value; /* TD */ + if ((s->setup[4] & (1 << 2)) && /* AUTO_TX_EN */ + ((s->setup[4] & (1 << 3)) || /* CS_TOGGLE_TX_EN */ + (s->control & (1 << 12)))) { /* CS_CMD */ + s->control |= 1 << 14; /* CSRB */ omap_uwire_transfer_start(s); } break; - case 0x04: /* CSR */ + case 0x04: /* CSR */ s->control = value & 0x1fff; - if (value & (1 << 13)) /* START */ + if (value & (1 << 13)) /* START */ omap_uwire_transfer_start(s); break; - case 0x08: /* SR1 */ + case 0x08: /* SR1 */ s->setup[0] = value & 0x003f; break; - case 0x0c: /* SR2 */ + case 0x0c: /* SR2 */ s->setup[1] = value & 0x0fc0; break; - case 0x10: /* SR3 */ + case 0x10: /* SR3 */ s->setup[2] = value & 0x0003; break; - case 0x14: /* SR4 */ + case 0x14: /* SR4 */ s->setup[3] = value & 0x0001; break; - case 0x18: /* SR5 */ + case 0x18: /* SR5 */ s->setup[4] = value & 0x000f; break; @@ -2350,9 +2350,9 @@ static uint64_t omap_pwl_read(void *opaque, hwaddr addr, unsigned size) } switch (offset) { - case 0x00: /* PWL_LEVEL */ + case 0x00: /* PWL_LEVEL */ return s->level; - case 0x04: /* PWL_CTRL */ + case 0x04: /* PWL_CTRL */ return s->enable; } OMAP_BAD_REG(addr); @@ -2371,11 +2371,11 @@ static void omap_pwl_write(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* PWL_LEVEL */ + case 0x00: /* PWL_LEVEL */ s->level = value; omap_pwl_update(s); break; - case 0x04: /* PWL_CTRL */ + case 0x04: /* PWL_CTRL */ s->enable = value & 1; omap_pwl_update(s); break; @@ -2443,11 +2443,11 @@ static uint64_t omap_pwt_read(void *opaque, hwaddr addr, unsigned size) } switch (offset) { - case 0x00: /* FRC */ + case 0x00: /* FRC */ return s->frc; - case 0x04: /* VCR */ + case 0x04: /* VCR */ return s->vrc; - case 0x08: /* GCR */ + case 0x08: /* GCR */ return s->gcr; } OMAP_BAD_REG(addr); @@ -2466,10 +2466,10 @@ static void omap_pwt_write(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* FRC */ + case 0x00: /* FRC */ s->frc = value & 0x3f; break; - case 0x04: /* VRC */ + case 0x04: /* VRC */ if ((value ^ s->vrc) & 1) { if (value & 1) { trace_omap1_pwt_buzz( @@ -2494,7 +2494,7 @@ static void omap_pwt_write(void *opaque, hwaddr addr, } s->vrc = value & 0x7f; break; - case 0x08: /* GCR */ + case 0x08: /* GCR */ s->gcr = value & 3; break; default: @@ -2577,69 +2577,69 @@ static uint64_t omap_rtc_read(void *opaque, hwaddr addr, unsigned size) } switch (offset) { - case 0x00: /* SECONDS_REG */ + case 0x00: /* SECONDS_REG */ return to_bcd(s->current_tm.tm_sec); - case 0x04: /* MINUTES_REG */ + case 0x04: /* MINUTES_REG */ return 
to_bcd(s->current_tm.tm_min); - case 0x08: /* HOURS_REG */ + case 0x08: /* HOURS_REG */ if (s->pm_am) return ((s->current_tm.tm_hour > 11) << 7) | to_bcd(((s->current_tm.tm_hour - 1) % 12) + 1); else return to_bcd(s->current_tm.tm_hour); - case 0x0c: /* DAYS_REG */ + case 0x0c: /* DAYS_REG */ return to_bcd(s->current_tm.tm_mday); - case 0x10: /* MONTHS_REG */ + case 0x10: /* MONTHS_REG */ return to_bcd(s->current_tm.tm_mon + 1); - case 0x14: /* YEARS_REG */ + case 0x14: /* YEARS_REG */ return to_bcd(s->current_tm.tm_year % 100); - case 0x18: /* WEEK_REG */ + case 0x18: /* WEEK_REG */ return s->current_tm.tm_wday; - case 0x20: /* ALARM_SECONDS_REG */ + case 0x20: /* ALARM_SECONDS_REG */ return to_bcd(s->alarm_tm.tm_sec); - case 0x24: /* ALARM_MINUTES_REG */ + case 0x24: /* ALARM_MINUTES_REG */ return to_bcd(s->alarm_tm.tm_min); - case 0x28: /* ALARM_HOURS_REG */ + case 0x28: /* ALARM_HOURS_REG */ if (s->pm_am) return ((s->alarm_tm.tm_hour > 11) << 7) | to_bcd(((s->alarm_tm.tm_hour - 1) % 12) + 1); else return to_bcd(s->alarm_tm.tm_hour); - case 0x2c: /* ALARM_DAYS_REG */ + case 0x2c: /* ALARM_DAYS_REG */ return to_bcd(s->alarm_tm.tm_mday); - case 0x30: /* ALARM_MONTHS_REG */ + case 0x30: /* ALARM_MONTHS_REG */ return to_bcd(s->alarm_tm.tm_mon + 1); - case 0x34: /* ALARM_YEARS_REG */ + case 0x34: /* ALARM_YEARS_REG */ return to_bcd(s->alarm_tm.tm_year % 100); - case 0x40: /* RTC_CTRL_REG */ + case 0x40: /* RTC_CTRL_REG */ return (s->pm_am << 3) | (s->auto_comp << 2) | (s->round << 1) | s->running; - case 0x44: /* RTC_STATUS_REG */ + case 0x44: /* RTC_STATUS_REG */ i = s->status; s->status &= ~0x3d; return i; - case 0x48: /* RTC_INTERRUPTS_REG */ + case 0x48: /* RTC_INTERRUPTS_REG */ return s->interrupts; - case 0x4c: /* RTC_COMP_LSB_REG */ + case 0x4c: /* RTC_COMP_LSB_REG */ return ((uint16_t) s->comp_reg) & 0xff; - case 0x50: /* RTC_COMP_MSB_REG */ + case 0x50: /* RTC_COMP_MSB_REG */ return ((uint16_t) s->comp_reg) >> 8; } @@ -2661,17 +2661,17 @@ static void omap_rtc_write(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* SECONDS_REG */ + case 0x00: /* SECONDS_REG */ s->ti -= s->current_tm.tm_sec; s->ti += from_bcd(value); return; - case 0x04: /* MINUTES_REG */ + case 0x04: /* MINUTES_REG */ s->ti -= s->current_tm.tm_min * 60; s->ti += from_bcd(value) * 60; return; - case 0x08: /* HOURS_REG */ + case 0x08: /* HOURS_REG */ s->ti -= s->current_tm.tm_hour * 3600; if (s->pm_am) { s->ti += (from_bcd(value & 0x3f) & 12) * 3600; @@ -2680,12 +2680,12 @@ static void omap_rtc_write(void *opaque, hwaddr addr, s->ti += from_bcd(value & 0x3f) * 3600; return; - case 0x0c: /* DAYS_REG */ + case 0x0c: /* DAYS_REG */ s->ti -= s->current_tm.tm_mday * 86400; s->ti += from_bcd(value) * 86400; return; - case 0x10: /* MONTHS_REG */ + case 0x10: /* MONTHS_REG */ memcpy(&new_tm, &s->current_tm, sizeof(new_tm)); new_tm.tm_mon = from_bcd(value); ti[0] = mktimegm(&s->current_tm); @@ -2701,7 +2701,7 @@ static void omap_rtc_write(void *opaque, hwaddr addr, } return; - case 0x14: /* YEARS_REG */ + case 0x14: /* YEARS_REG */ memcpy(&new_tm, &s->current_tm, sizeof(new_tm)); new_tm.tm_year += from_bcd(value) - (new_tm.tm_year % 100); ti[0] = mktimegm(&s->current_tm); @@ -2717,20 +2717,20 @@ static void omap_rtc_write(void *opaque, hwaddr addr, } return; - case 0x18: /* WEEK_REG */ - return; /* Ignored */ + case 0x18: /* WEEK_REG */ + return; /* Ignored */ - case 0x20: /* ALARM_SECONDS_REG */ + case 0x20: /* ALARM_SECONDS_REG */ s->alarm_tm.tm_sec = from_bcd(value); omap_rtc_alarm_update(s); return; - case 
0x24: /* ALARM_MINUTES_REG */ + case 0x24: /* ALARM_MINUTES_REG */ s->alarm_tm.tm_min = from_bcd(value); omap_rtc_alarm_update(s); return; - case 0x28: /* ALARM_HOURS_REG */ + case 0x28: /* ALARM_HOURS_REG */ if (s->pm_am) s->alarm_tm.tm_hour = ((from_bcd(value & 0x3f)) % 12) + @@ -2740,22 +2740,22 @@ static void omap_rtc_write(void *opaque, hwaddr addr, omap_rtc_alarm_update(s); return; - case 0x2c: /* ALARM_DAYS_REG */ + case 0x2c: /* ALARM_DAYS_REG */ s->alarm_tm.tm_mday = from_bcd(value); omap_rtc_alarm_update(s); return; - case 0x30: /* ALARM_MONTHS_REG */ + case 0x30: /* ALARM_MONTHS_REG */ s->alarm_tm.tm_mon = from_bcd(value); omap_rtc_alarm_update(s); return; - case 0x34: /* ALARM_YEARS_REG */ + case 0x34: /* ALARM_YEARS_REG */ s->alarm_tm.tm_year = from_bcd(value); omap_rtc_alarm_update(s); return; - case 0x40: /* RTC_CTRL_REG */ + case 0x40: /* RTC_CTRL_REG */ s->pm_am = (value >> 3) & 1; s->auto_comp = (value >> 2) & 1; s->round = (value >> 1) & 1; @@ -2764,21 +2764,21 @@ static void omap_rtc_write(void *opaque, hwaddr addr, s->status |= s->running << 1; return; - case 0x44: /* RTC_STATUS_REG */ + case 0x44: /* RTC_STATUS_REG */ s->status &= ~((value & 0xc0) ^ 0x80); omap_rtc_interrupts_update(s); return; - case 0x48: /* RTC_INTERRUPTS_REG */ + case 0x48: /* RTC_INTERRUPTS_REG */ s->interrupts = value; return; - case 0x4c: /* RTC_COMP_LSB_REG */ + case 0x4c: /* RTC_COMP_LSB_REG */ s->comp_reg &= 0xff00; s->comp_reg |= 0x00ff & value; return; - case 0x50: /* RTC_COMP_MSB_REG */ + case 0x50: /* RTC_COMP_MSB_REG */ s->comp_reg &= 0x00ff; s->comp_reg |= 0xff00 & (value << 8); return; @@ -2929,12 +2929,12 @@ static void omap_mcbsp_intr_update(struct omap_mcbsp_s *s) { int irq; - switch ((s->spcr[0] >> 4) & 3) { /* RINTM */ + switch ((s->spcr[0] >> 4) & 3) { /* RINTM */ case 0: - irq = (s->spcr[0] >> 1) & 1; /* RRDY */ + irq = (s->spcr[0] >> 1) & 1; /* RRDY */ break; case 3: - irq = (s->spcr[0] >> 3) & 1; /* RSYNCERR */ + irq = (s->spcr[0] >> 3) & 1; /* RSYNCERR */ break; default: irq = 0; @@ -2944,12 +2944,12 @@ static void omap_mcbsp_intr_update(struct omap_mcbsp_s *s) if (irq) qemu_irq_pulse(s->rxirq); - switch ((s->spcr[1] >> 4) & 3) { /* XINTM */ + switch ((s->spcr[1] >> 4) & 3) { /* XINTM */ case 0: - irq = (s->spcr[1] >> 1) & 1; /* XRDY */ + irq = (s->spcr[1] >> 1) & 1; /* XRDY */ break; case 3: - irq = (s->spcr[1] >> 3) & 1; /* XSYNCERR */ + irq = (s->spcr[1] >> 3) & 1; /* XSYNCERR */ break; default: irq = 0; @@ -2962,9 +2962,9 @@ static void omap_mcbsp_intr_update(struct omap_mcbsp_s *s) static void omap_mcbsp_rx_newdata(struct omap_mcbsp_s *s) { - if ((s->spcr[0] >> 1) & 1) /* RRDY */ - s->spcr[0] |= 1 << 2; /* RFULL */ - s->spcr[0] |= 1 << 1; /* RRDY */ + if ((s->spcr[0] >> 1) & 1) /* RRDY */ + s->spcr[0] |= 1 << 2; /* RFULL */ + s->spcr[0] |= 1 << 1; /* RRDY */ qemu_irq_raise(s->rxdrq); omap_mcbsp_intr_update(s); } @@ -3004,14 +3004,14 @@ static void omap_mcbsp_rx_stop(struct omap_mcbsp_s *s) static void omap_mcbsp_rx_done(struct omap_mcbsp_s *s) { - s->spcr[0] &= ~(1 << 1); /* RRDY */ + s->spcr[0] &= ~(1 << 1); /* RRDY */ qemu_irq_lower(s->rxdrq); omap_mcbsp_intr_update(s); } static void omap_mcbsp_tx_newdata(struct omap_mcbsp_s *s) { - s->spcr[1] |= 1 << 1; /* XRDY */ + s->spcr[1] |= 1 << 1; /* XRDY */ qemu_irq_raise(s->txdrq); omap_mcbsp_intr_update(s); } @@ -3046,7 +3046,7 @@ static void omap_mcbsp_tx_start(struct omap_mcbsp_s *s) static void omap_mcbsp_tx_done(struct omap_mcbsp_s *s) { - s->spcr[1] &= ~(1 << 1); /* XRDY */ + s->spcr[1] &= ~(1 << 1); /* XRDY */ 
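The RTC hunks above move values between binary struct tm fields and the guest-visible BCD register format via to_bcd()/from_bcd(). A minimal sketch of those helpers, assuming QEMU's usual one-digit-per-nibble encoding (equivalents live in QEMU's bcd header; treat the exact location as an assumption):

#include <stdint.h>

/* One decimal digit per nibble; valid for 0..99, which covers every
 * field the RTC handlers above encode. */
static inline uint8_t to_bcd(uint8_t val)
{
    return ((val / 10) << 4) | (val % 10);
}

static inline uint8_t from_bcd(uint8_t val)
{
    return ((val >> 4) * 10) + (val & 0x0f);
}

So a guest writing 0x59 to MINUTES_REG adds from_bcd(0x59) * 60 = 59 * 60 seconds to s->ti, matching the MINUTES_REG case in omap_rtc_write() above.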
qemu_irq_lower(s->txdrq); omap_mcbsp_intr_update(s); if (s->codec && s->codec->cts) @@ -3064,27 +3064,27 @@ static void omap_mcbsp_req_update(struct omap_mcbsp_s *s) { int prev_rx_rate, prev_tx_rate; int rx_rate = 0, tx_rate = 0; - int cpu_rate = 1500000; /* XXX */ + int cpu_rate = 1500000; /* XXX */ /* TODO: check CLKSTP bit */ - if (s->spcr[1] & (1 << 6)) { /* GRST */ - if (s->spcr[0] & (1 << 0)) { /* RRST */ - if ((s->srgr[1] & (1 << 13)) && /* CLKSM */ - (s->pcr & (1 << 8))) { /* CLKRM */ - if (~s->pcr & (1 << 7)) /* SCLKME */ + if (s->spcr[1] & (1 << 6)) { /* GRST */ + if (s->spcr[0] & (1 << 0)) { /* RRST */ + if ((s->srgr[1] & (1 << 13)) && /* CLKSM */ + (s->pcr & (1 << 8))) { /* CLKRM */ + if (~s->pcr & (1 << 7)) /* SCLKME */ rx_rate = cpu_rate / - ((s->srgr[0] & 0xff) + 1); /* CLKGDV */ + ((s->srgr[0] & 0xff) + 1); /* CLKGDV */ } else if (s->codec) rx_rate = s->codec->rx_rate; } - if (s->spcr[1] & (1 << 0)) { /* XRST */ - if ((s->srgr[1] & (1 << 13)) && /* CLKSM */ - (s->pcr & (1 << 9))) { /* CLKXM */ - if (~s->pcr & (1 << 7)) /* SCLKME */ + if (s->spcr[1] & (1 << 0)) { /* XRST */ + if ((s->srgr[1] & (1 << 13)) && /* CLKSM */ + (s->pcr & (1 << 9))) { /* CLKXM */ + if (~s->pcr & (1 << 7)) /* SCLKME */ tx_rate = cpu_rate / - ((s->srgr[0] & 0xff) + 1); /* CLKGDV */ + ((s->srgr[0] & 0xff) + 1); /* CLKGDV */ } else if (s->codec) tx_rate = s->codec->tx_rate; @@ -3121,11 +3121,11 @@ static uint64_t omap_mcbsp_read(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* DRR2 */ - if (((s->rcr[0] >> 5) & 7) < 3) /* RWDLEN1 */ + case 0x00: /* DRR2 */ + if (((s->rcr[0] >> 5) & 7) < 3) /* RWDLEN1 */ return 0x0000; /* Fall through. */ - case 0x02: /* DRR1 */ + case 0x02: /* DRR1 */ if (s->rx_req < 2) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Rx FIFO underrun\n", __func__); omap_mcbsp_rx_done(s); @@ -3143,63 +3143,63 @@ static uint64_t omap_mcbsp_read(void *opaque, hwaddr addr, } return 0x0000; - case 0x04: /* DXR2 */ - case 0x06: /* DXR1 */ + case 0x04: /* DXR2 */ + case 0x06: /* DXR1 */ return 0x0000; - case 0x08: /* SPCR2 */ + case 0x08: /* SPCR2 */ return s->spcr[1]; - case 0x0a: /* SPCR1 */ + case 0x0a: /* SPCR1 */ return s->spcr[0]; - case 0x0c: /* RCR2 */ + case 0x0c: /* RCR2 */ return s->rcr[1]; - case 0x0e: /* RCR1 */ + case 0x0e: /* RCR1 */ return s->rcr[0]; - case 0x10: /* XCR2 */ + case 0x10: /* XCR2 */ return s->xcr[1]; - case 0x12: /* XCR1 */ + case 0x12: /* XCR1 */ return s->xcr[0]; - case 0x14: /* SRGR2 */ + case 0x14: /* SRGR2 */ return s->srgr[1]; - case 0x16: /* SRGR1 */ + case 0x16: /* SRGR1 */ return s->srgr[0]; - case 0x18: /* MCR2 */ + case 0x18: /* MCR2 */ return s->mcr[1]; - case 0x1a: /* MCR1 */ + case 0x1a: /* MCR1 */ return s->mcr[0]; - case 0x1c: /* RCERA */ + case 0x1c: /* RCERA */ return s->rcer[0]; - case 0x1e: /* RCERB */ + case 0x1e: /* RCERB */ return s->rcer[1]; - case 0x20: /* XCERA */ + case 0x20: /* XCERA */ return s->xcer[0]; - case 0x22: /* XCERB */ + case 0x22: /* XCERB */ return s->xcer[1]; - case 0x24: /* PCR0 */ + case 0x24: /* PCR0 */ return s->pcr; - case 0x26: /* RCERC */ + case 0x26: /* RCERC */ return s->rcer[2]; - case 0x28: /* RCERD */ + case 0x28: /* RCERD */ return s->rcer[3]; - case 0x2a: /* XCERC */ + case 0x2a: /* XCERC */ return s->xcer[2]; - case 0x2c: /* XCERD */ + case 0x2c: /* XCERD */ return s->xcer[3]; - case 0x2e: /* RCERE */ + case 0x2e: /* RCERE */ return s->rcer[4]; - case 0x30: /* RCERF */ + case 0x30: /* RCERF */ return s->rcer[5]; - case 0x32: /* XCERE */ + case 0x32: /* XCERE */ return s->xcer[4]; - case 0x34: /* XCERF */ 
+ case 0x34: /* XCERF */ return s->xcer[5]; - case 0x36: /* RCERG */ + case 0x36: /* RCERG */ return s->rcer[6]; - case 0x38: /* RCERH */ + case 0x38: /* RCERH */ return s->rcer[7]; - case 0x3a: /* XCERG */ + case 0x3a: /* XCERG */ return s->xcer[6]; - case 0x3c: /* XCERH */ + case 0x3c: /* XCERH */ return s->xcer[7]; } @@ -3214,16 +3214,16 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr, int offset = addr & OMAP_MPUI_REG_MASK; switch (offset) { - case 0x00: /* DRR2 */ - case 0x02: /* DRR1 */ + case 0x00: /* DRR2 */ + case 0x02: /* DRR1 */ OMAP_RO_REG(addr); return; - case 0x04: /* DXR2 */ - if (((s->xcr[0] >> 5) & 7) < 3) /* XWDLEN1 */ + case 0x04: /* DXR2 */ + if (((s->xcr[0] >> 5) & 7) < 3) /* XWDLEN1 */ return; /* Fall through. */ - case 0x06: /* DXR1 */ + case 0x06: /* DXR1 */ if (s->tx_req > 1) { s->tx_req -= 2; if (s->codec && s->codec->cts) { @@ -3237,15 +3237,15 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr, } return; - case 0x08: /* SPCR2 */ + case 0x08: /* SPCR2 */ s->spcr[1] &= 0x0002; s->spcr[1] |= 0x03f9 & value; - s->spcr[1] |= 0x0004 & (value << 2); /* XEMPTY := XRST */ - if (~value & 1) /* XRST */ + s->spcr[1] |= 0x0004 & (value << 2); /* XEMPTY := XRST */ + if (~value & 1) /* XRST */ s->spcr[1] &= ~6; omap_mcbsp_req_update(s); return; - case 0x0a: /* SPCR1 */ + case 0x0a: /* SPCR1 */ s->spcr[0] &= 0x0006; s->spcr[0] |= 0xf8f9 & value; if (value & (1 << 15)) { /* DLB */ @@ -3253,7 +3253,7 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr, "%s: Digital Loopback mode enable attempt\n", __func__); } - if (~value & 1) { /* RRST */ + if (~value & 1) { /* RRST */ s->spcr[0] &= ~6; s->rx_req = 0; omap_mcbsp_rx_done(s); @@ -3261,27 +3261,27 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr, omap_mcbsp_req_update(s); return; - case 0x0c: /* RCR2 */ + case 0x0c: /* RCR2 */ s->rcr[1] = value & 0xffff; return; - case 0x0e: /* RCR1 */ + case 0x0e: /* RCR1 */ s->rcr[0] = value & 0x7fe0; return; - case 0x10: /* XCR2 */ + case 0x10: /* XCR2 */ s->xcr[1] = value & 0xffff; return; - case 0x12: /* XCR1 */ + case 0x12: /* XCR1 */ s->xcr[0] = value & 0x7fe0; return; - case 0x14: /* SRGR2 */ + case 0x14: /* SRGR2 */ s->srgr[1] = value & 0xffff; omap_mcbsp_req_update(s); return; - case 0x16: /* SRGR1 */ + case 0x16: /* SRGR1 */ s->srgr[0] = value & 0xffff; omap_mcbsp_req_update(s); return; - case 0x18: /* MCR2 */ + case 0x18: /* MCR2 */ s->mcr[1] = value & 0x03e3; if (value & 3) { /* XMCM */ qemu_log_mask(LOG_UNIMP, @@ -3289,7 +3289,7 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr, __func__); } return; - case 0x1a: /* MCR1 */ + case 0x1a: /* MCR1 */ s->mcr[0] = value & 0x03e1; if (value & 1) { /* RMCM */ qemu_log_mask(LOG_UNIMP, @@ -3297,55 +3297,55 @@ static void omap_mcbsp_writeh(void *opaque, hwaddr addr, __func__); } return; - case 0x1c: /* RCERA */ + case 0x1c: /* RCERA */ s->rcer[0] = value & 0xffff; return; - case 0x1e: /* RCERB */ + case 0x1e: /* RCERB */ s->rcer[1] = value & 0xffff; return; - case 0x20: /* XCERA */ + case 0x20: /* XCERA */ s->xcer[0] = value & 0xffff; return; - case 0x22: /* XCERB */ + case 0x22: /* XCERB */ s->xcer[1] = value & 0xffff; return; - case 0x24: /* PCR0 */ + case 0x24: /* PCR0 */ s->pcr = value & 0x7faf; return; - case 0x26: /* RCERC */ + case 0x26: /* RCERC */ s->rcer[2] = value & 0xffff; return; - case 0x28: /* RCERD */ + case 0x28: /* RCERD */ s->rcer[3] = value & 0xffff; return; - case 0x2a: /* XCERC */ + case 0x2a: /* XCERC */ s->xcer[2] = value & 0xffff; return; - case 0x2c: /* XCERD */ + case 0x2c: 
/* XCERD */ s->xcer[3] = value & 0xffff; return; - case 0x2e: /* RCERE */ + case 0x2e: /* RCERE */ s->rcer[4] = value & 0xffff; return; - case 0x30: /* RCERF */ + case 0x30: /* RCERF */ s->rcer[5] = value & 0xffff; return; - case 0x32: /* XCERE */ + case 0x32: /* XCERE */ s->xcer[4] = value & 0xffff; return; - case 0x34: /* XCERF */ + case 0x34: /* XCERF */ s->xcer[5] = value & 0xffff; return; - case 0x36: /* RCERG */ + case 0x36: /* RCERG */ s->rcer[6] = value & 0xffff; return; - case 0x38: /* RCERH */ + case 0x38: /* RCERH */ s->rcer[7] = value & 0xffff; return; - case 0x3a: /* XCERG */ + case 0x3a: /* XCERG */ s->xcer[6] = value & 0xffff; return; - case 0x3c: /* XCERH */ + case 0x3c: /* XCERH */ s->xcer[7] = value & 0xffff; return; } @@ -3359,8 +3359,8 @@ static void omap_mcbsp_writew(void *opaque, hwaddr addr, struct omap_mcbsp_s *s = opaque; int offset = addr & OMAP_MPUI_REG_MASK; - if (offset == 0x04) { /* DXR */ - if (((s->xcr[0] >> 5) & 7) < 3) /* XWDLEN1 */ + if (offset == 0x04) { /* DXR */ + if (((s->xcr[0] >> 5) & 7) < 3) /* XWDLEN1 */ return; if (s->tx_req > 3) { s->tx_req -= 4; @@ -3504,15 +3504,15 @@ static void omap_lpg_update(struct omap_lpg_s *s) int64_t on, period = 1, ticks = 1000; static const int per[8] = { 1, 2, 4, 8, 12, 16, 20, 24 }; - if (~s->control & (1 << 6)) /* LPGRES */ + if (~s->control & (1 << 6)) /* LPGRES */ on = 0; - else if (s->control & (1 << 7)) /* PERM_ON */ + else if (s->control & (1 << 7)) /* PERM_ON */ on = period; else { - period = muldiv64(ticks, per[s->control & 7], /* PERCTRL */ + period = muldiv64(ticks, per[s->control & 7], /* PERCTRL */ 256 / 32); on = (s->clk && s->power) ? muldiv64(ticks, - per[(s->control >> 3) & 7], 256) : 0; /* ONCTRL */ + per[(s->control >> 3) & 7], 256) : 0; /* ONCTRL */ } timer_del(s->tm); @@ -3550,10 +3550,10 @@ static uint64_t omap_lpg_read(void *opaque, hwaddr addr, unsigned size) } switch (offset) { - case 0x00: /* LCR */ + case 0x00: /* LCR */ return s->control; - case 0x04: /* PMR */ + case 0x04: /* PMR */ return s->power; } @@ -3573,14 +3573,14 @@ static void omap_lpg_write(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* LCR */ - if (~value & (1 << 6)) /* LPGRES */ + case 0x00: /* LCR */ + if (~value & (1 << 6)) /* LPGRES */ omap_lpg_reset(s); s->control = value & 0xff; omap_lpg_update(s); return; - case 0x04: /* PMR */ + case 0x04: /* PMR */ s->power = value & 0x01; omap_lpg_update(s); return; @@ -3630,7 +3630,7 @@ static uint64_t omap_mpui_io_read(void *opaque, hwaddr addr, return omap_badwidth_read16(opaque, addr); } - if (addr == OMAP_MPUI_BASE) /* CMR */ + if (addr == OMAP_MPUI_BASE) /* CMR */ return 0xfe4d; OMAP_BAD_REG(addr); @@ -3703,25 +3703,25 @@ static const struct omap_map_s { const char *name; } omap15xx_dsp_mm[] = { /* Strobe 0 */ - { 0xe1010000, 0xfffb0000, 0x800, "UART1 BT" }, /* CS0 */ - { 0xe1010800, 0xfffb0800, 0x800, "UART2 COM" }, /* CS1 */ - { 0xe1011800, 0xfffb1800, 0x800, "McBSP1 audio" }, /* CS3 */ - { 0xe1012000, 0xfffb2000, 0x800, "MCSI2 communication" }, /* CS4 */ - { 0xe1012800, 0xfffb2800, 0x800, "MCSI1 BT u-Law" }, /* CS5 */ - { 0xe1013000, 0xfffb3000, 0x800, "uWire" }, /* CS6 */ - { 0xe1013800, 0xfffb3800, 0x800, "I^2C" }, /* CS7 */ - { 0xe1014000, 0xfffb4000, 0x800, "USB W2FC" }, /* CS8 */ - { 0xe1014800, 0xfffb4800, 0x800, "RTC" }, /* CS9 */ - { 0xe1015000, 0xfffb5000, 0x800, "MPUIO" }, /* CS10 */ - { 0xe1015800, 0xfffb5800, 0x800, "PWL" }, /* CS11 */ - { 0xe1016000, 0xfffb6000, 0x800, "PWT" }, /* CS12 */ - { 0xe1017000, 0xfffb7000, 0x800, "McBSP3" }, /* CS14 */ 
- { 0xe1017800, 0xfffb7800, 0x800, "MMC" }, /* CS15 */ - { 0xe1019000, 0xfffb9000, 0x800, "32-kHz timer" }, /* CS18 */ - { 0xe1019800, 0xfffb9800, 0x800, "UART3" }, /* CS19 */ - { 0xe101c800, 0xfffbc800, 0x800, "TIPB switches" }, /* CS25 */ + { 0xe1010000, 0xfffb0000, 0x800, "UART1 BT" }, /* CS0 */ + { 0xe1010800, 0xfffb0800, 0x800, "UART2 COM" }, /* CS1 */ + { 0xe1011800, 0xfffb1800, 0x800, "McBSP1 audio" }, /* CS3 */ + { 0xe1012000, 0xfffb2000, 0x800, "MCSI2 communication" }, /* CS4 */ + { 0xe1012800, 0xfffb2800, 0x800, "MCSI1 BT u-Law" }, /* CS5 */ + { 0xe1013000, 0xfffb3000, 0x800, "uWire" }, /* CS6 */ + { 0xe1013800, 0xfffb3800, 0x800, "I^2C" }, /* CS7 */ + { 0xe1014000, 0xfffb4000, 0x800, "USB W2FC" }, /* CS8 */ + { 0xe1014800, 0xfffb4800, 0x800, "RTC" }, /* CS9 */ + { 0xe1015000, 0xfffb5000, 0x800, "MPUIO" }, /* CS10 */ + { 0xe1015800, 0xfffb5800, 0x800, "PWL" }, /* CS11 */ + { 0xe1016000, 0xfffb6000, 0x800, "PWT" }, /* CS12 */ + { 0xe1017000, 0xfffb7000, 0x800, "McBSP3" }, /* CS14 */ + { 0xe1017800, 0xfffb7800, 0x800, "MMC" }, /* CS15 */ + { 0xe1019000, 0xfffb9000, 0x800, "32-kHz timer" }, /* CS18 */ + { 0xe1019800, 0xfffb9800, 0x800, "UART3" }, /* CS19 */ + { 0xe101c800, 0xfffbc800, 0x800, "TIPB switches" }, /* CS25 */ /* Strobe 1 */ - { 0xe101e000, 0xfffce000, 0x800, "GPIOs" }, /* CS28 */ + { 0xe101e000, 0xfffce000, 0x800, "GPIOs" }, /* CS28 */ { 0 } }; @@ -4025,18 +4025,18 @@ struct omap_mpu_state_s *omap310_mpu_init(MemoryRegion *dram, 0xfffbd800, omap_findclk(s, "clk32-kHz")); /* Register mappings not currently implemented: - * MCSI2 Comm fffb2000 - fffb27ff (not mapped on OMAP310) - * MCSI1 Bluetooth fffb2800 - fffb2fff (not mapped on OMAP310) - * USB W2FC fffb4000 - fffb47ff - * Camera Interface fffb6800 - fffb6fff - * USB Host fffba000 - fffba7ff - * FAC fffba800 - fffbafff - * HDQ/1-Wire fffbc000 - fffbc7ff - * TIPB switches fffbc800 - fffbcfff - * Mailbox fffcf000 - fffcf7ff - * Local bus IF fffec100 - fffec1ff - * Local bus MMU fffec200 - fffec2ff - * DSP MMU fffed200 - fffed2ff + * MCSI2 Comm fffb2000 - fffb27ff (not mapped on OMAP310) + * MCSI1 Bluetooth fffb2800 - fffb2fff (not mapped on OMAP310) + * USB W2FC fffb4000 - fffb47ff + * Camera Interface fffb6800 - fffb6fff + * USB Host fffba000 - fffba7ff + * FAC fffba800 - fffbafff + * HDQ/1-Wire fffbc000 - fffbc7ff + * TIPB switches fffbc800 - fffbcfff + * Mailbox fffcf000 - fffcf7ff + * Local bus IF fffec100 - fffec1ff + * Local bus MMU fffec200 - fffec2ff + * DSP MMU fffed200 - fffed2ff */ omap_setup_dsp_mapping(system_memory, omap15xx_dsp_mm); diff --git a/hw/arm/omap_sx1.c b/hw/arm/omap_sx1.c index 1d89a20..5d4a31b 100644 --- a/hw/arm/omap_sx1.c +++ b/hw/arm/omap_sx1.c @@ -1,7 +1,7 @@ /* omap_sx1.c Support for the Siemens SX1 smartphone emulation. 
* * Copyright (C) 2008 - * Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> + * Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com> * Copyright (C) 2007 Vladimir Ananiev <vovan888@gmail.com> * * based on PalmOne's (TM) PDAs support (palm.c) diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index deae5cf..15c1ff4 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -19,6 +19,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qemu/datadir.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -53,8 +54,7 @@ #include "target/arm/cpu-qom.h" #include "target/arm/gtimer.h" -#define RAMLIMIT_GB 8192 -#define RAMLIMIT_BYTES (RAMLIMIT_GB * GiB) +#define RAMLIMIT_BYTES (8 * TiB) #define NUM_IRQS 256 #define NUM_SMMU_IRQS 4 @@ -756,7 +756,9 @@ static void sbsa_ref_init(MachineState *machine) sms->smp_cpus = smp_cpus; if (machine->ram_size > sbsa_ref_memmap[SBSA_MEM].size) { - error_report("sbsa-ref: cannot model more than %dGB RAM", RAMLIMIT_GB); + char *size_str = size_to_str(RAMLIMIT_BYTES); + + error_report("sbsa-ref: cannot model more than %s of RAM", size_str); exit(1); } diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index f39b99e..0dcaf2f 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -319,7 +319,7 @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid) g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid); } -inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid) +void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid) { trace_smmu_iotlb_inv_vmid_s1(vmid); g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid); diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 7e8e0f0..b01fc4f 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -34,15 +34,18 @@ #include "hw/core/cpu.h" #include "hw/acpi/acpi-defs.h" #include "hw/acpi/acpi.h" +#include "hw/acpi/pcihp.h" #include "hw/nvram/fw_cfg_acpi.h" #include "hw/acpi/bios-linker-loader.h" #include "hw/acpi/aml-build.h" #include "hw/acpi/utils.h" #include "hw/acpi/pci.h" +#include "hw/acpi/cxl.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/generic_event_device.h" #include "hw/acpi/tpm.h" #include "hw/acpi/hmat.h" +#include "hw/cxl/cxl.h" #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" @@ -119,16 +122,44 @@ static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap) aml_append(scope, dev); } +static void build_acpi0017(Aml *table) +{ + Aml *dev, *scope, *method; + + scope = aml_scope("_SB"); + dev = aml_device("CXLM"); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017"))); + + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(0x0B))); + aml_append(dev, method); + build_cxl_dsm_method(dev); + + aml_append(scope, dev); + aml_append(table, scope); +} + static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, uint32_t irq, VirtMachineState *vms) { int ecam_id = VIRT_ECAM_ID(vms->highmem_ecam); + bool cxl_present = false; + PCIBus *bus = vms->bus; + bool acpi_pcihp = false; + + if (vms->acpi_dev) { + acpi_pcihp = object_property_get_bool(OBJECT(vms->acpi_dev), + ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, + NULL); + } + struct GPEXConfig cfg = { .mmio32 = memmap[VIRT_PCIE_MMIO], .pio = memmap[VIRT_PCIE_PIO], .ecam = memmap[ecam_id], .irq = irq, .bus = vms->bus, + .pci_native_hotplug = !acpi_pcihp, }; if (vms->highmem_mmio) { @@ -136,6 +167,14 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, 
} acpi_dsdt_add_gpex(scope, &cfg); + QLIST_FOREACH(bus, &vms->bus->child, sibling) { + if (pci_bus_is_cxl(bus)) { + cxl_present = true; + } + } + if (cxl_present) { + build_acpi0017(scope); + } } static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap, @@ -266,6 +305,43 @@ static int iort_idmap_compare(gconstpointer a, gconstpointer b) return idmap_a->input_base - idmap_b->input_base; } +/* Compute ID ranges (RIDs) from RC that are directed to the ITS Group node */ +static void create_rc_its_idmaps(GArray *its_idmaps, GArray *smmu_idmaps) +{ + AcpiIortIdMapping *idmap; + AcpiIortIdMapping next_range = {0}; + + /* + * Based on the RID ranges that are directed to the SMMU, determine the + * bypassed RID ranges, i.e., the ones that are directed to the ITS Group + * node and do not pass through the SMMU, by subtracting the SMMU-bound + * ranges from the full RID range (0x0000–0xFFFF). + */ + for (int i = 0; i < smmu_idmaps->len; i++) { + idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + + if (next_range.input_base < idmap->input_base) { + next_range.id_count = idmap->input_base - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } + + next_range.input_base = idmap->input_base + idmap->id_count; + } + + /* + * Append the last RC -> ITS ID mapping. + * + * RIDs are 16-bit, according to the PCI Express 2.0 Base Specification, rev + * 0.9, section 2.2.6.2, "Transaction Descriptor - Transaction ID Field", + * hence the end of the range is 0x10000. + */ + if (next_range.input_base < 0x10000) { + next_range.id_count = 0x10000 - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } +} + + /* * Input Output Remapping Table (IORT) * Conforms to "IO Remapping Table System Software on ARM Platforms", @@ -276,10 +352,9 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { int i, nb_nodes, rc_mapping_count; size_t node_size, smmu_offset = 0; - AcpiIortIdMapping *idmap; uint32_t id = 0; - GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); - GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); + GArray *rc_smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); + GArray *rc_its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id, .oem_table_id = vms->oem_table_id }; @@ -287,40 +362,33 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) acpi_table_begin(&table, table_data); if (vms->iommu == VIRT_IOMMU_SMMUV3) { - AcpiIortIdMapping next_range = {0}; - object_child_foreach_recursive(object_get_root(), - iort_host_bridges, smmu_idmaps); + iort_host_bridges, rc_smmu_idmaps); /* Sort the smmu idmap by input_base */ - g_array_sort(smmu_idmaps, iort_idmap_compare); - - /* - * Split the whole RIDs by mapping from RC to SMMU, - * build the ID mapping from RC to ITS directly. 
- */ - for (i = 0; i < smmu_idmaps->len; i++) { - idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + g_array_sort(rc_smmu_idmaps, iort_idmap_compare); - if (next_range.input_base < idmap->input_base) { - next_range.id_count = idmap->input_base - next_range.input_base; - g_array_append_val(its_idmaps, next_range); - } + nb_nodes = 2; /* RC and SMMUv3 */ + rc_mapping_count = rc_smmu_idmaps->len; - next_range.input_base = idmap->input_base + idmap->id_count; - } + if (vms->its) { + /* + * Knowing the ID ranges from the RC to the SMMU, it's possible to + * determine the ID ranges from RC that go directly to ITS. + */ + create_rc_its_idmaps(rc_its_idmaps, rc_smmu_idmaps); - /* Append the last RC -> ITS ID mapping */ - if (next_range.input_base < 0x10000) { - next_range.id_count = 0x10000 - next_range.input_base; - g_array_append_val(its_idmaps, next_range); + nb_nodes++; /* ITS */ + rc_mapping_count += rc_its_idmaps->len; } - - nb_nodes = 3; /* RC, ITS, SMMUv3 */ - rc_mapping_count = smmu_idmaps->len + its_idmaps->len; } else { - nb_nodes = 2; /* RC, ITS */ - rc_mapping_count = 1; + if (vms->its) { + nb_nodes = 2; /* RC and ITS */ + rc_mapping_count = 1; /* Direct map to ITS */ + } else { + nb_nodes = 1; /* RC only */ + rc_mapping_count = 0; /* No output mapping */ + } } /* Number of IORT Nodes */ build_append_int_noprefix(table_data, nb_nodes, 4); @@ -329,31 +397,43 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) build_append_int_noprefix(table_data, IORT_NODE_OFFSET, 4); build_append_int_noprefix(table_data, 0, 4); /* Reserved */ - /* Table 12 ITS Group Format */ - build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */ - node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */; - build_append_int_noprefix(table_data, node_size, 2); /* Length */ - build_append_int_noprefix(table_data, 1, 1); /* Revision */ - build_append_int_noprefix(table_data, id++, 4); /* Identifier */ - build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */ - build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */ - build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */ - /* GIC ITS Identifier Array */ - build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4); + if (vms->its) { + /* Table 12 ITS Group Format */ + build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */ + node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 1, 1); /* Revision */ + build_append_int_noprefix(table_data, id++, 4); /* Identifier */ + build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */ + build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */ + build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */ + /* GIC ITS Identifier Array */ + build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4); + } if (vms->iommu == VIRT_IOMMU_SMMUV3) { int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE; - + int smmu_mapping_count, offset_to_id_array; + + if (vms->its) { + smmu_mapping_count = 1; /* ITS Group node */ + offset_to_id_array = SMMU_V3_ENTRY_SIZE; /* Just after the header */ + } else { + smmu_mapping_count = 0; /* No ID mappings */ + offset_to_id_array = 0; /* No ID mappings array */ + } smmu_offset = table_data->len - table.table_offset; /* Table 9 SMMUv3 Format */ build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type 
*/ - node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE; + node_size = SMMU_V3_ENTRY_SIZE + + (ID_MAPPING_ENTRY_SIZE * smmu_mapping_count); build_append_int_noprefix(table_data, node_size, 2); /* Length */ build_append_int_noprefix(table_data, 4, 1); /* Revision */ build_append_int_noprefix(table_data, id++, 4); /* Identifier */ - build_append_int_noprefix(table_data, 1, 4); /* Number of ID mappings */ + /* Number of ID mappings */ + build_append_int_noprefix(table_data, smmu_mapping_count, 4); /* Reference to ID Array */ - build_append_int_noprefix(table_data, SMMU_V3_ENTRY_SIZE, 4); + build_append_int_noprefix(table_data, offset_to_id_array, 4); /* Base address */ build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8); /* Flags */ @@ -369,9 +449,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) build_append_int_noprefix(table_data, 0, 4); /* Proximity domain */ /* DeviceID mapping index (ignored since interrupts are GSIV based) */ build_append_int_noprefix(table_data, 0, 4); - - /* output IORT node is the ITS group node (the first node) */ - build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET); + /* Array of ID mappings */ + if (smmu_mapping_count) { + /* Output IORT node is the ITS Group node (the first node). */ + build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET); + } } /* Table 17 Root Complex Node */ @@ -407,29 +489,44 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) if (vms->iommu == VIRT_IOMMU_SMMUV3) { AcpiIortIdMapping *range; - /* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */ - for (i = 0; i < smmu_idmaps->len; i++) { - range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); - /* output IORT node is the smmuv3 node */ + /* + * Map RIDs (input) from RC to SMMUv3 nodes: RC -> SMMUv3. + * + * N.B.: The mapping from SMMUv3 to ITS Group node (SMMUv3 -> ITS) is + * defined in the SMMUv3 table, where all SMMUv3 IDs are mapped to the + * ITS Group node, if ITS is available. + */ + for (i = 0; i < rc_smmu_idmaps->len; i++) { + range = &g_array_index(rc_smmu_idmaps, AcpiIortIdMapping, i); + /* Output IORT node is the SMMUv3 node. */ build_iort_id_mapping(table_data, range->input_base, range->id_count, smmu_offset); } - /* bypassed RIDs connect to ITS group node directly: RC -> ITS */ - for (i = 0; i < its_idmaps->len; i++) { - range = &g_array_index(its_idmaps, AcpiIortIdMapping, i); - /* output IORT node is the ITS group node (the first node) */ - build_iort_id_mapping(table_data, range->input_base, - range->id_count, IORT_NODE_OFFSET); + if (vms->its) { + /* + * Map bypassed (don't go through the SMMU) RIDs (input) to + * ITS Group node directly: RC -> ITS. + */ + for (i = 0; i < rc_its_idmaps->len; i++) { + range = &g_array_index(rc_its_idmaps, AcpiIortIdMapping, i); + /* Output IORT node is the ITS Group node (the first node). */ + build_iort_id_mapping(table_data, range->input_base, + range->id_count, IORT_NODE_OFFSET); + } } } else { - /* output IORT node is the ITS group node (the first node) */ + /* + * Map all RIDs (input) to ITS Group node directly, since there is no + * SMMU: RC -> ITS. + * Output IORT node is the ITS Group node (the first node). 
+ */ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET); } acpi_table_end(linker, &table); - g_array_free(smmu_idmaps, true); - g_array_free(its_idmaps, true); + g_array_free(rc_smmu_idmaps, true); + g_array_free(rc_its_idmaps, true); } /* @@ -737,7 +834,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) memmap[VIRT_HIGH_GIC_REDIST2].size); } - if (its_class_name()) { + if (vms->its) { /* * ACPI spec, Revision 6.0 Errata A * (original 6.0 definition has invalid Length) @@ -810,6 +907,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) const int *irqmap = vms->irqmap; AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = vms->oem_id, .oem_table_id = vms->oem_table_id }; + Aml *pci0_scope; acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); @@ -863,6 +961,33 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) aml_append(dsdt, scope); + pci0_scope = aml_scope("\\_SB.PCI0"); + + aml_append(pci0_scope, build_pci_bridge_edsm()); + build_append_pci_bus_devices(pci0_scope, vms->bus); + if (object_property_find(OBJECT(vms->bus), ACPI_PCIHP_PROP_BSEL)) { + build_append_pcihp_slots(pci0_scope, vms->bus); + } + + if (vms->acpi_dev) { + bool acpi_pcihp; + + acpi_pcihp = object_property_get_bool(OBJECT(vms->acpi_dev), + ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, + NULL); + + if (acpi_pcihp) { + build_acpi_pci_hotplug(dsdt, AML_SYSTEM_MEMORY, + memmap[VIRT_ACPI_PCIHP].base); + build_append_pcihp_resources(pci0_scope, + memmap[VIRT_ACPI_PCIHP].base, + memmap[VIRT_ACPI_PCIHP].size); + + build_append_notification_callback(pci0_scope, vms->bus); + } + } + aml_append(dsdt, pci0_scope); + /* copy AML table into ACPI tables blob */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); @@ -936,7 +1061,10 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) } acpi_add_table(table_offsets, tables_blob); - spcr_setup(tables_blob, tables->linker, vms); + + if (ms->acpi_spcr_enabled) { + spcr_setup(tables_blob, tables->linker, vms); + } acpi_add_table(table_offsets, tables_blob); build_dbg2(tables_blob, tables->linker, vms); @@ -963,16 +1091,19 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) } } + if (vms->cxl_devices_state.is_enabled) { + cxl_build_cedt(table_offsets, tables_blob, tables->linker, + vms->oem_id, vms->oem_table_id, &vms->cxl_devices_state); + } + if (ms->nvdimms_state->is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, ms->nvdimms_state, ms->ram_slots, vms->oem_id, vms->oem_table_id); } - if (its_class_name()) { - acpi_add_table(table_offsets, tables_blob); - build_iort(tables_blob, tables->linker, vms); - } + acpi_add_table(table_offsets, tables_blob); + build_iort(tables_blob, tables->linker, vms); #ifdef CONFIG_TPM if (tpm_get_version(tpm_find()) == TPM_VERSION_2_0) { diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 9a6cd08..ef6be36 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -57,6 +57,7 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "hw/pci-host/gpex.h" +#include "hw/pci-bridge/pci_expander_bridge.h" #include "hw/virtio/virtio-pci.h" #include "hw/core/sysbus-fdt.h" #include "hw/platform-bus.h" @@ -75,6 +76,7 @@ #include "standard-headers/linux/input.h" #include "hw/arm/smmuv3.h" #include "hw/acpi/acpi.h" +#include "hw/acpi/pcihp.h" #include "target/arm/cpu-qom.h" #include "target/arm/internals.h" #include "target/arm/multiprocessing.h" @@ -86,6 +88,8 @@ #include "hw/virtio/virtio-md-pci.h" 
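The build_iort() rework above derives the RC -> ITS ID maps as the complement, within the 16-bit RID space, of the sorted RC -> SMMU ranges. A standalone sketch of the same interval walk, using plain arrays instead of GArray; the names are illustrative, not the QEMU API:

#include <stddef.h>
#include <stdint.h>

typedef struct {
    uint32_t input_base;
    uint32_t id_count;
} IdRange;

/*
 * Given SMMU-bound RID ranges sorted by input_base, emit the gaps
 * between them (and after the last one) up to 0x10000 -- the RIDs
 * that bypass the SMMU and map straight to the ITS Group node.
 * Returns the number of complement ranges written to 'out'.
 */
static size_t complement_rid_ranges(const IdRange *smmu, size_t n,
                                    IdRange *out)
{
    uint32_t next_base = 0;
    size_t n_out = 0;

    for (size_t i = 0; i < n; i++) {
        if (next_base < smmu[i].input_base) {
            out[n_out++] = (IdRange){
                .input_base = next_base,
                .id_count = smmu[i].input_base - next_base,
            };
        }
        next_base = smmu[i].input_base + smmu[i].id_count;
    }
    if (next_base < 0x10000) {          /* RIDs are 16-bit */
        out[n_out++] = (IdRange){
            .input_base = next_base,
            .id_count = 0x10000 - next_base,
        };
    }
    return n_out;
}

Each gap before an SMMU-bound range, plus the tail up to 0x10000, becomes one direct RC -> ITS mapping, which is exactly what create_rc_its_idmaps() appends above.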
#include "hw/virtio/virtio-iommu.h" #include "hw/char/pl011.h" +#include "hw/cxl/cxl.h" +#include "hw/cxl/cxl_host.h" #include "qemu/guest-random.h" static GlobalProperty arm_virt_compat[] = { @@ -183,6 +187,7 @@ static const MemMapEntry base_memmap[] = { [VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN}, [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, + [VIRT_ACPI_PCIHP] = { 0x090c0000, ACPI_PCIHP_SIZE }, [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, @@ -220,9 +225,11 @@ static const MemMapEntry base_memmap[] = { static MemMapEntry extended_memmap[] = { /* Additional 64 MB redist region (can contain up to 512 redistributors) */ [VIRT_HIGH_GIC_REDIST2] = { 0x0, 64 * MiB }, + [VIRT_CXL_HOST] = { 0x0, 64 * KiB * 16 }, /* 16 UID */ [VIRT_HIGH_PCIE_ECAM] = { 0x0, 256 * MiB }, /* Second PCIe window */ [VIRT_HIGH_PCIE_MMIO] = { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE }, + /* Any CXL Fixed memory windows come here */ }; static const int a15irqmap[] = { @@ -681,8 +688,10 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) { DeviceState *dev; MachineState *ms = MACHINE(vms); + SysBusDevice *sbdev; int irq = vms->irqmap[VIRT_ACPI_GED]; uint32_t event = ACPI_GED_PWR_DOWN_EVT; + bool acpi_pcihp; if (ms->ram_slots) { event |= ACPI_GED_MEM_HOTPLUG_EVT; @@ -694,32 +703,44 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) dev = qdev_new(TYPE_ACPI_GED); qdev_prop_set_uint32(dev, "ged-event", event); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + object_property_set_link(OBJECT(dev), "bus", OBJECT(vms->bus), &error_abort); + sbdev = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sbdev, &error_fatal); + + sysbus_mmio_map_name(sbdev, TYPE_ACPI_GED, vms->memmap[VIRT_ACPI_GED].base); + sysbus_mmio_map_name(sbdev, ACPI_MEMHP_REGION_NAME, + vms->memmap[VIRT_PCDIMM_ACPI].base); + + acpi_pcihp = object_property_get_bool(OBJECT(dev), + ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, NULL); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); + if (acpi_pcihp) { + int pcihp_region_index; + + pcihp_region_index = sysbus_mmio_map_name(sbdev, ACPI_PCIHP_REGION_NAME, + vms->memmap[VIRT_ACPI_PCIHP].base); + assert(pcihp_region_index >= 0); + } + + sysbus_connect_irq(sbdev, 0, qdev_get_gpio_in(vms->gic, irq)); return dev; } static void create_its(VirtMachineState *vms) { - const char *itsclass = its_class_name(); DeviceState *dev; - if (!strcmp(itsclass, "arm-gicv3-its")) { - if (!vms->tcg_its) { - itsclass = NULL; - } - } - - if (!itsclass) { - /* Do nothing if not supported */ + assert(vms->its); + if (!kvm_irqchip_in_kernel() && !vms->tcg_its) { + /* + * Do nothing if ITS is neither supported by the host nor emulated by + * the machine. 
+ */ return; } - dev = qdev_new(itsclass); + dev = qdev_new(its_class_name()); object_property_set_link(OBJECT(dev), "parent-gicv3", OBJECT(vms->gic), &error_abort); @@ -795,6 +816,13 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) default: g_assert_not_reached(); } + + if (kvm_enabled() && vms->virt && + (revision != 3 || !kvm_irqchip_in_kernel())) { + error_report("KVM EL2 is only supported with in-kernel GICv3"); + exit(1); + } + vms->gic = qdev_new(gictype); qdev_prop_set_uint32(vms->gic, "revision", revision); qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); @@ -831,6 +859,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) OBJECT(mem), &error_fatal); qdev_prop_set_bit(vms->gic, "has-lpi", true); } + } else if (vms->virt) { + qdev_prop_set_uint32(vms->gic, "maintenance-interrupt-id", + ARCH_GIC_MAINT_IRQ); } } else { if (!kvm_irqchip_in_kernel()) { @@ -1487,9 +1518,12 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms) qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle); g_free(node); - qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map", - 0x0, vms->iommu_phandle, 0x0, bdf, - bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf); + if (!vms->default_bus_bypass_iommu) { + qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, bdf, + bdf + 1, vms->iommu_phandle, bdf + 1, + 0xffff - bdf); + } } static void create_pcie(VirtMachineState *vms) @@ -1612,8 +1646,10 @@ static void create_pcie(VirtMachineState *vms) switch (vms->iommu) { case VIRT_IOMMU_SMMUV3: create_smmu(vms, vms->bus); - qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", - 0x0, vms->iommu_phandle, 0x0, 0x10000); + if (!vms->default_bus_bypass_iommu) { + qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, 0x10000); + } break; default: g_assert_not_reached(); @@ -1621,6 +1657,17 @@ static void create_pcie(VirtMachineState *vms) } } +static void create_cxl_host_reg_region(VirtMachineState *vms) +{ + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *mr = &vms->cxl_devices_state.host_mr; + + memory_region_init(mr, OBJECT(vms), "cxl_host_reg", + vms->memmap[VIRT_CXL_HOST].size); + memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr); + vms->highmem_cxl = true; +} + static void create_platform_bus(VirtMachineState *vms) { DeviceState *dev; @@ -1737,6 +1784,12 @@ void virt_machine_done(Notifier *notifier, void *data) struct arm_boot_info *info = &vms->bootinfo; AddressSpace *as = arm_boot_address_space(cpu, info); + cxl_hook_up_pxb_registers(vms->bus, &vms->cxl_devices_state, + &error_fatal); + + if (vms->cxl_devices_state.is_enabled) { + cxl_fmws_link_targets(&error_fatal); + } /* * If the user provided a dtb, we assume the dynamic sysbus nodes * already are integrated there. 
This corresponds to a use case where @@ -1783,6 +1836,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, { bool *enabled_array[] = { &vms->highmem_redists, + &vms->highmem_cxl, &vms->highmem_ecam, &vms->highmem_mmio, }; @@ -1890,6 +1944,9 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) if (device_memory_size > 0) { machine_memory_devices_init(ms, device_memory_base, device_memory_size); } + vms->highest_gpa = cxl_fmws_set_memmap(ROUND_UP(vms->highest_gpa + 1, + 256 * MiB), + BIT_ULL(pa_bits)) - 1; } static VirtGICType finalize_gic_version_do(const char *accel_name, @@ -2024,10 +2081,11 @@ static void finalize_gic_version(VirtMachineState *vms) } /* - * virt_cpu_post_init() must be called after the CPUs have - * been realized and the GIC has been created. + * virt_post_cpus_gic_realized() must be called after the CPUs and + * the GIC have both been realized. */ -static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) +static void virt_post_cpus_gic_realized(VirtMachineState *vms, + MemoryRegion *sysmem) { int max_cpus = MACHINE(vms)->smp.max_cpus; bool aarch64, pmu, steal_time; @@ -2060,6 +2118,10 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) memory_region_init_ram(pvtime, NULL, "pvtime", pvtime_size, NULL); memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime); } + if (!aarch64 && vms->virt) { + error_report("KVM does not support EL2 on an AArch32 vCPU"); + exit(1); + } CPU_FOREACH(cpu) { if (pmu) { @@ -2198,14 +2260,20 @@ static void machvirt_init(MachineState *machine) exit(1); } - if (vms->secure && (kvm_enabled() || hvf_enabled())) { + if (vms->secure && !tcg_enabled() && !qtest_enabled()) { error_report("mach-virt: %s does not support providing " "Security extensions (TrustZone) to the guest CPU", current_accel_name()); exit(1); } - if (vms->virt && (kvm_enabled() || hvf_enabled())) { + if (vms->virt && kvm_enabled() && !kvm_arm_el2_supported()) { + error_report("mach-virt: host kernel KVM does not support providing " + "Virtualization extensions to the guest CPU"); + exit(1); + } + + if (vms->virt && !kvm_enabled() && !tcg_enabled() && !qtest_enabled()) { error_report("mach-virt: %s does not support providing " "Virtualization extensions to the guest CPU", current_accel_name()); @@ -2340,11 +2408,13 @@ static void machvirt_init(MachineState *machine) memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, machine->ram); + cxl_fmws_update_mmio(); + virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); create_gic(vms, sysmem); - virt_cpu_post_init(vms, sysmem); + virt_post_cpus_gic_realized(vms, sysmem); fdt_add_pmu_nodes(vms); @@ -2395,6 +2465,7 @@ static void machvirt_init(MachineState *machine) create_rtc(vms); create_pcie(vms); + create_cxl_host_reg_region(vms); if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) { vms->acpi_dev = create_acpi_ged(vms); @@ -3337,12 +3408,8 @@ static void virt_instance_init(Object *obj) /* Default allows ITS instantiation */ vms->its = true; - - if (vmc->no_tcg_its) { - vms->tcg_its = false; - } else { - vms->tcg_its = true; - } + /* Allow ITS emulation if the machine version supports it */ + vms->tcg_its = !vmc->no_tcg_its; /* Default disallows iommu instantiation */ vms->iommu = VIRT_IOMMU_NONE; @@ -3365,6 +3432,7 @@ static void virt_instance_init(Object *obj) vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); + cxl_machine_init(obj, &vms->cxl_devices_state); } 
static const TypeInfo virt_machine_info = { diff --git a/hw/arm/xen-pvh.c b/hw/arm/xen-pvh.c index 4b26bcf..1a9eeb0 100644 --- a/hw/arm/xen-pvh.c +++ b/hw/arm/xen-pvh.c @@ -10,7 +10,6 @@ #include "hw/boards.h" #include "system/system.h" #include "hw/xen/xen-pvh-common.h" -#include "hw/xen/arch_hvm.h" #define TYPE_XEN_ARM MACHINE_TYPE_NAME("xenpvh") diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c index 669a046..eb7a847 100644 --- a/hw/audio/ac97.c +++ b/hw/audio/ac97.c @@ -886,7 +886,7 @@ static void nabm_writel(void *opaque, uint32_t addr, uint32_t val) static int write_audio(AC97LinkState *s, AC97BusMasterRegs *r, int max, int *stop) { - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; uint32_t addr = r->bd.addr; uint32_t temp = r->picb << 1; uint32_t written = 0; @@ -959,7 +959,7 @@ static void write_bup(AC97LinkState *s, int elapsed) static int read_audio(AC97LinkState *s, AC97BusMasterRegs *r, int max, int *stop) { - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; uint32_t addr = r->bd.addr; uint32_t temp = r->picb << 1; uint32_t nread = 0; diff --git a/hw/audio/asc.c b/hw/audio/asc.c index 18382cc..edd42d6 100644 --- a/hw/audio/asc.c +++ b/hw/audio/asc.c @@ -12,6 +12,7 @@ #include "qemu/osdep.h" #include "qemu/timer.h" +#include "qapi/error.h" #include "hw/sysbus.h" #include "hw/irq.h" #include "audio/audio.h" @@ -653,11 +654,17 @@ static void asc_realize(DeviceState *dev, Error **errp) s->voice = AUD_open_out(&s->card, s->voice, "asc.out", s, asc_out_cb, &as); + if (!s->voice) { + AUD_remove_card(&s->card); + error_setg(errp, "Initializing audio stream failed"); + return; + } + s->shift = 1; s->samples = AUD_get_buffer_size_out(s->voice) >> s->shift; s->mixbuf = g_malloc0(s->samples << s->shift); - s->silentbuf = g_malloc0(s->samples << s->shift); + s->silentbuf = g_malloc(s->samples << s->shift); memset(s->silentbuf, 0x80, s->samples << s->shift); /* Add easc registers if required */ diff --git a/hw/audio/cs4231a.c b/hw/audio/cs4231a.c index 06b44da..6dfff20 100644 --- a/hw/audio/cs4231a.c +++ b/hw/audio/cs4231a.c @@ -528,7 +528,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos, int dma_len, int len) { int temp, net; - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma); temp = len; @@ -547,7 +547,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos, copied = k->read_memory(s->isa_dma, nchan, tmpbuf, dma_pos, to_copy); if (s->tab) { int i; - int16_t linbuf[4096]; + QEMU_UNINITIALIZED int16_t linbuf[4096]; for (i = 0; i < copied; ++i) linbuf[i] = s->tab[tmpbuf[i]]; @@ -682,6 +682,10 @@ static void cs4231a_realizefn (DeviceState *dev, Error **errp) return; } + if (s->irq >= ISA_NUM_IRQS) { + error_setg(errp, "Invalid IRQ %d (max %d)", s->irq, ISA_NUM_IRQS - 1); + return; + } s->pic = isa_bus_get_irq(bus, s->irq); k = ISADMA_GET_CLASS(s->isa_dma); k->register_channel(s->isa_dma, s->dma, cs_dma_read, s); diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c index 8efb969..a6a32a6 100644 --- a/hw/audio/es1370.c +++ b/hw/audio/es1370.c @@ -604,7 +604,7 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size) static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, int max, bool *irq) { - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; size_t to_transfer; uint32_t addr = d->frame_addr; int sc = d->scount & 0xffff; diff --git a/hw/audio/gus.c b/hw/audio/gus.c index 87e8634..c36df02 100644 --- a/hw/audio/gus.c +++ 
b/hw/audio/gus.c @@ -183,7 +183,7 @@ static int GUS_read_DMA (void *opaque, int nchan, int dma_pos, int dma_len) { GUSState *s = opaque; IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma); - char tmpbuf[4096]; + QEMU_UNINITIALIZED char tmpbuf[4096]; int pos = dma_pos, mode, left = dma_len - dma_pos; ldebug ("read DMA %#x %d\n", dma_pos, dma_len); diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c index 6d3ebbb..c5c79d0 100644 --- a/hw/audio/marvell_88w8618.c +++ b/hw/audio/marvell_88w8618.c @@ -66,7 +66,7 @@ static void mv88w8618_audio_callback(void *opaque, int free_out, int free_in) { mv88w8618_audio_state *s = opaque; int16_t *codec_buffer; - int8_t buf[4096]; + QEMU_UNINITIALIZED int8_t buf[4096]; int8_t *mem_buffer; int pos, block_size; diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c index 19fd3b9..bac6411 100644 --- a/hw/audio/sb16.c +++ b/hw/audio/sb16.c @@ -1181,7 +1181,7 @@ static int write_audio (SB16State *s, int nchan, int dma_pos, IsaDma *isa_dma = nchan == s->dma ? s->isa_dma : s->isa_hdma; IsaDmaClass *k = ISADMA_GET_CLASS(isa_dma); int temp, net; - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; temp = len; net = 0; diff --git a/hw/audio/via-ac97.c b/hw/audio/via-ac97.c index 1e0a5c7..d5231e1 100644 --- a/hw/audio/via-ac97.c +++ b/hw/audio/via-ac97.c @@ -175,7 +175,7 @@ static void out_cb(void *opaque, int avail) ViaAC97SGDChannel *c = &s->aur; int temp, to_copy, copied; bool stop = false; - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; if (c->stat & STAT_PAUSED) { return; diff --git a/hw/block/Kconfig b/hw/block/Kconfig index a898e04..737dbcd 100644 --- a/hw/block/Kconfig +++ b/hw/block/Kconfig @@ -13,9 +13,6 @@ config FDC_SYSBUS config SSI_M25P80 bool -config NAND - bool - config PFLASH_CFI01 bool diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c index f3939e7..db22190 100644 --- a/hw/block/hd-geometry.c +++ b/hw/block/hd-geometry.c @@ -33,7 +33,6 @@ #include "qemu/osdep.h" #include "system/block-backend.h" #include "qapi/qapi-types-block.h" -#include "qemu/bswap.h" #include "hw/block/block.h" #include "trace.h" diff --git a/hw/block/meson.build b/hw/block/meson.build index 16a51bf..43ed296 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -6,7 +6,6 @@ system_ss.add(files( system_ss.add(when: 'CONFIG_FDC', if_true: files('fdc.c')) system_ss.add(when: 'CONFIG_FDC_ISA', if_true: files('fdc-isa.c')) system_ss.add(when: 'CONFIG_FDC_SYSBUS', if_true: files('fdc-sysbus.c')) -system_ss.add(when: 'CONFIG_NAND', if_true: files('nand.c')) system_ss.add(when: 'CONFIG_PFLASH_CFI01', if_true: files('pflash_cfi01.c')) system_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c')) system_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) @@ -14,7 +13,9 @@ system_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c')) system_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) system_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) -specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c')) -specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c', 'virtio-blk-common.c')) +specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) +system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk-common.c')) +specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c')) +system_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('virtio-blk-common.c')) 
subdir('dataplane') diff --git a/hw/block/nand.c b/hw/block/nand.c deleted file mode 100644 index c80bf78..0000000 --- a/hw/block/nand.c +++ /dev/null @@ -1,835 +0,0 @@ -/* - * Flash NAND memory emulation. Based on "16M x 8 Bit NAND Flash - * Memory" datasheet for the KM29U128AT / K9F2808U0A chips from - * Samsung Electronic. - * - * Copyright (c) 2006 Openedhand Ltd. - * Written by Andrzej Zaborowski <balrog@zabor.org> - * - * Support for additional features based on "MT29F2G16ABCWP 2Gx16" - * datasheet from Micron Technology and "NAND02G-B2C" datasheet - * from ST Microelectronics. - * - * This code is licensed under the GNU GPL v2. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#ifndef NAND_IO - -#include "qemu/osdep.h" -#include "hw/hw.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" -#include "hw/block/flash.h" -#include "system/block-backend.h" -#include "migration/vmstate.h" -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -# define NAND_CMD_READ0 0x00 -# define NAND_CMD_READ1 0x01 -# define NAND_CMD_READ2 0x50 -# define NAND_CMD_LPREAD2 0x30 -# define NAND_CMD_NOSERIALREAD2 0x35 -# define NAND_CMD_RANDOMREAD1 0x05 -# define NAND_CMD_RANDOMREAD2 0xe0 -# define NAND_CMD_READID 0x90 -# define NAND_CMD_RESET 0xff -# define NAND_CMD_PAGEPROGRAM1 0x80 -# define NAND_CMD_PAGEPROGRAM2 0x10 -# define NAND_CMD_CACHEPROGRAM2 0x15 -# define NAND_CMD_BLOCKERASE1 0x60 -# define NAND_CMD_BLOCKERASE2 0xd0 -# define NAND_CMD_READSTATUS 0x70 -# define NAND_CMD_COPYBACKPRG1 0x85 - -# define NAND_IOSTATUS_ERROR (1 << 0) -# define NAND_IOSTATUS_PLANE0 (1 << 1) -# define NAND_IOSTATUS_PLANE1 (1 << 2) -# define NAND_IOSTATUS_PLANE2 (1 << 3) -# define NAND_IOSTATUS_PLANE3 (1 << 4) -# define NAND_IOSTATUS_READY (1 << 6) -# define NAND_IOSTATUS_UNPROTCT (1 << 7) - -# define MAX_PAGE 0x800 -# define MAX_OOB 0x40 - -typedef struct NANDFlashState NANDFlashState; -struct NANDFlashState { - DeviceState parent_obj; - - uint8_t manf_id, chip_id; - uint8_t buswidth; /* in BYTES */ - int size, pages; - int page_shift, oob_shift, erase_shift, addr_shift; - uint8_t *storage; - BlockBackend *blk; - int mem_oob; - - uint8_t cle, ale, ce, wp, gnd; - - uint8_t io[MAX_PAGE + MAX_OOB + 0x400]; - uint8_t *ioaddr; - int iolen; - - uint32_t cmd; - uint64_t addr; - int addrlen; - int status; - int offset; - - void (*blk_write)(NANDFlashState *s); - void (*blk_erase)(NANDFlashState *s); - /* - * Returns %true when block containing (@addr + @offset) is - * successfully loaded, otherwise %false. 
- */ - bool (*blk_load)(NANDFlashState *s, uint64_t addr, unsigned offset); - - uint32_t ioaddr_vmstate; -}; - -#define TYPE_NAND "nand" - -OBJECT_DECLARE_SIMPLE_TYPE(NANDFlashState, NAND) - -static void mem_and(uint8_t *dest, const uint8_t *src, size_t n) -{ - /* Like memcpy() but we logical-AND the data into the destination */ - int i; - for (i = 0; i < n; i++) { - dest[i] &= src[i]; - } -} - -# define NAND_NO_AUTOINCR 0x00000001 -# define NAND_BUSWIDTH_16 0x00000002 -# define NAND_NO_PADDING 0x00000004 -# define NAND_CACHEPRG 0x00000008 -# define NAND_COPYBACK 0x00000010 -# define NAND_IS_AND 0x00000020 -# define NAND_4PAGE_ARRAY 0x00000040 -# define NAND_NO_READRDY 0x00000100 -# define NAND_SAMSUNG_LP (NAND_NO_PADDING | NAND_COPYBACK) - -# define NAND_IO - -# define PAGE(addr) ((addr) >> ADDR_SHIFT) -# define PAGE_START(page) (PAGE(page) * (NAND_PAGE_SIZE + OOB_SIZE)) -# define PAGE_MASK ((1 << ADDR_SHIFT) - 1) -# define OOB_SHIFT (PAGE_SHIFT - 5) -# define OOB_SIZE (1 << OOB_SHIFT) -# define SECTOR(addr) ((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT)) -# define SECTOR_OFFSET(addr) ((addr) & ((511 >> PAGE_SHIFT) << 8)) - -# define NAND_PAGE_SIZE 256 -# define PAGE_SHIFT 8 -# define PAGE_SECTORS 1 -# define ADDR_SHIFT 8 -# include "nand.c" -# define NAND_PAGE_SIZE 512 -# define PAGE_SHIFT 9 -# define PAGE_SECTORS 1 -# define ADDR_SHIFT 8 -# include "nand.c" -# define NAND_PAGE_SIZE 2048 -# define PAGE_SHIFT 11 -# define PAGE_SECTORS 4 -# define ADDR_SHIFT 16 -# include "nand.c" - -/* Information based on Linux drivers/mtd/nand/raw/nand_ids.c */ -static const struct { - int size; - int width; - int page_shift; - int erase_shift; - uint32_t options; -} nand_flash_ids[0x100] = { - [0 ... 0xff] = { 0 }, - - [0x6b] = { 4, 8, 9, 4, 0 }, - [0xe3] = { 4, 8, 9, 4, 0 }, - [0xe5] = { 4, 8, 9, 4, 0 }, - [0xd6] = { 8, 8, 9, 4, 0 }, - [0xe6] = { 8, 8, 9, 4, 0 }, - - [0x33] = { 16, 8, 9, 5, 0 }, - [0x73] = { 16, 8, 9, 5, 0 }, - [0x43] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x53] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 }, - - [0x35] = { 32, 8, 9, 5, 0 }, - [0x75] = { 32, 8, 9, 5, 0 }, - [0x45] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x55] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 }, - - [0x36] = { 64, 8, 9, 5, 0 }, - [0x76] = { 64, 8, 9, 5, 0 }, - [0x46] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x56] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 }, - - [0x78] = { 128, 8, 9, 5, 0 }, - [0x39] = { 128, 8, 9, 5, 0 }, - [0x79] = { 128, 8, 9, 5, 0 }, - [0x72] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x49] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x74] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x59] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, - - [0x71] = { 256, 8, 9, 5, 0 }, - - /* - * These are the new chips with large page size. 
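An aside for readers of the deletion above: nand.c compiled itself three times through self-inclusion, guarded by NAND_IO, so that glue() stamped out one set of block read/write/erase helpers per page size (256, 512 and 2048 bytes). A stripped-down sketch of that idiom, with invented names:

/* template.c -- the self-inclusion idiom from the removed nand.c, reduced
 * to a skeleton. All names are illustrative; the file must be saved as
 * "template.c" for the self-#include to resolve. */
#ifndef PAGE_TEMPLATE
#define PAGE_TEMPLATE

#include <stdio.h>

#define glue_(a, b) a##b
#define glue(a, b)  glue_(a, b)

#define DEMO_PAGE_SIZE 256
#include "template.c"            /* expands dump_page_256() below */
#undef DEMO_PAGE_SIZE

#define DEMO_PAGE_SIZE 512
#include "template.c"            /* expands dump_page_512() below */
#undef DEMO_PAGE_SIZE

int main(void)
{
    dump_page_256();
    dump_page_512();
    return 0;
}

#else /* re-entered once per DEMO_PAGE_SIZE value */

static void glue(dump_page_, DEMO_PAGE_SIZE)(void)
{
    printf("page size: %d\n", DEMO_PAGE_SIZE);
}

#endif /* PAGE_TEMPLATE */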
The pagesize and the - * erasesize is determined from the extended id bytes - */ -# define LP_OPTIONS (NAND_SAMSUNG_LP | NAND_NO_READRDY | NAND_NO_AUTOINCR) -# define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16) - - /* 512 Megabit */ - [0xa2] = { 64, 8, 0, 0, LP_OPTIONS }, - [0xf2] = { 64, 8, 0, 0, LP_OPTIONS }, - [0xb2] = { 64, 16, 0, 0, LP_OPTIONS16 }, - [0xc2] = { 64, 16, 0, 0, LP_OPTIONS16 }, - - /* 1 Gigabit */ - [0xa1] = { 128, 8, 0, 0, LP_OPTIONS }, - [0xf1] = { 128, 8, 0, 0, LP_OPTIONS }, - [0xb1] = { 128, 16, 0, 0, LP_OPTIONS16 }, - [0xc1] = { 128, 16, 0, 0, LP_OPTIONS16 }, - - /* 2 Gigabit */ - [0xaa] = { 256, 8, 0, 0, LP_OPTIONS }, - [0xda] = { 256, 8, 0, 0, LP_OPTIONS }, - [0xba] = { 256, 16, 0, 0, LP_OPTIONS16 }, - [0xca] = { 256, 16, 0, 0, LP_OPTIONS16 }, - - /* 4 Gigabit */ - [0xac] = { 512, 8, 0, 0, LP_OPTIONS }, - [0xdc] = { 512, 8, 0, 0, LP_OPTIONS }, - [0xbc] = { 512, 16, 0, 0, LP_OPTIONS16 }, - [0xcc] = { 512, 16, 0, 0, LP_OPTIONS16 }, - - /* 8 Gigabit */ - [0xa3] = { 1024, 8, 0, 0, LP_OPTIONS }, - [0xd3] = { 1024, 8, 0, 0, LP_OPTIONS }, - [0xb3] = { 1024, 16, 0, 0, LP_OPTIONS16 }, - [0xc3] = { 1024, 16, 0, 0, LP_OPTIONS16 }, - - /* 16 Gigabit */ - [0xa5] = { 2048, 8, 0, 0, LP_OPTIONS }, - [0xd5] = { 2048, 8, 0, 0, LP_OPTIONS }, - [0xb5] = { 2048, 16, 0, 0, LP_OPTIONS16 }, - [0xc5] = { 2048, 16, 0, 0, LP_OPTIONS16 }, -}; - -static void nand_reset(DeviceState *dev) -{ - NANDFlashState *s = NAND(dev); - s->cmd = NAND_CMD_READ0; - s->addr = 0; - s->addrlen = 0; - s->iolen = 0; - s->offset = 0; - s->status &= NAND_IOSTATUS_UNPROTCT; - s->status |= NAND_IOSTATUS_READY; -} - -static inline void nand_pushio_byte(NANDFlashState *s, uint8_t value) -{ - s->ioaddr[s->iolen++] = value; - for (value = s->buswidth; --value;) { - s->ioaddr[s->iolen++] = 0; - } -} - -/* - * nand_load_block: Load block containing (s->addr + @offset). - * Returns length of data available at @offset in this block. - */ -static unsigned nand_load_block(NANDFlashState *s, unsigned offset) -{ - unsigned iolen; - - if (!s->blk_load(s, s->addr, offset)) { - return 0; - } - - iolen = (1 << s->page_shift); - if (s->gnd) { - iolen += 1 << s->oob_shift; - } - assert(offset <= iolen); - iolen -= offset; - - return iolen; -} - -static void nand_command(NANDFlashState *s) -{ - switch (s->cmd) { - case NAND_CMD_READ0: - s->iolen = 0; - break; - - case NAND_CMD_READID: - s->ioaddr = s->io; - s->iolen = 0; - nand_pushio_byte(s, s->manf_id); - nand_pushio_byte(s, s->chip_id); - nand_pushio_byte(s, 'Q'); /* Don't-care byte (often 0xa5) */ - if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { - /* Page Size, Block Size, Spare Size; bit 6 indicates - * 8 vs 16 bit width NAND. - */ - nand_pushio_byte(s, (s->buswidth == 2) ? 0x55 : 0x15); - } else { - nand_pushio_byte(s, 0xc0); /* Multi-plane */ - } - break; - - case NAND_CMD_RANDOMREAD2: - case NAND_CMD_NOSERIALREAD2: - if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP)) - break; - s->iolen = nand_load_block(s, s->addr & ((1 << s->addr_shift) - 1)); - break; - - case NAND_CMD_RESET: - nand_reset(DEVICE(s)); - break; - - case NAND_CMD_PAGEPROGRAM1: - s->ioaddr = s->io; - s->iolen = 0; - break; - - case NAND_CMD_PAGEPROGRAM2: - if (s->wp) { - s->blk_write(s); - } - break; - - case NAND_CMD_BLOCKERASE1: - break; - - case NAND_CMD_BLOCKERASE2: - s->addr &= (1ull << s->addrlen * 8) - 1; - s->addr <<= nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP ? 
- 16 : 8; - - if (s->wp) { - s->blk_erase(s); - } - break; - - case NAND_CMD_READSTATUS: - s->ioaddr = s->io; - s->iolen = 0; - nand_pushio_byte(s, s->status); - break; - - default: - printf("%s: Unknown NAND command 0x%02x\n", __func__, s->cmd); - } -} - -static int nand_pre_save(void *opaque) -{ - NANDFlashState *s = NAND(opaque); - - s->ioaddr_vmstate = s->ioaddr - s->io; - - return 0; -} - -static int nand_post_load(void *opaque, int version_id) -{ - NANDFlashState *s = NAND(opaque); - - if (s->ioaddr_vmstate > sizeof(s->io)) { - return -EINVAL; - } - s->ioaddr = s->io + s->ioaddr_vmstate; - - return 0; -} - -static const VMStateDescription vmstate_nand = { - .name = "nand", - .version_id = 1, - .minimum_version_id = 1, - .pre_save = nand_pre_save, - .post_load = nand_post_load, - .fields = (const VMStateField[]) { - VMSTATE_UINT8(cle, NANDFlashState), - VMSTATE_UINT8(ale, NANDFlashState), - VMSTATE_UINT8(ce, NANDFlashState), - VMSTATE_UINT8(wp, NANDFlashState), - VMSTATE_UINT8(gnd, NANDFlashState), - VMSTATE_BUFFER(io, NANDFlashState), - VMSTATE_UINT32(ioaddr_vmstate, NANDFlashState), - VMSTATE_INT32(iolen, NANDFlashState), - VMSTATE_UINT32(cmd, NANDFlashState), - VMSTATE_UINT64(addr, NANDFlashState), - VMSTATE_INT32(addrlen, NANDFlashState), - VMSTATE_INT32(status, NANDFlashState), - VMSTATE_INT32(offset, NANDFlashState), - /* XXX: do we want to save s->storage too? */ - VMSTATE_END_OF_LIST() - } -}; - -static void nand_realize(DeviceState *dev, Error **errp) -{ - int pagesize; - NANDFlashState *s = NAND(dev); - int ret; - - - s->buswidth = nand_flash_ids[s->chip_id].width >> 3; - s->size = nand_flash_ids[s->chip_id].size << 20; - if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { - s->page_shift = 11; - s->erase_shift = 6; - } else { - s->page_shift = nand_flash_ids[s->chip_id].page_shift; - s->erase_shift = nand_flash_ids[s->chip_id].erase_shift; - } - - switch (1 << s->page_shift) { - case 256: - nand_init_256(s); - break; - case 512: - nand_init_512(s); - break; - case 2048: - nand_init_2048(s); - break; - default: - error_setg(errp, "Unsupported NAND block size %#x", - 1 << s->page_shift); - return; - } - - pagesize = 1 << s->oob_shift; - s->mem_oob = 1; - if (s->blk) { - if (!blk_supports_write_perm(s->blk)) { - error_setg(errp, "Can't use a read-only drive"); - return; - } - ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, - BLK_PERM_ALL, errp); - if (ret < 0) { - return; - } - if (blk_getlength(s->blk) >= - (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { - pagesize = 0; - s->mem_oob = 0; - } - } else { - pagesize += 1 << s->page_shift; - } - if (pagesize) { - s->storage = (uint8_t *) memset(g_malloc(s->pages * pagesize), - 0xff, s->pages * pagesize); - } - /* Give s->ioaddr a sane value in case we save state before it is used. 
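The vmstate handling being deleted here is still a pattern worth knowing: a raw pointer into a device buffer (s->ioaddr) cannot go on the wire, so pre_save converts it to an offset and post_load validates that offset before rebuilding the pointer, rejecting malformed or hostile streams. A self-contained sketch of the same pairing, with hypothetical names:

#include <errno.h>
#include <stdint.h>

/* Sketch: migrate a pointer into a fixed buffer as a validated offset.
 * DemoState and the callbacks are invented; the shape mirrors
 * nand_pre_save()/nand_post_load() above. */
typedef struct DemoState {
    uint8_t io[256];
    uint8_t *ioaddr;         /* runtime-only pointer into io[] */
    uint32_t ioaddr_vmstate; /* what actually travels in the stream */
} DemoState;

static int demo_pre_save(void *opaque)
{
    DemoState *s = opaque;

    s->ioaddr_vmstate = s->ioaddr - s->io;   /* pointer -> offset */
    return 0;
}

static int demo_post_load(void *opaque, int version_id)
{
    DemoState *s = opaque;

    if (s->ioaddr_vmstate > sizeof(s->io)) { /* reject bad streams */
        return -EINVAL;
    }
    s->ioaddr = s->io + s->ioaddr_vmstate;   /* offset -> pointer */
    return 0;
}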
*/ - s->ioaddr = s->io; -} - -static const Property nand_properties[] = { - DEFINE_PROP_UINT8("manufacturer_id", NANDFlashState, manf_id, 0), - DEFINE_PROP_UINT8("chip_id", NANDFlashState, chip_id, 0), - DEFINE_PROP_DRIVE("drive", NANDFlashState, blk), -}; - -static void nand_class_init(ObjectClass *klass, const void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = nand_realize; - device_class_set_legacy_reset(dc, nand_reset); - dc->vmsd = &vmstate_nand; - device_class_set_props(dc, nand_properties); - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -} - -static const TypeInfo nand_info = { - .name = TYPE_NAND, - .parent = TYPE_DEVICE, - .instance_size = sizeof(NANDFlashState), - .class_init = nand_class_init, -}; - -static void nand_register_types(void) -{ - type_register_static(&nand_info); -} - -/* - * Chip inputs are CLE, ALE, CE, WP, GND and eight I/O pins. Chip - * outputs are R/B and eight I/O pins. - * - * CE, WP and R/B are active low. - */ -void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale, - uint8_t ce, uint8_t wp, uint8_t gnd) -{ - NANDFlashState *s = NAND(dev); - - s->cle = cle; - s->ale = ale; - s->ce = ce; - s->wp = wp; - s->gnd = gnd; - if (wp) { - s->status |= NAND_IOSTATUS_UNPROTCT; - } else { - s->status &= ~NAND_IOSTATUS_UNPROTCT; - } -} - -void nand_getpins(DeviceState *dev, int *rb) -{ - *rb = 1; -} - -void nand_setio(DeviceState *dev, uint32_t value) -{ - int i; - NANDFlashState *s = NAND(dev); - - if (!s->ce && s->cle) { - if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { - if (s->cmd == NAND_CMD_READ0 && value == NAND_CMD_LPREAD2) - return; - if (value == NAND_CMD_RANDOMREAD1) { - s->addr &= ~((1 << s->addr_shift) - 1); - s->addrlen = 0; - return; - } - } - if (value == NAND_CMD_READ0) { - s->offset = 0; - } else if (value == NAND_CMD_READ1) { - s->offset = 0x100; - value = NAND_CMD_READ0; - } else if (value == NAND_CMD_READ2) { - s->offset = 1 << s->page_shift; - value = NAND_CMD_READ0; - } - - s->cmd = value; - - if (s->cmd == NAND_CMD_READSTATUS || - s->cmd == NAND_CMD_PAGEPROGRAM2 || - s->cmd == NAND_CMD_BLOCKERASE1 || - s->cmd == NAND_CMD_BLOCKERASE2 || - s->cmd == NAND_CMD_NOSERIALREAD2 || - s->cmd == NAND_CMD_RANDOMREAD2 || - s->cmd == NAND_CMD_RESET) { - nand_command(s); - } - - if (s->cmd != NAND_CMD_RANDOMREAD2) { - s->addrlen = 0; - } - } - - if (s->ale) { - unsigned int shift = s->addrlen * 8; - uint64_t mask = ~(0xffull << shift); - uint64_t v = (uint64_t)value << shift; - - s->addr = (s->addr & mask) | v; - s->addrlen ++; - - switch (s->addrlen) { - case 1: - if (s->cmd == NAND_CMD_READID) { - nand_command(s); - } - break; - case 2: /* fix cache address as a byte address */ - s->addr <<= (s->buswidth - 1); - break; - case 3: - if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && - (s->cmd == NAND_CMD_READ0 || - s->cmd == NAND_CMD_PAGEPROGRAM1)) { - nand_command(s); - } - break; - case 4: - if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && - nand_flash_ids[s->chip_id].size < 256 && /* 1Gb or less */ - (s->cmd == NAND_CMD_READ0 || - s->cmd == NAND_CMD_PAGEPROGRAM1)) { - nand_command(s); - } - break; - case 5: - if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && - nand_flash_ids[s->chip_id].size >= 256 && /* 2Gb or more */ - (s->cmd == NAND_CMD_READ0 || - s->cmd == NAND_CMD_PAGEPROGRAM1)) { - nand_command(s); - } - break; - default: - break; - } - } - - if (!s->cle && !s->ale && s->cmd == NAND_CMD_PAGEPROGRAM1) { - if (s->iolen < (1 << s->page_shift) + (1 << s->oob_shift)) { - 
for (i = s->buswidth; i--; value >>= 8) { - s->io[s->iolen ++] = (uint8_t) (value & 0xff); - } - } - } else if (!s->cle && !s->ale && s->cmd == NAND_CMD_COPYBACKPRG1) { - if ((s->addr & ((1 << s->addr_shift) - 1)) < - (1 << s->page_shift) + (1 << s->oob_shift)) { - for (i = s->buswidth; i--; s->addr++, value >>= 8) { - s->io[s->iolen + (s->addr & ((1 << s->addr_shift) - 1))] = - (uint8_t) (value & 0xff); - } - } - } -} - -uint32_t nand_getio(DeviceState *dev) -{ - int offset; - uint32_t x = 0; - NANDFlashState *s = NAND(dev); - - /* Allow sequential reading */ - if (!s->iolen && s->cmd == NAND_CMD_READ0) { - offset = (int) (s->addr & ((1 << s->addr_shift) - 1)) + s->offset; - s->offset = 0; - s->iolen = nand_load_block(s, offset); - } - - if (s->ce || s->iolen <= 0) { - return 0; - } - - for (offset = s->buswidth; offset--;) { - x |= s->ioaddr[offset] << (offset << 3); - } - /* after receiving READ STATUS command all subsequent reads will - * return the status register value until another command is issued - */ - if (s->cmd != NAND_CMD_READSTATUS) { - s->addr += s->buswidth; - s->ioaddr += s->buswidth; - s->iolen -= s->buswidth; - } - return x; -} - -uint32_t nand_getbuswidth(DeviceState *dev) -{ - NANDFlashState *s = (NANDFlashState *) dev; - return s->buswidth << 3; -} - -DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id) -{ - DeviceState *dev; - - if (nand_flash_ids[chip_id].size == 0) { - hw_error("%s: Unsupported NAND chip ID.\n", __func__); - } - dev = qdev_new(TYPE_NAND); - qdev_prop_set_uint8(dev, "manufacturer_id", manf_id); - qdev_prop_set_uint8(dev, "chip_id", chip_id); - if (blk) { - qdev_prop_set_drive_err(dev, "drive", blk, &error_fatal); - } - - qdev_realize(dev, NULL, &error_fatal); - return dev; -} - -type_init(nand_register_types) - -#else - -/* Program a single page */ -static void glue(nand_blk_write_, NAND_PAGE_SIZE)(NANDFlashState *s) -{ - uint64_t off, page, sector, soff; - uint8_t iobuf[(PAGE_SECTORS + 2) * 0x200]; - if (PAGE(s->addr) >= s->pages) - return; - - if (!s->blk) { - mem_and(s->storage + PAGE_START(s->addr) + (s->addr & PAGE_MASK) + - s->offset, s->io, s->iolen); - } else if (s->mem_oob) { - sector = SECTOR(s->addr); - off = (s->addr & PAGE_MASK) + s->offset; - soff = SECTOR_OFFSET(s->addr); - if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS, - PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); - return; - } - - mem_and(iobuf + (soff | off), s->io, MIN(s->iolen, NAND_PAGE_SIZE - off)); - if (off + s->iolen > NAND_PAGE_SIZE) { - page = PAGE(s->addr); - mem_and(s->storage + (page << OOB_SHIFT), s->io + NAND_PAGE_SIZE - off, - MIN(OOB_SIZE, off + s->iolen - NAND_PAGE_SIZE)); - } - - if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS, - PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); - } - } else { - off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset; - sector = off >> 9; - soff = off & 0x1ff; - if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS, - (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); - return; - } - - mem_and(iobuf + soff, s->io, s->iolen); - - if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS, - (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); - } - } - s->offset = 0; -} - -/* Erase a single block */ -static void 
glue(nand_blk_erase_, NAND_PAGE_SIZE)(NANDFlashState *s) -{ - uint64_t i, page, addr; - uint8_t iobuf[0x200] = { [0 ... 0x1ff] = 0xff, }; - addr = s->addr & ~((1 << (ADDR_SHIFT + s->erase_shift)) - 1); - - if (PAGE(addr) >= s->pages) { - return; - } - - if (!s->blk) { - memset(s->storage + PAGE_START(addr), - 0xff, (NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift); - } else if (s->mem_oob) { - memset(s->storage + (PAGE(addr) << OOB_SHIFT), - 0xff, OOB_SIZE << s->erase_shift); - i = SECTOR(addr); - page = SECTOR(addr + (1 << (ADDR_SHIFT + s->erase_shift))); - for (; i < page; i ++) - if (blk_pwrite(s->blk, i << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, i); - } - } else { - addr = PAGE_START(addr); - page = addr >> 9; - if (blk_pread(s->blk, page << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", __func__, page); - } - memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1); - if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, page); - } - - memset(iobuf, 0xff, 0x200); - i = (addr & ~0x1ff) + 0x200; - for (addr += ((NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200; - i < addr; i += 0x200) { - if (blk_pwrite(s->blk, i, BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", - __func__, i >> 9); - } - } - - page = i >> 9; - if (blk_pread(s->blk, page << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", __func__, page); - } - memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1); - if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, page); - } - } -} - -static bool glue(nand_blk_load_, NAND_PAGE_SIZE)(NANDFlashState *s, - uint64_t addr, unsigned offset) -{ - if (PAGE(addr) >= s->pages) { - return false; - } - - if (offset > NAND_PAGE_SIZE + OOB_SIZE) { - return false; - } - - if (s->blk) { - if (s->mem_oob) { - if (blk_pread(s->blk, SECTOR(addr) << BDRV_SECTOR_BITS, - PAGE_SECTORS << BDRV_SECTOR_BITS, s->io, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", - __func__, SECTOR(addr)); - } - memcpy(s->io + SECTOR_OFFSET(s->addr) + NAND_PAGE_SIZE, - s->storage + (PAGE(s->addr) << OOB_SHIFT), - OOB_SIZE); - s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset; - } else { - if (blk_pread(s->blk, PAGE_START(addr), - (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, s->io, 0) - < 0) { - printf("%s: read error in sector %" PRIu64 "\n", - __func__, PAGE_START(addr) >> 9); - } - s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset; - } - } else { - memcpy(s->io, s->storage + PAGE_START(s->addr) + - offset, NAND_PAGE_SIZE + OOB_SIZE - offset); - s->ioaddr = s->io; - } - - return true; -} - -static void glue(nand_init_, NAND_PAGE_SIZE)(NANDFlashState *s) -{ - s->oob_shift = PAGE_SHIFT - 5; - s->pages = s->size >> PAGE_SHIFT; - s->addr_shift = ADDR_SHIFT; - - s->blk_erase = glue(nand_blk_erase_, NAND_PAGE_SIZE); - s->blk_write = glue(nand_blk_write_, NAND_PAGE_SIZE); - s->blk_load = glue(nand_blk_load_, NAND_PAGE_SIZE); -} - -# undef NAND_PAGE_SIZE -# undef PAGE_SHIFT -# undef PAGE_SECTORS -# undef ADDR_SHIFT -#endif /* NAND_IO */ diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 4bb5ed2..c0cc5f6 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -204,34 
+204,39 @@ err_host_notifiers: return ret; } -static void vhost_user_blk_stop(VirtIODevice *vdev) +static int vhost_user_blk_stop(VirtIODevice *vdev) { VHostUserBlk *s = VHOST_USER_BLK(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); int ret; + bool force_stop = false; if (!s->started_vu) { - return; + return 0; } s->started_vu = false; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&s->dev, vdev, true); + force_stop = s->skip_get_vring_base_on_force_shutdown && + qemu_force_shutdown_requested(); - ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); - if (ret < 0) { + ret = force_stop ? vhost_dev_force_stop(&s->dev, vdev, true) : + vhost_dev_stop(&s->dev, vdev, true); + + if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&s->dev, vdev); + return ret; } -static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBlk *s = VHOST_USER_BLK(vdev); bool should_start = virtio_device_should_start(vdev, status); @@ -239,11 +244,11 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) int ret; if (!s->connected) { - return; + return -1; } if (vhost_dev_is_started(&s->dev) == should_start) { - return; + return 0; } if (should_start) { @@ -253,9 +258,12 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) qemu_chr_fe_disconnect(&s->chardev); } } else { - vhost_user_blk_stop(vdev); + ret = vhost_user_blk_stop(vdev); + if (ret < 0) { + return ret; + } } - + return 0; } static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, @@ -581,6 +589,8 @@ static const Property vhost_user_blk_properties[] = { VIRTIO_BLK_F_DISCARD, true), DEFINE_PROP_BIT64("write-zeroes", VHostUserBlk, parent_obj.host_features, VIRTIO_BLK_F_WRITE_ZEROES, true), + DEFINE_PROP_BOOL("skip-get-vring-base-on-force-shutdown", VHostUserBlk, + skip_get_vring_base_on_force_shutdown, false), }; static void vhost_user_blk_class_init(ObjectClass *klass, const void *data) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index b54d01d..9bab271 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1270,7 +1270,7 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, return features; } -static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOBlock *s = VIRTIO_BLK(vdev); @@ -1279,7 +1279,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) } if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { - return; + return 0; } /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send @@ -1302,6 +1302,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_WCE)); } + return 0; } static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) @@ -1802,7 +1803,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) * called after ->start_ioeventfd() has already set blk's AioContext. 
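A conversion that repeats throughout this series (vhost-user-blk here, virtio-blk, virtio-serial and others below): status and stop callbacks change from void to int so a failed backend stop is propagated instead of silently dropped, returning 0 on success and a negative value on error. A minimal sketch of the resulting contract; demo_device_stop() is a hypothetical stand-in for device-specific teardown, not code from the patch.

#include "qemu/osdep.h"
#include "hw/virtio/virtio.h"

static int demo_device_stop(VirtIODevice *vdev)
{
    /* hypothetical teardown; a real device stops its backend here */
    return 0;
}

static int demo_set_status(VirtIODevice *vdev, uint8_t status)
{
    bool should_start = virtio_device_should_start(vdev, status);

    if (should_start) {
        return 0;    /* start path unchanged in this sketch */
    }

    /* Previously a failure here vanished; now the caller can see it. */
    return demo_device_stop(vdev);
}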
*/ s->change = - qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, s); + qdev_add_vm_change_state_handler(dev, virtio_blk_dma_restart_cb, NULL, s); blk_ram_registrar_init(&s->blk_ram_registrar, s->blk); blk_set_dev_ops(s->blk, &virtio_block_ops, s); diff --git a/hw/char/Kconfig b/hw/char/Kconfig index 9d517f3..020c0a8 100644 --- a/hw/char/Kconfig +++ b/hw/char/Kconfig @@ -48,6 +48,9 @@ config VIRTIO_SERIAL default y depends on VIRTIO +config MAX78000_UART + bool + config STM32F2XX_USART bool diff --git a/hw/char/max78000_uart.c b/hw/char/max78000_uart.c new file mode 100644 index 0000000..19506d5 --- /dev/null +++ b/hw/char/max78000_uart.c @@ -0,0 +1,285 @@ +/* + * MAX78000 UART + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/char/max78000_uart.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "migration/vmstate.h" +#include "trace.h" + + +static int max78000_uart_can_receive(void *opaque) +{ + Max78000UartState *s = opaque; + if (!(s->ctrl & UART_BCLKEN)) { + return 0; + } + return fifo8_num_free(&s->rx_fifo); +} + +static void max78000_update_irq(Max78000UartState *s) +{ + int interrupt_level; + + interrupt_level = s->int_fl & s->int_en; + qemu_set_irq(s->irq, interrupt_level); +} + +static void max78000_uart_receive(void *opaque, const uint8_t *buf, int size) +{ + Max78000UartState *s = opaque; + + assert(size <= fifo8_num_free(&s->rx_fifo)); + + fifo8_push_all(&s->rx_fifo, buf, size); + + uint32_t rx_threshold = s->ctrl & 0xf; + + if (fifo8_num_used(&s->rx_fifo) >= rx_threshold) { + s->int_fl |= UART_RX_THD; + } + + max78000_update_irq(s); +} + +static void max78000_uart_reset_hold(Object *obj, ResetType type) +{ + Max78000UartState *s = MAX78000_UART(obj); + + s->ctrl = 0; + s->status = UART_TX_EM | UART_RX_EM; + s->int_en = 0; + s->int_fl = 0; + s->osr = 0; + s->txpeek = 0; + s->pnr = UART_RTS; + s->fifo = 0; + s->dma = 0; + s->wken = 0; + s->wkfl = 0; + fifo8_reset(&s->rx_fifo); +} + +static uint64_t max78000_uart_read(void *opaque, hwaddr addr, + unsigned int size) +{ + Max78000UartState *s = opaque; + uint64_t retvalue = 0; + switch (addr) { + case UART_CTRL: + retvalue = s->ctrl; + break; + case UART_STATUS: + retvalue = (fifo8_num_used(&s->rx_fifo) << UART_RX_LVL) | + UART_TX_EM | + (fifo8_is_empty(&s->rx_fifo) ? 
UART_RX_EM : 0); + break; + case UART_INT_EN: + retvalue = s->int_en; + break; + case UART_INT_FL: + retvalue = s->int_fl; + break; + case UART_CLKDIV: + retvalue = s->clkdiv; + break; + case UART_OSR: + retvalue = s->osr; + break; + case UART_TXPEEK: + if (!fifo8_is_empty(&s->rx_fifo)) { + retvalue = fifo8_peek(&s->rx_fifo); + } + break; + case UART_PNR: + retvalue = s->pnr; + break; + case UART_FIFO: + if (!fifo8_is_empty(&s->rx_fifo)) { + retvalue = fifo8_pop(&s->rx_fifo); + max78000_update_irq(s); + } + break; + case UART_DMA: + /* DMA not implemented */ + retvalue = s->dma; + break; + case UART_WKEN: + retvalue = s->wken; + break; + case UART_WKFL: + retvalue = s->wkfl; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + break; + } + + return retvalue; +} + +static void max78000_uart_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000UartState *s = opaque; + + uint32_t value = val64; + uint8_t data; + + switch (addr) { + case UART_CTRL: + if (value & UART_FLUSH_RX) { + fifo8_reset(&s->rx_fifo); + } + if (value & UART_BCLKEN) { + value = value | UART_BCLKRDY; + } + s->ctrl = value & ~(UART_FLUSH_RX | UART_FLUSH_TX); + + /* + * Software can manage UART flow control manually by setting hfc_en + * in UART_CTRL. This would require emulating uart at a lower level, + * and is currently unimplemented. + */ + + return; + case UART_STATUS: + /* UART_STATUS is read only */ + return; + case UART_INT_EN: + s->int_en = value; + return; + case UART_INT_FL: + s->int_fl = s->int_fl & ~(value); + max78000_update_irq(s); + return; + case UART_CLKDIV: + s->clkdiv = value; + return; + case UART_OSR: + s->osr = value; + return; + case UART_PNR: + s->pnr = value; + return; + case UART_FIFO: + data = value & 0xff; + /* + * XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks + */ + qemu_chr_fe_write_all(&s->chr, &data, 1); + + /* TX is always empty */ + s->int_fl |= UART_TX_HE; + max78000_update_irq(s); + + return; + case UART_DMA: + /* DMA not implemented */ + s->dma = value; + return; + case UART_WKEN: + s->wken = value; + return; + case UART_WKFL: + s->wkfl = value; + return; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + } +} + +static const MemoryRegionOps max78000_uart_ops = { + .read = max78000_uart_read, + .write = max78000_uart_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const Property max78000_uart_properties[] = { + DEFINE_PROP_CHR("chardev", Max78000UartState, chr), +}; + +static const VMStateDescription max78000_uart_vmstate = { + .name = TYPE_MAX78000_UART, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(ctrl, Max78000UartState), + VMSTATE_UINT32(status, Max78000UartState), + VMSTATE_UINT32(int_en, Max78000UartState), + VMSTATE_UINT32(int_fl, Max78000UartState), + VMSTATE_UINT32(clkdiv, Max78000UartState), + VMSTATE_UINT32(osr, Max78000UartState), + VMSTATE_UINT32(txpeek, Max78000UartState), + VMSTATE_UINT32(pnr, Max78000UartState), + VMSTATE_UINT32(fifo, Max78000UartState), + VMSTATE_UINT32(dma, Max78000UartState), + VMSTATE_UINT32(wken, Max78000UartState), + VMSTATE_UINT32(wkfl, Max78000UartState), + VMSTATE_FIFO8(rx_fifo, Max78000UartState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_uart_init(Object *obj) +{ + Max78000UartState *s = MAX78000_UART(obj); + fifo8_create(&s->rx_fifo, 8); + + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &max78000_uart_ops, s, + TYPE_MAX78000_UART, 0x400); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void max78000_uart_realize(DeviceState *dev, Error **errp) +{ + Max78000UartState *s = MAX78000_UART(dev); + + qemu_chr_fe_set_handlers(&s->chr, max78000_uart_can_receive, + max78000_uart_receive, NULL, NULL, + s, NULL, true); +} + +static void max78000_uart_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + rc->phases.hold = max78000_uart_reset_hold; + + device_class_set_props(dc, max78000_uart_properties); + dc->realize = max78000_uart_realize; + + dc->vmsd = &max78000_uart_vmstate; +} + +static const TypeInfo max78000_uart_info = { + .name = TYPE_MAX78000_UART, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000UartState), + .instance_init = max78000_uart_init, + .class_init = max78000_uart_class_init, +}; + +static void max78000_uart_register_types(void) +{ + type_register_static(&max78000_uart_info); +} + +type_init(max78000_uart_register_types) diff --git a/hw/char/meson.build b/hw/char/meson.build index 4e439da..a9e1dc2 100644 --- a/hw/char/meson.build +++ b/hw/char/meson.build @@ -26,6 +26,7 @@ system_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c')) system_ss.add(when: 'CONFIG_COLDFIRE', if_true: files('mcf_uart.c')) system_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic-uart.c')) system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210_uart.c')) +system_ss.add(when: 'CONFIG_MAX78000_UART', if_true: files('max78000_uart.c')) system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_uart.c')) system_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_aux.c')) 
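The UART added above registers as an ordinary sysbus device with a chardev backend, so a machine model wires it up with the usual qdev steps. A hypothetical sketch of that wiring; the base address, IRQ and helper name are invented, and serial_hd(0) simply picks up the first -serial backend:

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/char/max78000_uart.h"
#include "hw/qdev-properties-system.h"
#include "hw/sysbus.h"
#include "system/system.h"

static DeviceState *demo_wire_uart(qemu_irq irq)
{
    DeviceState *dev = qdev_new(TYPE_MAX78000_UART);

    qdev_prop_set_chr(dev, "chardev", serial_hd(0));
    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);

    /* The model maps a 0x400-byte register window (see instance_init);
     * the base address below is made up for this sketch. */
    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x40042000);
    sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq);

    return dev;
}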
system_ss.add(when: 'CONFIG_RENESAS_SCI', if_true: files('renesas_sci.c')) diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index c884be5..a78ea9b 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -29,7 +29,6 @@ #include "qemu/timer.h" #include "qemu/error-report.h" #include "system/address-spaces.h" -#include "exec/tswap.h" #include "system/dma.h" #include "system/runstate.h" #include "trace.h" diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c index e9580aa..3e40d5e 100644 --- a/hw/char/sclpconsole-lm.c +++ b/hw/char/sclpconsole-lm.c @@ -214,7 +214,7 @@ static int process_mdb(SCLPEvent *event, MDBO *mdbo) { int rc; int len; - uint8_t buffer[SIZE_BUFFER]; + QEMU_UNINITIALIZED uint8_t buffer[SIZE_BUFFER]; len = be16_to_cpu(mdbo->length); len -= sizeof(mdbo->length) + sizeof(mdbo->type) diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c index 6abd803..30447fa 100644 --- a/hw/char/sh_serial.c +++ b/hw/char/sh_serial.c @@ -78,10 +78,6 @@ struct SHSerialState { qemu_irq bri; }; -typedef struct {} SHSerialStateClass; - -OBJECT_DEFINE_TYPE(SHSerialState, sh_serial, SH_SERIAL, SYS_BUS_DEVICE) - static void sh_serial_clear_fifo(SHSerialState *s) { memset(s->rx_fifo, 0, SH_RX_FIFO_LENGTH); @@ -434,17 +430,13 @@ static void sh_serial_realize(DeviceState *d, Error **errp) s->etu = NANOSECONDS_PER_SECOND / 9600; } -static void sh_serial_finalize(Object *obj) +static void sh_serial_unrealize(DeviceState *dev) { - SHSerialState *s = SH_SERIAL(obj); + SHSerialState *s = SH_SERIAL(dev); timer_del(&s->fifo_timeout_timer); } -static void sh_serial_init(Object *obj) -{ -} - static const Property sh_serial_properties[] = { DEFINE_PROP_CHR("chardev", SHSerialState, chr), DEFINE_PROP_UINT8("features", SHSerialState, feat, 0), @@ -456,7 +448,19 @@ static void sh_serial_class_init(ObjectClass *oc, const void *data) device_class_set_props(dc, sh_serial_properties); dc->realize = sh_serial_realize; + dc->unrealize = sh_serial_unrealize; device_class_set_legacy_reset(dc, sh_serial_reset); /* Reason: part of SuperH CPU/SoC, needs to be wired up */ dc->user_creatable = false; } + +static const TypeInfo sh_serial_types[] = { + { + .name = TYPE_SH_SERIAL, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SHSerialState), + .class_init = sh_serial_class_init, + }, +}; + +DEFINE_TYPES(sh_serial_types) diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c index 0fc89e7..9bc697a 100644 --- a/hw/char/sifive_uart.c +++ b/hw/char/sifive_uart.c @@ -128,8 +128,10 @@ static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf, s->txfifo |= SIFIVE_UART_TXFIFO_FULL; } - timer_mod(s->fifo_trigger_handle, current_time + - TX_INTERRUPT_TRIGGER_DELAY_NS); + if (!timer_pending(s->fifo_trigger_handle)) { + timer_mod(s->fifo_trigger_handle, current_time + + TX_INTERRUPT_TRIGGER_DELAY_NS); + } } static uint64_t diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index eb79f52..673c50f 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -622,7 +622,7 @@ static void guest_reset(VirtIOSerial *vser) } } -static void set_status(VirtIODevice *vdev, uint8_t status) +static int set_status(VirtIODevice *vdev, uint8_t status) { VirtIOSerial *vser; VirtIOSerialPort *port; @@ -650,6 +650,7 @@ static void set_status(VirtIODevice *vdev, uint8_t status) vsc->enable_backend(port, vdev->vm_running); } } + return 0; } static void vser_reset(VirtIODevice *vdev) diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c index 92c40b6..39e674a 
100644 --- a/hw/core/cpu-common.c +++ b/hw/core/cpu-common.c @@ -234,6 +234,8 @@ bool cpu_exec_realizefn(CPUState *cpu, Error **errp) return false; } + gdb_init_cpu(cpu); + /* Wait until cpu initialization complete before exposing cpu. */ cpu_list_add(cpu); @@ -304,7 +306,6 @@ static void cpu_common_initfn(Object *obj) /* cache the cpu class for the hotpath */ cpu->cc = CPU_GET_CLASS(cpu); - gdb_init_cpu(cpu); cpu->cpu_index = UNASSIGNED_CPU_INDEX; cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX; cpu->as = NULL; diff --git a/hw/core/cpu-system.c b/hw/core/cpu-system.c index 3c84176..a975405 100644 --- a/hw/core/cpu-system.c +++ b/hw/core/cpu-system.c @@ -24,7 +24,7 @@ #include "exec/cputlb.h" #include "system/memory.h" #include "exec/tb-flush.h" -#include "exec/tswap.h" +#include "qemu/target-info.h" #include "hw/qdev-core.h" #include "hw/qdev-properties.h" #include "hw/core/sysemu-cpu-ops.h" diff --git a/hw/core/loader.c b/hw/core/loader.c index b792a54..e7056ba 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -1333,20 +1333,6 @@ void rom_set_fw(FWCfgState *f) fw_cfg = f; } -void rom_set_order_override(int order) -{ - if (!fw_cfg) - return; - fw_cfg_set_order_override(fw_cfg, order); -} - -void rom_reset_order_override(void) -{ - if (!fw_cfg) - return; - fw_cfg_reset_order_override(fw_cfg); -} - void rom_transaction_begin(void) { Rom *rom; diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index c6325cd..3a612e2 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -18,6 +18,7 @@ #include "monitor/monitor.h" #include "qapi/error.h" #include "qapi/qapi-builtin-visit.h" +#include "qapi/qapi-commands-accelerator.h" #include "qapi/qapi-commands-machine.h" #include "qobject/qdict.h" #include "qapi/string-output-visitor.h" @@ -32,6 +33,7 @@ void hmp_info_cpus(Monitor *mon, const QDict *qdict) cpu_list = qmp_query_cpus_fast(NULL); for (cpu = cpu_list; cpu; cpu = cpu->next) { + g_autofree char *cpu_model = cpu_model_from_type(cpu->value->qom_type); int active = ' '; if (cpu->value->cpu_index == monitor_get_cpu_index(mon)) { @@ -40,7 +42,8 @@ void hmp_info_cpus(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%c CPU #%" PRId64 ":", active, cpu->value->cpu_index); - monitor_printf(mon, " thread_id=%" PRId64 "\n", cpu->value->thread_id); + monitor_printf(mon, " thread_id=%" PRId64 " model=%s\n", + cpu->value->thread_id, cpu_model); } qapi_free_CpuInfoFastList(cpu_list); diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c index d82043e..6aca1a6 100644 --- a/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c @@ -14,12 +14,13 @@ #include "hw/mem/memory-device.h" #include "qapi/error.h" #include "qapi/qapi-builtin-visit.h" +#include "qapi/qapi-commands-accelerator.h" #include "qapi/qapi-commands-machine.h" #include "qobject/qobject.h" #include "qapi/qobject-input-visitor.h" #include "qapi/type-helpers.h" #include "qemu/uuid.h" -#include "qemu/target-info.h" +#include "qemu/target-info-qapi.h" #include "qom/qom-qobject.h" #include "system/hostmem.h" #include "system/hw_accel.h" @@ -37,8 +38,7 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp) MachineState *ms = MACHINE(qdev_get_machine()); MachineClass *mc = MACHINE_GET_CLASS(ms); CpuInfoFastList *head = NULL, **tail = &head; - SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, target_name(), - -1, &error_abort); + SysEmuTarget target = target_arch(); CPUState *cpu; CPU_FOREACH(cpu) { @@ -47,6 +47,7 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp) 
value->cpu_index = cpu->cpu_index; value->qom_path = object_get_canonical_path(OBJECT(cpu)); value->thread_id = cpu->thread_id; + value->qom_type = g_strdup(object_get_typename(OBJECT(cpu))); if (mc->cpu_index_to_instance_props) { CpuInstanceProperties *props; @@ -139,8 +140,7 @@ QemuTargetInfo *qmp_query_target(Error **errp) { QemuTargetInfo *info = g_malloc0(sizeof(*info)); - info->arch = qapi_enum_parse(&SysEmuTarget_lookup, target_name(), -1, - &error_abort); + info->arch = target_arch(); return info; } diff --git a/hw/core/machine.c b/hw/core/machine.c index ed01798..bd47527 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -37,11 +37,16 @@ #include "hw/virtio/virtio-iommu.h" #include "audio/audio.h" -GlobalProperty hw_compat_10_0[] = {}; +GlobalProperty hw_compat_10_0[] = { + { "scsi-hd", "dpofua", "off" }, + { "vfio-pci", "x-migration-load-config-after-iter", "off" }, + { "ramfb", "use-legacy-x86-rom", "true"}, + { "vfio-pci-nohotplug", "use-legacy-x86-rom", "true" }, +}; const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0); GlobalProperty hw_compat_9_2[] = { - {"arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, + { "arm-cpu", "backcompat-pauth-default-use-qarma5", "true"}, { "virtio-balloon-pci", "vectors", "0" }, { "virtio-balloon-pci-transitional", "vectors", "0" }, { "virtio-balloon-pci-non-transitional", "vectors", "0" }, @@ -58,12 +63,12 @@ GlobalProperty hw_compat_9_1[] = { const size_t hw_compat_9_1_len = G_N_ELEMENTS(hw_compat_9_1); GlobalProperty hw_compat_9_0[] = { - {"arm-cpu", "backcompat-cntfrq", "true" }, + { "arm-cpu", "backcompat-cntfrq", "true" }, { "scsi-hd", "migrate-emulated-scsi-request", "false" }, { "scsi-cd", "migrate-emulated-scsi-request", "false" }, - {"vfio-pci", "skip-vsc-check", "false" }, + { "vfio-pci", "skip-vsc-check", "false" }, { "virtio-pci", "x-pcie-pm-no-soft-reset", "off" }, - {"sd-card", "spec_version", "2" }, + { "sd-card", "spec_version", "2" }, }; const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0); @@ -283,24 +288,6 @@ GlobalProperty hw_compat_2_6[] = { }; const size_t hw_compat_2_6_len = G_N_ELEMENTS(hw_compat_2_6); -GlobalProperty hw_compat_2_5[] = { - { "isa-fdc", "fallback", "144" }, - { "pvscsi", "x-old-pci-configuration", "on" }, - { "pvscsi", "x-disable-pcie", "on" }, - { "vmxnet3", "x-old-msi-offsets", "on" }, - { "vmxnet3", "x-disable-pcie", "on" }, -}; -const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5); - -GlobalProperty hw_compat_2_4[] = { - { "e1000", "extra_mac_registers", "off" }, - { "virtio-pci", "x-disable-pcie", "on" }, - { "virtio-pci", "migrate-extra", "off" }, - { "fw_cfg_mem", "dma_enabled", "off" }, - { "fw_cfg_io", "dma_enabled", "off" } -}; -const size_t hw_compat_2_4_len = G_N_ELEMENTS(hw_compat_2_4); - MachineState *current_machine; static char *machine_get_kernel(Object *obj, Error **errp) @@ -593,6 +580,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp) ms->nvdimms_state->is_enabled = value; } +static bool machine_get_spcr(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->acpi_spcr_enabled; +} + +static void machine_set_spcr(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->acpi_spcr_enabled = value; +} + static bool machine_get_hmat(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -1297,6 +1298,14 @@ static void machine_initfn(Object *obj) "Table (HMAT)"); } + /* SPCR */ + ms->acpi_spcr_enabled = true; + object_property_add_bool(obj, "spcr", machine_get_spcr, 
machine_set_spcr); + object_property_set_description(obj, "spcr", + "Set on/off to enable/disable " + "ACPI Serial Port Console Redirection " + "Table (spcr)"); + /* default to mc->default_cpus */ ms->smp.cpus = mc->default_cpus; ms->smp.max_cpus = mc->default_cpus; diff --git a/hw/core/meson.build b/hw/core/meson.build index 547de65..b5a545a 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -26,7 +26,7 @@ system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c')) system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls]) -libsystem_ss.add(files( +system_ss.add(files( 'cpu-system.c', 'fw-path-provider.c', 'gpio.c', @@ -46,7 +46,7 @@ libsystem_ss.add(files( 'vm-change-state-handler.c', 'clock-vmstate.c', )) -libuser_ss.add(files( +user_ss.add(files( 'cpu-user.c', 'qdev-user.c', )) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 8e11e63..1f810b7 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -145,6 +145,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, if (ctx != bdrv_get_aio_context(bs)) { error_setg(errp, "Different aio context is not supported for new " "node"); + return; } blk_replace_bs(blk, bs, errp); @@ -1298,3 +1299,47 @@ const PropertyInfo qdev_prop_vmapple_virtio_blk_variant = { .set = qdev_propinfo_set_enum, .set_default_value = qdev_propinfo_set_default_value_enum, }; + +/* --- VirtIOGPUOutputList --- */ + +static void get_virtio_gpu_output_list(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + VirtIOGPUOutputList **prop_ptr = + object_field_prop_ptr(obj, opaque); + + visit_type_VirtIOGPUOutputList(v, name, prop_ptr, errp); +} + +static void set_virtio_gpu_output_list(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + VirtIOGPUOutputList **prop_ptr = + object_field_prop_ptr(obj, opaque); + VirtIOGPUOutputList *list; + + if (!visit_type_VirtIOGPUOutputList(v, name, &list, errp)) { + return; + } + + qapi_free_VirtIOGPUOutputList(*prop_ptr); + *prop_ptr = list; +} + +static void release_virtio_gpu_output_list(Object *obj, + const char *name, void *opaque) +{ + VirtIOGPUOutputList **prop_ptr = + object_field_prop_ptr(obj, opaque); + + qapi_free_VirtIOGPUOutputList(*prop_ptr); + *prop_ptr = NULL; +} + +const PropertyInfo qdev_prop_virtio_gpu_output_list = { + .type = "VirtIOGPUOutputList", + .description = "VirtIO GPU output list [{\"name\":\"<name>\"},...]", + .get = get_virtio_gpu_output_list, + .set = set_virtio_gpu_output_list, + .release = release_virtio_gpu_output_list, +}; diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 147b3ff..b7e8a89 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -2,6 +2,7 @@ #include "hw/qdev-properties.h" #include "qapi/error.h" #include "qapi/qapi-types-misc.h" +#include "qapi/qapi-visit-common.h" #include "qobject/qlist.h" #include "qemu/ctype.h" #include "qemu/error-report.h" @@ -180,7 +181,8 @@ const PropertyInfo qdev_prop_bit = { static uint64_t qdev_get_prop_mask64(const Property *prop) { - assert(prop->info == &qdev_prop_bit64); + assert(prop->info == &qdev_prop_bit64 || + prop->info == &qdev_prop_on_off_auto_bit64); return 0x1ull << prop->bitnr; } @@ -225,6 +227,69 @@ const PropertyInfo qdev_prop_bit64 = { .set_default_value = set_default_value_bool, }; +static void prop_get_on_off_auto_bit64(Object *obj, Visitor *v, + 
const char *name, void *opaque, + Error **errp) +{ + Property *prop = opaque; + OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop); + OnOffAuto value; + uint64_t mask = qdev_get_prop_mask64(prop); + + if (p->auto_bits & mask) { + value = ON_OFF_AUTO_AUTO; + } else if (p->on_bits & mask) { + value = ON_OFF_AUTO_ON; + } else { + value = ON_OFF_AUTO_OFF; + } + + visit_type_OnOffAuto(v, name, &value, errp); +} + +static void prop_set_on_off_auto_bit64(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + Property *prop = opaque; + OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop); + OnOffAuto value; + uint64_t mask = qdev_get_prop_mask64(prop); + + if (!visit_type_OnOffAuto(v, name, &value, errp)) { + return; + } + + switch (value) { + case ON_OFF_AUTO_AUTO: + p->on_bits &= ~mask; + p->auto_bits |= mask; + break; + + case ON_OFF_AUTO_ON: + p->on_bits |= mask; + p->auto_bits &= ~mask; + break; + + case ON_OFF_AUTO_OFF: + p->on_bits &= ~mask; + p->auto_bits &= ~mask; + break; + + case ON_OFF_AUTO__MAX: + g_assert_not_reached(); + } +} + +const PropertyInfo qdev_prop_on_off_auto_bit64 = { + .type = "OnOffAuto", + .description = "on/off/auto", + .enum_table = &OnOffAuto_lookup, + .get = prop_get_on_off_auto_bit64, + .set = prop_set_on_off_auto_bit64, + .set_default_value = qdev_propinfo_set_default_value_enum, +}; + /* --- bool --- */ static void get_bool(Object *obj, Visitor *v, const char *name, void *opaque, diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index e71367a..ec69e87 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -151,6 +151,17 @@ void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr) sysbus_mmio_map_common(dev, n, addr, false, 0); } +int sysbus_mmio_map_name(SysBusDevice *dev, const char *name, hwaddr addr) +{ + for (int i = 0; i < dev->num_mmio; i++) { + if (!strcmp(dev->mmio[i].memory->name, name)) { + sysbus_mmio_map(dev, i, addr); + return i; + } + } + return -1; +} + void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr, int priority) { diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c index 7064995..99c642b 100644 --- a/hw/core/vm-change-state-handler.c +++ b/hw/core/vm-change-state-handler.c @@ -40,6 +40,7 @@ static int qdev_get_dev_tree_depth(DeviceState *dev) * qdev_add_vm_change_state_handler: * @dev: the device that owns this handler * @cb: the callback function to be invoked + * @cb_ret: the callback function with return value to be invoked * @opaque: user data passed to the callback function * * This function works like qemu_add_vm_change_state_handler() except callbacks @@ -50,25 +51,30 @@ static int qdev_get_dev_tree_depth(DeviceState *dev) * controller's callback is invoked before the children on its bus when the VM * starts running. The order is reversed when the VM stops running. * + * Note that the parameters `cb` and `cb_ret` are mutually exclusive. + * * Returns: an entry to be freed with qemu_del_vm_change_state_handler() */ VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, VMChangeStateHandler *cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque) { - return qdev_add_vm_change_state_handler_full(dev, cb, NULL, opaque); + assert(!cb || !cb_ret); + return qdev_add_vm_change_state_handler_full(dev, cb, NULL, cb_ret, opaque); } /* * Exactly like qdev_add_vm_change_state_handler() but passes a prepare_cb - argument too. + and the cb_ret arguments too.
*/ VMChangeStateEntry *qdev_add_vm_change_state_handler_full( - DeviceState *dev, VMChangeStateHandler *cb, - VMChangeStateHandler *prepare_cb, void *opaque) + DeviceState *dev, VMChangeStateHandler *cb, VMChangeStateHandler *prepare_cb, + VMChangeStateHandlerWithRet *cb_ret, void *opaque) { int depth = qdev_get_dev_tree_depth(dev); - return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, opaque, - depth); + assert(!cb || !cb_ret); + return qemu_add_vm_change_state_handler_prio_full(cb, prepare_cb, cb_ret, + opaque, depth); } diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 52ad1e4..e150d74 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -95,11 +95,15 @@ static uint64_t mailbox_reg_read(void *opaque, hwaddr offset, unsigned size) } if (offset == A_CXL_DEV_MAILBOX_STS) { uint64_t status_reg = cxl_dstate->mbox_reg_state64[offset / size]; - if (cci->bg.complete_pct) { - status_reg = FIELD_DP64(status_reg, CXL_DEV_MAILBOX_STS, BG_OP, - 0); - cxl_dstate->mbox_reg_state64[offset / size] = status_reg; - } + int bgop; + + qemu_mutex_lock(&cci->bg.lock); + bgop = !(cci->bg.complete_pct == 100 || cci->bg.aborted); + + status_reg = FIELD_DP64(status_reg, CXL_DEV_MAILBOX_STS, BG_OP, + bgop); + cxl_dstate->mbox_reg_state64[offset / size] = status_reg; + qemu_mutex_unlock(&cci->bg.lock); } return cxl_dstate->mbox_reg_state64[offset / size]; default: diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c index 12dee2e..7583dd9 100644 --- a/hw/cxl/cxl-events.c +++ b/hw/cxl/cxl-events.c @@ -8,8 +8,6 @@ */ #include "qemu/osdep.h" - -#include "qemu/bswap.h" #include "qemu/error-report.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -260,3 +258,41 @@ void cxl_event_irq_assert(CXLType3Dev *ct3d) } } } + +void cxl_create_dc_event_records_for_extents(CXLType3Dev *ct3d, + CXLDCEventType type, + CXLDCExtentRaw extents[], + uint32_t ext_count) +{ + CXLEventDynamicCapacity event_rec = {}; + int i; + + cxl_assign_event_header(&event_rec.hdr, + &dynamic_capacity_uuid, + (1 << CXL_EVENT_TYPE_INFO), + sizeof(event_rec), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + event_rec.type = type; + event_rec.validity_flags = 1; + event_rec.host_id = 0; + event_rec.updated_region_id = 0; + event_rec.extents_avail = CXL_NUM_EXTENTS_SUPPORTED - + ct3d->dc.total_extent_count; + + for (i = 0; i < ext_count; i++) { + memcpy(&event_rec.dynamic_capacity_extent, + &extents[i], + sizeof(CXLDCExtentRaw)); + event_rec.flags = 0; + if (i < ext_count - 1) { + /* Set "More" flag */ + event_rec.flags |= BIT(0); + } + + if (cxl_event_insert(&ct3d->cxl_dstate, + CXL_EVENT_TYPE_DYNAMIC_CAP, + (CXLEventRecordRaw *)&event_rec)) { + cxl_event_irq_assert(ct3d); + } + } +} diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c index cae4afc..c015baa 100644 --- a/hw/cxl/cxl-host-stubs.c +++ b/hw/cxl/cxl-host-stubs.c @@ -8,8 +8,13 @@ #include "hw/cxl/cxl.h" #include "hw/cxl/cxl_host.h" -void cxl_fmws_link_targets(CXLState *stat, Error **errp) {}; +void cxl_fmws_link_targets(Error **errp) {}; void cxl_machine_init(Object *obj, CXLState *state) {}; void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp) {}; +hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr) +{ + return base; +}; +void cxl_fmws_update_mmio(void) {}; const MemoryRegionOps cfmws_ops; diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index e010163..5c2ce25 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -22,15 +22,17 @@ #include "hw/pci/pcie_port.h" #include 
"hw/pci-bridge/pci_expander_bridge.h" -static void cxl_fixed_memory_window_config(CXLState *cxl_state, - CXLFixedMemoryWindowOptions *object, - Error **errp) +static void cxl_fixed_memory_window_config(CXLFixedMemoryWindowOptions *object, + int index, Error **errp) { ERRP_GUARD(); - g_autofree CXLFixedWindow *fw = g_malloc0(sizeof(*fw)); + DeviceState *dev = qdev_new(TYPE_CXL_FMW); + CXLFixedWindow *fw = CXL_FMW(dev); strList *target; int i; + fw->index = index; + for (target = object->targets; target; target = target->next) { fw->num_targets++; } @@ -65,35 +67,39 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state, fw->targets[i] = g_strdup(target->value); } - cxl_state->fixed_windows = g_list_append(cxl_state->fixed_windows, - g_steal_pointer(&fw)); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), errp); } -void cxl_fmws_link_targets(CXLState *cxl_state, Error **errp) +static int cxl_fmws_link(Object *obj, void *opaque) { - if (cxl_state && cxl_state->fixed_windows) { - GList *it; - - for (it = cxl_state->fixed_windows; it; it = it->next) { - CXLFixedWindow *fw = it->data; - int i; - - for (i = 0; i < fw->num_targets; i++) { - Object *o; - bool ambig; - - o = object_resolve_path_type(fw->targets[i], - TYPE_PXB_CXL_DEV, - &ambig); - if (!o) { - error_setg(errp, "Could not resolve CXLFM target %s", - fw->targets[i]); - return; - } - fw->target_hbs[i] = PXB_CXL_DEV(o); - } + struct CXLFixedWindow *fw; + int i; + + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { + return 0; + } + fw = CXL_FMW(obj); + + for (i = 0; i < fw->num_targets; i++) { + Object *o; + bool ambig; + + o = object_resolve_path_type(fw->targets[i], TYPE_PXB_CXL_DEV, + &ambig); + if (!o) { + error_setg(&error_fatal, "Could not resolve CXLFM target %s", + fw->targets[i]); + return 1; } + fw->target_hbs[i] = PXB_CXL_DEV(o); } + return 0; +} + +void cxl_fmws_link_targets(Error **errp) +{ + /* Order doesn't matter for this, so no need to build list */ + object_child_foreach_recursive(object_get_root(), cxl_fmws_link, NULL); } static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, @@ -325,14 +331,15 @@ static void machine_set_cfmw(Object *obj, Visitor *v, const char *name, CXLState *state = opaque; CXLFixedMemoryWindowOptionsList *cfmw_list = NULL; CXLFixedMemoryWindowOptionsList *it; + int index; visit_type_CXLFixedMemoryWindowOptionsList(v, name, &cfmw_list, errp); if (!cfmw_list) { return; } - for (it = cfmw_list; it; it = it->next) { - cxl_fixed_memory_window_config(state, it->value, errp); + for (it = cfmw_list, index = 0; it; it = it->next, index++) { + cxl_fixed_memory_window_config(it->value, index, errp); } state->cfmw_list = cfmw_list; } @@ -370,3 +377,110 @@ void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp) } } } + +static int cxl_fmws_find(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { + return 0; + } + *list = g_slist_prepend(*list, obj); + + return 0; +} + +static GSList *cxl_fmws_get_all(void) +{ + GSList *list = NULL; + + object_child_foreach_recursive(object_get_root(), cxl_fmws_find, &list); + + return list; +} + +static gint cfmws_cmp(gconstpointer a, gconstpointer b, gpointer d) +{ + const struct CXLFixedWindow *ap = a; + const struct CXLFixedWindow *bp = b; + + return ap->index > bp->index; +} + +GSList *cxl_fmws_get_all_sorted(void) +{ + return g_slist_sort_with_data(cxl_fmws_get_all(), cfmws_cmp, NULL); +} + +static int cxl_fmws_mmio_map(Object *obj, void *opaque) +{ + struct CXLFixedWindow *fw; + + 
if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { + return 0; + } + fw = CXL_FMW(obj); + sysbus_mmio_map(SYS_BUS_DEVICE(fw), 0, fw->base); + + return 0; +} + +void cxl_fmws_update_mmio(void) +{ + /* Ordering is not required for this */ + object_child_foreach_recursive(object_get_root(), cxl_fmws_mmio_map, NULL); +} + +hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr) +{ + GSList *cfmws_list, *iter; + CXLFixedWindow *fw; + + cfmws_list = cxl_fmws_get_all_sorted(); + for (iter = cfmws_list; iter; iter = iter->next) { + fw = CXL_FMW(iter->data); + if (base + fw->size <= max_addr) { + fw->base = base; + base += fw->size; + } + } + g_slist_free(cfmws_list); + + return base; +} + +static void cxl_fmw_realize(DeviceState *dev, Error **errp) +{ + CXLFixedWindow *fw = CXL_FMW(dev); + + memory_region_init_io(&fw->mr, OBJECT(dev), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &fw->mr); +} + +/* + * Note: Fixed memory windows represent fixed address decoders on the host and + * as such have no dynamic state to reset or migrate + */ +static void cxl_fmw_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "CXL Fixed Memory Window"; + dc->realize = cxl_fmw_realize; + /* Reason - created by machines as tightly coupled to machine memory map */ + dc->user_creatable = false; +} + +static const TypeInfo cxl_fmw_info = { + .name = TYPE_CXL_FMW, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(CXLFixedWindow), + .class_init = cxl_fmw_class_init, +}; + +static void cxl_host_register_types(void) +{ + type_register_static(&cxl_fmw_info); +} +type_init(cxl_host_register_types) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 516c01d..68c7cc9 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -7,6 +7,8 @@ * COPYING file in the top-level directory. */ +#include <math.h> + #include "qemu/osdep.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -16,16 +18,22 @@ #include "hw/pci/pci.h" #include "hw/pci-bridge/cxl_upstream_port.h" #include "qemu/cutils.h" +#include "qemu/host-utils.h" #include "qemu/log.h" #include "qemu/units.h" #include "qemu/uuid.h" #include "system/hostmem.h" #include "qemu/range.h" +#include "qapi/qapi-types-cxl.h" #define CXL_CAPACITY_MULTIPLIER (256 * MiB) #define CXL_DC_EVENT_LOG_SIZE 8 -#define CXL_NUM_EXTENTS_SUPPORTED 512 #define CXL_NUM_TAGS_SUPPORTED 0 +#define CXL_ALERTS_LIFE_USED_WARN_THRESH (1 << 0) +#define CXL_ALERTS_OVER_TEMP_WARN_THRESH (1 << 1) +#define CXL_ALERTS_UNDER_TEMP_WARN_THRESH (1 << 2) +#define CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH (1 << 3) +#define CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH (1 << 4) /* * How to add a new command, example. The command set FOO, with cmd BAR. 
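The GET_RESPONSE_MSG_LIMIT/SET_RESPONSE_MSG_LIMIT handlers added in the hunks below encode the mailbox payload ceiling as a log2 value: a limit of n means 2^n bytes, and the setter only accepts n in 8..10. A minimal standalone sketch of that encoding (function and names are illustrative, not from the patch):

    #include <assert.h>
    #include <stdint.h>

    /* log2-encoded response message limit: n encodes a 2^n-byte payload */
    static uint32_t rsp_limit_to_bytes(uint8_t n)
    {
        return UINT32_C(1) << n;
    }

    int main(void)
    {
        assert(rsp_limit_to_bytes(8) == 256);    /* smallest value accepted */
        assert(rsp_limit_to_bytes(9) == 512);
        assert(rsp_limit_to_bytes(10) == 1024);  /* largest value accepted */
        return 0;
    }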
@@ -56,6 +64,9 @@ enum { INFOSTAT = 0x00, #define IS_IDENTIFY 0x1 #define BACKGROUND_OPERATION_STATUS 0x2 + #define GET_RESPONSE_MSG_LIMIT 0x3 + #define SET_RESPONSE_MSG_LIMIT 0x4 + #define BACKGROUND_OPERATION_ABORT 0x5 EVENTS = 0x01, #define GET_RECORDS 0x0 #define CLEAR_RECORDS 0x1 @@ -81,9 +92,13 @@ enum { #define GET_PARTITION_INFO 0x0 #define GET_LSA 0x2 #define SET_LSA 0x3 + HEALTH_INFO_ALERTS = 0x42, + #define GET_ALERT_CONFIG 0x1 + #define SET_ALERT_CONFIG 0x2 SANITIZE = 0x44, #define OVERWRITE 0x0 #define SECURE_ERASE 0x1 + #define MEDIA_OPERATIONS 0x2 PERSISTENT_MEM = 0x45, #define GET_SECURITY_STATE 0x0 MEDIA_AND_POISON = 0x43, @@ -103,6 +118,13 @@ enum { #define GET_PHYSICAL_PORT_STATE 0x1 TUNNEL = 0x53, #define MANAGEMENT_COMMAND 0x0 + FMAPI_DCD_MGMT = 0x56, + #define GET_DCD_INFO 0x0 + #define GET_HOST_DC_REGION_CONFIG 0x1 + #define SET_DC_REGION_CONFIG 0x2 + #define GET_DC_REGION_EXTENT_LIST 0x3 + #define INITIATE_DC_ADD 0x4 + #define INITIATE_DC_RELEASE 0x5 }; /* CCI Message Format CXL r3.1 Figure 7-19 */ @@ -412,12 +434,58 @@ static CXLRetCode cmd_infostat_identify(const struct cxl_cmd *cmd, is_identify->component_type = 0x3; /* Type 3 */ } - /* TODO: Allow this to vary across different CCIs */ - is_identify->max_message_size = 9; /* 512 bytes - MCTP_CXL_MAILBOX_BYTES */ + is_identify->max_message_size = (uint8_t)log2(cci->payload_max); *len_out = sizeof(*is_identify); return CXL_MBOX_SUCCESS; } +/* CXL r3.1 section 8.2.9.1.3: Get Response Message Limit (Opcode 0003h) */ +static CXLRetCode cmd_get_response_msg_limit(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t rsp_limit; + } QEMU_PACKED *get_rsp_msg_limit = (void *)payload_out; + QEMU_BUILD_BUG_ON(sizeof(*get_rsp_msg_limit) != 1); + + get_rsp_msg_limit->rsp_limit = (uint8_t)log2(cci->payload_max); + + *len_out = sizeof(*get_rsp_msg_limit); + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.1 section 8.2.9.1.4: Set Response Message Limit (Opcode 0004h) */ +static CXLRetCode cmd_set_response_msg_limit(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t rsp_limit; + } QEMU_PACKED *in = (void *)payload_in; + QEMU_BUILD_BUG_ON(sizeof(*in) != 1); + struct { + uint8_t rsp_limit; + } QEMU_PACKED *out = (void *)payload_out; + QEMU_BUILD_BUG_ON(sizeof(*out) != 1); + + if (in->rsp_limit < 8 || in->rsp_limit > 10) { + return CXL_MBOX_INVALID_INPUT; + } + + cci->payload_max = 1 << in->rsp_limit; + out->rsp_limit = in->rsp_limit; + + *len_out = sizeof(*out); + return CXL_MBOX_SUCCESS; +} + static void cxl_set_dsp_active_bm(PCIBus *b, PCIDevice *d, void *private) { @@ -636,6 +704,41 @@ static CXLRetCode cmd_infostat_bg_op_sts(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* + * CXL r3.1 Section 8.2.9.1.5: + * Request Abort Background Operation (Opcode 0005h) + */ +static CXLRetCode cmd_infostat_bg_op_abort(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + int bg_set = cci->bg.opcode >> 8; + int bg_cmd = cci->bg.opcode & 0xff; + const struct cxl_cmd *bg_c = &cci->cxl_cmd_set[bg_set][bg_cmd]; + + if (!(bg_c->effect & CXL_MBOX_BACKGROUND_OPERATION_ABORT)) { + return CXL_MBOX_REQUEST_ABORT_NOTSUP; + } + + qemu_mutex_lock(&cci->bg.lock); + if (cci->bg.runtime) { + /* operation is near complete, let it finish */ + if (cci->bg.complete_pct < 85) { + 
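+            /*
+             * The 85% cut-off is this model's heuristic: an operation that
+             * far along is treated as near-complete and allowed to finish,
+             * and the abort request still returns success without having
+             * cancelled anything.
+             */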
timer_del(cci->bg.timer); + cci->bg.ret_code = CXL_MBOX_ABORTED; + cci->bg.starttime = 0; + cci->bg.runtime = 0; + cci->bg.aborted = true; + } + } + qemu_mutex_unlock(&cci->bg.lock); + + return CXL_MBOX_SUCCESS; +} + #define CXL_FW_SLOTS 2 #define CXL_FW_SIZE 0x02000000 /* 32 mb */ @@ -1523,6 +1626,97 @@ static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } +/* CXL r3.2 Section 8.2.10.9.3.2 Get Alert Configuration (Opcode 4201h) */ +static CXLRetCode cmd_get_alert_config(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLAlertConfig *out = (CXLAlertConfig *)payload_out; + + memcpy(out, &ct3d->alert_config, sizeof(ct3d->alert_config)); + *len_out = sizeof(ct3d->alert_config); + + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.2 Section 8.2.10.9.3.3 Set Alert Configuration (Opcode 4202h) */ +static CXLRetCode cmd_set_alert_config(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLAlertConfig *alert_config = &ct3d->alert_config; + struct { + uint8_t valid_alert_actions; + uint8_t enable_alert_actions; + uint8_t life_used_warn_thresh; + uint8_t rsvd; + uint16_t over_temp_warn_thresh; + uint16_t under_temp_warn_thresh; + uint16_t cor_vmem_err_warn_thresh; + uint16_t cor_pmem_err_warn_thresh; + } QEMU_PACKED *in = (void *)payload_in; + + if (in->valid_alert_actions & CXL_ALERTS_LIFE_USED_WARN_THRESH) { + /* + * CXL r3.2 Table 8-149: The life used warning threshold shall be + * less than the life used critical alert value. + */ + if (in->life_used_warn_thresh >= + alert_config->life_used_crit_alert_thresh) { + return CXL_MBOX_INVALID_INPUT; + } + alert_config->life_used_warn_thresh = in->life_used_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_LIFE_USED_WARN_THRESH; + } + + if (in->valid_alert_actions & CXL_ALERTS_OVER_TEMP_WARN_THRESH) { + /* + * CXL r3.2 Table 8-149: The Device Over-Temperature Warning Threshold + * shall be less than the Device Over-Temperature Critical + * Alert Threshold. + */ + if (in->over_temp_warn_thresh >= + alert_config->over_temp_crit_alert_thresh) { + return CXL_MBOX_INVALID_INPUT; + } + alert_config->over_temp_warn_thresh = in->over_temp_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_OVER_TEMP_WARN_THRESH; + } + + if (in->valid_alert_actions & CXL_ALERTS_UNDER_TEMP_WARN_THRESH) { + /* + * CXL r3.2 Table 8-149: The Device Under-Temperature Warning Threshold + * shall be higher than the Device Under-Temperature Critical + * Alert Threshold. 
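+         * (Put differently: as the temperature falls, the warning threshold
+         * must trip before the critical alert does.)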
+ */ + if (in->under_temp_warn_thresh <= + alert_config->under_temp_crit_alert_thresh) { + return CXL_MBOX_INVALID_INPUT; + } + alert_config->under_temp_warn_thresh = in->under_temp_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_UNDER_TEMP_WARN_THRESH; + } + + if (in->valid_alert_actions & CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH) { + alert_config->cor_vmem_err_warn_thresh = in->cor_vmem_err_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_COR_VMEM_ERR_WARN_THRESH; + } + + if (in->valid_alert_actions & CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH) { + alert_config->cor_pmem_err_warn_thresh = in->cor_pmem_err_warn_thresh; + alert_config->enable_alerts |= CXL_ALERTS_COR_PMEM_ERR_WARN_THRESH; + } + return CXL_MBOX_SUCCESS; +} + /* Perform the actual device zeroing */ static void __do_sanitization(CXLType3Dev *ct3d) { @@ -1553,34 +1747,10 @@ static void __do_sanitization(CXLType3Dev *ct3d) cxl_discard_all_event_records(&ct3d->cxl_dstate); } -/* - * CXL r3.1 Section 8.2.9.9.5.1: Sanitize (Opcode 4400h) - * - * Once the Sanitize command has started successfully, the device shall be - * placed in the media disabled state. If the command fails or is interrupted - * by a reset or power failure, it shall remain in the media disabled state - * until a successful Sanitize command has been completed. During this state: - * - * 1. Memory writes to the device will have no effect, and all memory reads - * will return random values (no user data returned, even for locations that - * the failed Sanitize operation didn’t sanitize yet). - * - * 2. Mailbox commands shall still be processed in the disabled state, except - * that commands that access Sanitized areas shall fail with the Media Disabled - * error code. - */ -static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, - uint8_t *payload_in, - size_t len_in, - uint8_t *payload_out, - size_t *len_out, - CXLCCI *cci) +static int get_sanitize_duration(uint64_t total_mem) { - CXLType3Dev *ct3d = CXL_TYPE3(cci->d); - uint64_t total_mem; /* in Mb */ - int secs; + int secs = 0; - total_mem = (ct3d->cxl_dstate.vmem_size + ct3d->cxl_dstate.pmem_size) >> 20; if (total_mem <= 512) { secs = 4; } else if (total_mem <= 1024) { @@ -1609,6 +1779,39 @@ static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, secs = 240 * 60; /* max 4 hrs */ } + return secs; +} + +/* + * CXL r3.1 Section 8.2.9.9.5.1: Sanitize (Opcode 4400h) + * + * Once the Sanitize command has started successfully, the device shall be + * placed in the media disabled state. If the command fails or is interrupted + * by a reset or power failure, it shall remain in the media disabled state + * until a successful Sanitize command has been completed. During this state: + * + * 1. Memory writes to the device will have no effect, and all memory reads + * will return random values (no user data returned, even for locations that + * the failed Sanitize operation didn’t sanitize yet). + * + * 2. Mailbox commands shall still be processed in the disabled state, except + * that commands that access Sanitized areas shall fail with the Media Disabled + * error code. 
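+ *
+ * (The simulated runtime from get_sanitize_duration() above is a step
+ * function of capacity: up to 512 MiB costs 4 seconds, up to 1 GiB costs 8,
+ * and the table is capped at 4 hours for the largest devices.)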
+ */ +static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + uint64_t total_mem; /* in Mb */ + int secs; + + total_mem = (ct3d->cxl_dstate.vmem_size + ct3d->cxl_dstate.pmem_size) >> 20; + secs = get_sanitize_duration(total_mem); + /* EBUSY other bg cmds as of now */ cci->bg.runtime = secs * 1000UL; *len_out = 0; @@ -1619,6 +1822,324 @@ static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd, return CXL_MBOX_BG_STARTED; } +struct dpa_range_list_entry { + uint64_t starting_dpa; + uint64_t length; +} QEMU_PACKED; + +struct CXLSanitizeInfo { + uint32_t dpa_range_count; + uint8_t fill_value; + struct dpa_range_list_entry dpa_range_list[]; +} QEMU_PACKED; + +static uint64_t get_vmr_size(CXLType3Dev *ct3d, MemoryRegion **vmr) +{ + MemoryRegion *mr; + if (ct3d->hostvmem) { + mr = host_memory_backend_get_memory(ct3d->hostvmem); + if (vmr) { + *vmr = mr; + } + return memory_region_size(mr); + } + return 0; +} + +static uint64_t get_pmr_size(CXLType3Dev *ct3d, MemoryRegion **pmr) +{ + MemoryRegion *mr; + if (ct3d->hostpmem) { + mr = host_memory_backend_get_memory(ct3d->hostpmem); + if (pmr) { + *pmr = mr; + } + return memory_region_size(mr); + } + return 0; +} + +static uint64_t get_dc_size(CXLType3Dev *ct3d, MemoryRegion **dc_mr) +{ + MemoryRegion *mr; + if (ct3d->dc.host_dc) { + mr = host_memory_backend_get_memory(ct3d->dc.host_dc); + if (dc_mr) { + *dc_mr = mr; + } + return memory_region_size(mr); + } + return 0; +} + +static int validate_dpa_addr(CXLType3Dev *ct3d, uint64_t dpa_addr, + size_t length) +{ + uint64_t vmr_size, pmr_size, dc_size; + + if ((dpa_addr % CXL_CACHE_LINE_SIZE) || + (length % CXL_CACHE_LINE_SIZE) || + (length <= 0)) { + return -EINVAL; + } + + vmr_size = get_vmr_size(ct3d, NULL); + pmr_size = get_pmr_size(ct3d, NULL); + dc_size = get_dc_size(ct3d, NULL); + + if (dpa_addr + length > vmr_size + pmr_size + dc_size) { + return -EINVAL; + } + + if (dpa_addr > vmr_size + pmr_size) { + if (!ct3_test_region_block_backed(ct3d, dpa_addr, length)) { + return -ENODEV; + } + } + + return 0; +} + +static int sanitize_range(CXLType3Dev *ct3d, uint64_t dpa_addr, size_t length, + uint8_t fill_value) +{ + + uint64_t vmr_size, pmr_size; + AddressSpace *as = NULL; + MemTxAttrs mem_attrs = {}; + + vmr_size = get_vmr_size(ct3d, NULL); + pmr_size = get_pmr_size(ct3d, NULL); + + if (dpa_addr < vmr_size) { + as = &ct3d->hostvmem_as; + } else if (dpa_addr < vmr_size + pmr_size) { + as = &ct3d->hostpmem_as; + } else { + if (!ct3_test_region_block_backed(ct3d, dpa_addr, length)) { + return -ENODEV; + } + as = &ct3d->dc.host_dc_as; + } + + return address_space_set(as, dpa_addr, fill_value, length, mem_attrs); +} + +/* Perform the actual device zeroing */ +static void __do_sanitize(CXLType3Dev *ct3d) +{ + struct CXLSanitizeInfo *san_info = ct3d->media_op_sanitize; + int dpa_range_count = san_info->dpa_range_count; + int rc = 0; + int i; + + for (i = 0; i < dpa_range_count; i++) { + rc = sanitize_range(ct3d, san_info->dpa_range_list[i].starting_dpa, + san_info->dpa_range_list[i].length, + san_info->fill_value); + if (rc) { + goto exit; + } + } +exit: + g_free(ct3d->media_op_sanitize); + ct3d->media_op_sanitize = NULL; + return; +} + +enum { + MEDIA_OP_CLASS_GENERAL = 0x0, + #define MEDIA_OP_GEN_SUBC_DISCOVERY 0x0 + MEDIA_OP_CLASS_SANITIZE = 0x1, + #define MEDIA_OP_SAN_SUBC_SANITIZE 0x0 + #define MEDIA_OP_SAN_SUBC_ZERO 0x1 +}; + +struct 
media_op_supported_list_entry { + uint8_t media_op_class; + uint8_t media_op_subclass; +}; + +struct media_op_discovery_out_pl { + uint64_t dpa_range_granularity; + uint16_t total_supported_operations; + uint16_t num_of_supported_operations; + struct media_op_supported_list_entry entry[]; +} QEMU_PACKED; + +static const struct media_op_supported_list_entry media_op_matrix[] = { + { MEDIA_OP_CLASS_GENERAL, MEDIA_OP_GEN_SUBC_DISCOVERY }, + { MEDIA_OP_CLASS_SANITIZE, MEDIA_OP_SAN_SUBC_SANITIZE }, + { MEDIA_OP_CLASS_SANITIZE, MEDIA_OP_SAN_SUBC_ZERO }, +}; + +static CXLRetCode media_operations_discovery(uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out) +{ + struct { + uint8_t media_operation_class; + uint8_t media_operation_subclass; + uint8_t rsvd[2]; + uint32_t dpa_range_count; + struct { + uint16_t start_index; + uint16_t num_ops; + } discovery_osa; + } QEMU_PACKED *media_op_in_disc_pl = (void *)payload_in; + struct media_op_discovery_out_pl *media_out_pl = + (struct media_op_discovery_out_pl *)payload_out; + int num_ops, start_index, i; + int count = 0; + + if (len_in < sizeof(*media_op_in_disc_pl)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + + num_ops = media_op_in_disc_pl->discovery_osa.num_ops; + start_index = media_op_in_disc_pl->discovery_osa.start_index; + + /* + * As per CXL r3.2 section 8.2.10.9.5.3, dpa_range_count shall be zero and + * the start index shall not exceed the total number of entries for the + * discovery subclass command. + */ + if (media_op_in_disc_pl->dpa_range_count || + start_index > ARRAY_SIZE(media_op_matrix)) { + return CXL_MBOX_INVALID_INPUT; + } + + media_out_pl->dpa_range_granularity = CXL_CACHE_LINE_SIZE; + media_out_pl->total_supported_operations = + ARRAY_SIZE(media_op_matrix); + if (num_ops > 0) { + /* never walk past the end of the supported-operations table */ + for (i = start_index; i < ARRAY_SIZE(media_op_matrix); i++) { + media_out_pl->entry[count].media_op_class = + media_op_matrix[i].media_op_class; + media_out_pl->entry[count].media_op_subclass = + media_op_matrix[i].media_op_subclass; + count++; + if (count == num_ops) { + break; + } + } + } + + media_out_pl->num_of_supported_operations = count; + *len_out = sizeof(*media_out_pl) + count * sizeof(*media_out_pl->entry); + return CXL_MBOX_SUCCESS; +} + +static CXLRetCode media_operations_sanitize(CXLType3Dev *ct3d, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + uint8_t fill_value, + CXLCCI *cci) +{ + struct media_operations_sanitize { + uint8_t media_operation_class; + uint8_t media_operation_subclass; + uint8_t rsvd[2]; + uint32_t dpa_range_count; + struct dpa_range_list_entry dpa_range_list[]; + } QEMU_PACKED *media_op_in_sanitize_pl = (void *)payload_in; + uint32_t dpa_range_count = media_op_in_sanitize_pl->dpa_range_count; + uint64_t total_mem = 0; + size_t dpa_range_list_size; + int secs = 0, i; + + if (dpa_range_count == 0) { + return CXL_MBOX_SUCCESS; + } + + dpa_range_list_size = dpa_range_count * sizeof(struct dpa_range_list_entry); + if (len_in < (sizeof(*media_op_in_sanitize_pl) + dpa_range_list_size)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + + for (i = 0; i < dpa_range_count; i++) { + uint64_t start_dpa = + media_op_in_sanitize_pl->dpa_range_list[i].starting_dpa; + uint64_t length = media_op_in_sanitize_pl->dpa_range_list[i].length; + + if (validate_dpa_addr(ct3d, start_dpa, length)) { + return CXL_MBOX_INVALID_INPUT; + } + total_mem += length; + } + ct3d->media_op_sanitize = g_malloc0(sizeof(struct CXLSanitizeInfo) + + dpa_range_list_size); + + 
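+    /*
+     * A single allocation covers the fixed header plus the flexible
+     * dpa_range_list[] tail; __do_sanitize() consumes and frees it once
+     * the background operation completes.
+     */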
ct3d->media_op_sanitize->dpa_range_count = dpa_range_count; + ct3d->media_op_sanitize->fill_value = fill_value; + memcpy(ct3d->media_op_sanitize->dpa_range_list, + media_op_in_sanitize_pl->dpa_range_list, + dpa_range_list_size); + secs = get_sanitize_duration(total_mem >> 20); + + /* EBUSY other bg cmds as of now */ + cci->bg.runtime = secs * 1000UL; + *len_out = 0; + /* + * media op sanitize is targeted so no need to disable media or + * clear event logs + */ + return CXL_MBOX_BG_STARTED; +} + +static CXLRetCode cmd_media_operations(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t media_operation_class; + uint8_t media_operation_subclass; + uint8_t rsvd[2]; + uint32_t dpa_range_count; + } QEMU_PACKED *media_op_in_common_pl = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + uint8_t media_op_cl = 0; + uint8_t media_op_subclass = 0; + + if (len_in < sizeof(*media_op_in_common_pl)) { + return CXL_MBOX_INVALID_PAYLOAD_LENGTH; + } + + media_op_cl = media_op_in_common_pl->media_operation_class; + media_op_subclass = media_op_in_common_pl->media_operation_subclass; + + switch (media_op_cl) { + case MEDIA_OP_CLASS_GENERAL: + if (media_op_subclass != MEDIA_OP_GEN_SUBC_DISCOVERY) { + return CXL_MBOX_UNSUPPORTED; + } + + return media_operations_discovery(payload_in, len_in, payload_out, + len_out); + case MEDIA_OP_CLASS_SANITIZE: + switch (media_op_subclass) { + case MEDIA_OP_SAN_SUBC_SANITIZE: + return media_operations_sanitize(ct3d, payload_in, len_in, + payload_out, len_out, 0xF, + cci); + case MEDIA_OP_SAN_SUBC_ZERO: + return media_operations_sanitize(ct3d, payload_in, len_in, + payload_out, len_out, 0, + cci); + default: + return CXL_MBOX_UNSUPPORTED; + } + default: + return CXL_MBOX_UNSUPPORTED; + } +} + static CXLRetCode cmd_get_security_state(const struct cxl_cmd *cmd, uint8_t *payload_in, size_t len_in, @@ -2237,7 +2758,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd, uint16_t out_pl_len, size; CXLDCExtent *ent; - if (start_extent_id > ct3d->dc.total_extent_count) { + if (start_extent_id > ct3d->dc.nr_extents_accepted) { return CXL_MBOX_INVALID_INPUT; } @@ -2248,7 +2769,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd, out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); stl_le_p(&out->count, record_count); - stl_le_p(&out->total_extents, ct3d->dc.total_extent_count); + stl_le_p(&out->total_extents, ct3d->dc.nr_extents_accepted); stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq); if (record_count > 0) { @@ -2370,16 +2891,20 @@ void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list, QTAILQ_INSERT_TAIL(list, group, node); } -void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list) +uint32_t cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list) { CXLDCExtent *ent, *ent_next; CXLDCExtentGroup *group = QTAILQ_FIRST(list); + uint32_t extents_deleted = 0; QTAILQ_REMOVE(list, group, node); QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) { cxl_remove_extent_from_extent_list(&group->list, ent); + extents_deleted++; } g_free(group); + + return extents_deleted; } /* @@ -2498,7 +3023,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, CXLUpdateDCExtentListInPl *in = (void *)payload_in; CXLType3Dev *ct3d = CXL_TYPE3(cci->d); CXLDCExtentList *extent_list = &ct3d->dc.extents; - uint32_t i; + uint32_t i, num; uint64_t dpa, len; CXLRetCode ret; @@ -2507,7 
+3032,8 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, } if (in->num_entries_updated == 0) { - cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); + num = cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); + ct3d->dc.total_extent_count -= num; return CXL_MBOX_SUCCESS; } @@ -2538,10 +3064,12 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0); ct3d->dc.total_extent_count += 1; + ct3d->dc.nr_extents_accepted += 1; ct3_set_region_block_backed(ct3d, dpa, len); } /* Remove the first extent group in the pending list */ - cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); + num = cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); + ct3d->dc.total_extent_count -= num; return CXL_MBOX_SUCCESS; } @@ -2647,7 +3175,7 @@ free_and_exit: } *updated_list_size = 0; } else { - *updated_list_size = ct3d->dc.total_extent_count + cnt_delta; + *updated_list_size = ct3d->dc.nr_extents_accepted + cnt_delta; } return ret; @@ -2709,12 +3237,498 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd, ct3_set_region_block_backed(ct3d, ent->start_dpa, ent->len); cxl_remove_extent_from_extent_list(&updated_list, ent); } - ct3d->dc.total_extent_count = updated_list_size; + ct3d->dc.total_extent_count += (updated_list_size - + ct3d->dc.nr_extents_accepted); + + ct3d->dc.nr_extents_accepted = updated_list_size; + + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.2 section 7.6.7.6.1: Get DCD Info (Opcode 5600h) */ +static CXLRetCode cmd_fm_get_dcd_info(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t num_hosts; + uint8_t num_regions_supported; + uint8_t rsvd1[2]; + uint16_t supported_add_sel_policy_bitmask; + uint8_t rsvd2[2]; + uint16_t supported_removal_policy_bitmask; + uint8_t sanitize_on_release_bitmask; + uint8_t rsvd3; + uint64_t total_dynamic_capacity; + uint64_t region_blk_size_bitmasks[8]; + } QEMU_PACKED *out = (void *)payload_out; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDCRegion *region; + int i; + + out->num_hosts = 1; + out->num_regions_supported = ct3d->dc.num_regions; + stw_le_p(&out->supported_add_sel_policy_bitmask, + BIT(CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE)); + stw_le_p(&out->supported_removal_policy_bitmask, + BIT(CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE)); + out->sanitize_on_release_bitmask = 0; + + stq_le_p(&out->total_dynamic_capacity, + ct3d->dc.total_capacity / CXL_CAPACITY_MULTIPLIER); + + for (i = 0; i < ct3d->dc.num_regions; i++) { + region = &ct3d->dc.regions[i]; + memcpy(&out->region_blk_size_bitmasks[i], + &region->supported_blk_size_bitmask, + sizeof(out->region_blk_size_bitmasks[i])); + } + + *len_out = sizeof(*out); + return CXL_MBOX_SUCCESS; +} + +static void build_dsmas_flags(uint8_t *flags, CXLDCRegion *region) +{ + *flags = 0; + + if (region->nonvolatile) { + *flags |= BIT(CXL_DSMAS_FLAGS_NONVOLATILE); + } + if (region->sharable) { + *flags |= BIT(CXL_DSMAS_FLAGS_SHARABLE); + } + if (region->hw_managed_coherency) { + *flags |= BIT(CXL_DSMAS_FLAGS_HW_MANAGED_COHERENCY); + } + if (region->ic_specific_dc_management) { + *flags |= BIT(CXL_DSMAS_FLAGS_IC_SPECIFIC_DC_MANAGEMENT); + } + if (region->rdonly) { + *flags |= BIT(CXL_DSMAS_FLAGS_RDONLY); + } +} + +/* + * CXL r3.2 section 7.6.7.6.2: + * Get Host DC Region Configuration (Opcode 5601h) + */ +static CXLRetCode cmd_fm_get_host_dc_region_config(const struct cxl_cmd *cmd, 
+ uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint16_t host_id; + uint8_t region_cnt; + uint8_t start_rid; + } QEMU_PACKED *in = (void *)payload_in; + struct { + uint16_t host_id; + uint8_t num_regions; + uint8_t regions_returned; + struct { + uint64_t base; + uint64_t decode_len; + uint64_t region_len; + uint64_t block_size; + uint8_t flags; + uint8_t rsvd1[3]; + uint8_t sanitize; + uint8_t rsvd2[3]; + } QEMU_PACKED records[]; + } QEMU_PACKED *out = (void *)payload_out; + struct { + uint32_t num_extents_supported; + uint32_t num_extents_available; + uint32_t num_tags_supported; + uint32_t num_tags_available; + } QEMU_PACKED *extra_out; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + uint16_t record_count, out_pl_len, i; + + if (in->start_rid >= ct3d->dc.num_regions) { + return CXL_MBOX_INVALID_INPUT; + } + record_count = MIN(ct3d->dc.num_regions - in->start_rid, in->region_cnt); + + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + extra_out = (void *)out + out_pl_len; + out_pl_len += sizeof(*extra_out); + + assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); + + stw_le_p(&out->host_id, 0); + out->num_regions = ct3d->dc.num_regions; + out->regions_returned = record_count; + + for (i = 0; i < record_count; i++) { + stq_le_p(&out->records[i].base, + ct3d->dc.regions[in->start_rid + i].base); + stq_le_p(&out->records[i].decode_len, + ct3d->dc.regions[in->start_rid + i].decode_len / + CXL_CAPACITY_MULTIPLIER); + stq_le_p(&out->records[i].region_len, + ct3d->dc.regions[in->start_rid + i].len); + stq_le_p(&out->records[i].block_size, + ct3d->dc.regions[in->start_rid + i].block_size); + build_dsmas_flags(&out->records[i].flags, + &ct3d->dc.regions[in->start_rid + i]); + /* Sanitize is bit 0 of flags. */ + out->records[i].sanitize = + ct3d->dc.regions[in->start_rid + i].flags & BIT(0); + } + + stl_le_p(&extra_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED); + stl_le_p(&extra_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED - + ct3d->dc.total_extent_count); + stl_le_p(&extra_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED); + stl_le_p(&extra_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED); + + *len_out = out_pl_len; + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.2 section 7.6.7.6.3: Set Host DC Region Configuration (Opcode 5602h) */ +static CXLRetCode cmd_fm_set_dc_region_config(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t reg_id; + uint8_t rsvd[3]; + uint64_t block_sz; + uint8_t flags; + uint8_t rsvd2[3]; + } QEMU_PACKED *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLEventDynamicCapacity dcEvent = {}; + CXLDCRegion *region; + + /* + * CXL r3.2 7.6.7.6.3: Set DC Region Configuration + * This command shall fail with Unsupported when the Sanitize on Release + * field does not match the region’s configuration... and the device + * does not support reconfiguration of the Sanitize on Release setting. + * + * Currently not reconfigurable, so always fail if sanitize bit (bit 0) + * doesn't match. 
+ */ + /* Validate the region id before the region array is dereferenced */ + if (in->reg_id >= DCD_MAX_NUM_REGION) { + return CXL_MBOX_UNSUPPORTED; + } + region = &ct3d->dc.regions[in->reg_id]; + + if ((in->flags & 0x1) != (region->flags & 0x1)) { + return CXL_MBOX_UNSUPPORTED; + } + + /* Check that no extents are in the region being reconfigured */ + if (!bitmap_empty(region->blk_bitmap, region->len / region->block_size)) { + return CXL_MBOX_UNSUPPORTED; + } + + /* Check that new block size is supported */ + if (!is_power_of_2(in->block_sz) || + !(in->block_sz & region->supported_blk_size_bitmask)) { + return CXL_MBOX_INVALID_INPUT; + } + /* Return success if new block size == current block size */ + if (in->block_sz == region->block_size) { + return CXL_MBOX_SUCCESS; + } + + /* Free bitmap and create new one for new block size. */ + qemu_mutex_lock(&region->bitmap_lock); + g_free(region->blk_bitmap); + region->blk_bitmap = bitmap_new(region->len / in->block_sz); + qemu_mutex_unlock(&region->bitmap_lock); + region->block_size = in->block_sz; + + /* Create event record and insert into event log */ + cxl_assign_event_header(&dcEvent.hdr, + &dynamic_capacity_uuid, + (1 << CXL_EVENT_TYPE_INFO), + sizeof(dcEvent), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + dcEvent.type = DC_EVENT_REGION_CONFIG_UPDATED; + dcEvent.validity_flags = 1; + dcEvent.host_id = 0; + dcEvent.updated_region_id = in->reg_id; + + if (cxl_event_insert(&ct3d->cxl_dstate, + CXL_EVENT_TYPE_DYNAMIC_CAP, + (CXLEventRecordRaw *)&dcEvent)) { + cxl_event_irq_assert(ct3d); + } + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.2 section 7.6.7.6.4: Get DC Region Extent Lists (Opcode 5603h) */ +static CXLRetCode cmd_fm_get_dc_region_extent_list(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint16_t host_id; + uint8_t rsvd[2]; + uint32_t extent_cnt; + uint32_t start_extent_id; + } QEMU_PACKED *in = (void *)payload_in; + struct { + uint16_t host_id; + uint8_t rsvd[2]; + uint32_t start_extent_id; + uint32_t extents_returned; + uint32_t total_extents; + uint32_t list_generation_num; + uint8_t rsvd2[4]; + CXLDCExtentRaw records[]; + } QEMU_PACKED *out = (void *)payload_out; + QEMU_BUILD_BUG_ON(sizeof(*in) != 0xc); + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDCExtent *ent; + CXLDCExtentRaw *out_rec; + uint16_t record_count = 0, record_done = 0, i = 0; + uint16_t out_pl_len, max_size; + + if (in->host_id != 0) { + return CXL_MBOX_INVALID_INPUT; + } + + if (in->start_extent_id > ct3d->dc.nr_extents_accepted) { + return CXL_MBOX_INVALID_INPUT; + } + + record_count = MIN(in->extent_cnt, + ct3d->dc.nr_extents_accepted - in->start_extent_id); + max_size = CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out); + record_count = MIN(record_count, max_size / sizeof(out->records[0])); + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + + stw_le_p(&out->host_id, in->host_id); + stl_le_p(&out->start_extent_id, in->start_extent_id); + stl_le_p(&out->extents_returned, record_count); + stl_le_p(&out->total_extents, ct3d->dc.nr_extents_accepted); + stl_le_p(&out->list_generation_num, ct3d->dc.ext_list_gen_seq); + + if (record_count > 0) { + QTAILQ_FOREACH(ent, &ct3d->dc.extents, node) { + if (i++ < in->start_extent_id) { + continue; + } + out_rec = &out->records[record_done]; + stq_le_p(&out_rec->start_dpa, ent->start_dpa); + stq_le_p(&out_rec->len, ent->len); + memcpy(&out_rec->tag, ent->tag, 0x10); + stw_le_p(&out_rec->shared_seq, ent->shared_seq); + + record_done++; + if (record_done == record_count) { + break; + } + } + } + + *len_out = out_pl_len; return 
CXL_MBOX_SUCCESS; } +/* + * Helper function to convert CXLDCExtentRaw to CXLUpdateDCExtentListInPl + * in order to reuse cxl_detect_malformed_extent_list() function which accepts + * CXLUpdateDCExtentListInPl as a parameter. + */ +static void convert_raw_extents(CXLDCExtentRaw raw_extents[], + CXLUpdateDCExtentListInPl *extent_list, + int count) +{ + int i; + + extent_list->num_entries_updated = count; + + for (i = 0; i < count; i++) { + extent_list->updated_entries[i].start_dpa = raw_extents[i].start_dpa; + extent_list->updated_entries[i].len = raw_extents[i].len; + } +} + +/* CXL r3.2 Section 7.6.7.6.5: Initiate Dynamic Capacity Add (Opcode 5604h) */ +static CXLRetCode cmd_fm_initiate_dc_add(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint16_t host_id; + uint8_t selection_policy; + uint8_t reg_num; + uint64_t length; + uint8_t tag[0x10]; + uint32_t ext_count; + CXLDCExtentRaw extents[]; + } QEMU_PACKED *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + int i, rc; + + switch (in->selection_policy) { + case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE: { + /* Adding extents exceeds device's extent tracking ability. */ + if (in->ext_count + ct3d->dc.total_extent_count > + CXL_NUM_EXTENTS_SUPPORTED) { + return CXL_MBOX_RESOURCES_EXHAUSTED; + } + + g_autofree CXLUpdateDCExtentListInPl *list = + g_malloc0(sizeof(*list) + + in->ext_count * sizeof(*list->updated_entries)); + + convert_raw_extents(in->extents, list, in->ext_count); + rc = cxl_detect_malformed_extent_list(ct3d, list); + + for (i = 0; i < in->ext_count; i++) { + CXLDCExtentRaw *ext = &in->extents[i]; + + /* Check requested extents do not overlap with pending ones. */ + if (cxl_extent_groups_overlaps_dpa_range(&ct3d->dc.extents_pending, + ext->start_dpa, + ext->len)) { + return CXL_MBOX_INVALID_EXTENT_LIST; + } + /* Check requested extents do not overlap with existing ones. 
*/ + if (cxl_extents_overlaps_dpa_range(&ct3d->dc.extents, + ext->start_dpa, + ext->len)) { + return CXL_MBOX_INVALID_EXTENT_LIST; + } + } + + if (rc) { + return rc; + } + + CXLDCExtentGroup *group = NULL; + for (i = 0; i < in->ext_count; i++) { + CXLDCExtentRaw *ext = &in->extents[i]; + + group = cxl_insert_extent_to_extent_group(group, ext->start_dpa, + ext->len, ext->tag, + ext->shared_seq); + } + + cxl_extent_group_list_insert_tail(&ct3d->dc.extents_pending, group); + ct3d->dc.total_extent_count += in->ext_count; + cxl_create_dc_event_records_for_extents(ct3d, + DC_EVENT_ADD_CAPACITY, + in->extents, + in->ext_count); + + return CXL_MBOX_SUCCESS; + } + default: { + qemu_log_mask(LOG_UNIMP, + "CXL extent selection policy not supported.\n"); + return CXL_MBOX_INVALID_INPUT; + } + } +} + +#define CXL_EXTENT_REMOVAL_POLICY_MASK 0x0F +#define CXL_FORCED_REMOVAL_MASK (1 << 4) +/* + * CXL r3.2 Section 7.6.7.6.6: + * Initiate Dynamic Capacity Release (Opcode 5605h) + */ +static CXLRetCode cmd_fm_initiate_dc_release(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint16_t host_id; + uint8_t flags; + uint8_t reg_num; + uint64_t length; + uint8_t tag[0x10]; + uint32_t ext_count; + CXLDCExtentRaw extents[]; + } QEMU_PACKED *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + int i, rc; + + switch (in->flags & CXL_EXTENT_REMOVAL_POLICY_MASK) { + case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE: { + CXLDCExtentList updated_list; + uint32_t updated_list_size; + g_autofree CXLUpdateDCExtentListInPl *list = + g_malloc0(sizeof(*list) + + in->ext_count * sizeof(*list->updated_entries)); + + convert_raw_extents(in->extents, list, in->ext_count); + rc = cxl_detect_malformed_extent_list(ct3d, list); + if (rc) { + return rc; + } + + /* + * Fail with Invalid PA if an extent is pending and Forced Removal + * flag not set. + */ + if (!(in->flags & CXL_FORCED_REMOVAL_MASK)) { + for (i = 0; i < in->ext_count; i++) { + CXLDCExtentRaw ext = in->extents[i]; + /* + * Check requested extents don't overlap with pending + * extents. 
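+                 * (With the Forced Removal flag set this loop is skipped
+                 * entirely, so capacity can be released even while an
+                 * extent offer is still sitting in the pending list.)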
+ */ + if (cxl_extent_groups_overlaps_dpa_range( + &ct3d->dc.extents_pending, + ext.start_dpa, + ext.len)) { + return CXL_MBOX_INVALID_PA; + } + } + } + + rc = cxl_dc_extent_release_dry_run(ct3d, + list, + &updated_list, + &updated_list_size); + if (rc) { + return rc; + } + cxl_create_dc_event_records_for_extents(ct3d, + DC_EVENT_RELEASE_CAPACITY, + in->extents, + in->ext_count); + return CXL_MBOX_SUCCESS; + } + default: { + qemu_log_mask(LOG_UNIMP, + "CXL extent removal policy not supported.\n"); + return CXL_MBOX_INVALID_INPUT; + } + } +} + static const struct cxl_cmd cxl_cmd_set[256][256] = { + [INFOSTAT][BACKGROUND_OPERATION_ABORT] = { "BACKGROUND_OPERATION_ABORT", + cmd_infostat_bg_op_abort, 0, 0 }, [EVENTS][GET_RECORDS] = { "EVENTS_GET_RECORDS", cmd_events_get_records, 1, 0 }, [EVENTS][CLEAR_RECORDS] = { "EVENTS_CLEAR_RECORDS", @@ -2727,9 +3741,11 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { [FIRMWARE_UPDATE][GET_INFO] = { "FIRMWARE_UPDATE_GET_INFO", cmd_firmware_update_get_info, 0, 0 }, [FIRMWARE_UPDATE][TRANSFER] = { "FIRMWARE_UPDATE_TRANSFER", - cmd_firmware_update_transfer, ~0, CXL_MBOX_BACKGROUND_OPERATION }, + cmd_firmware_update_transfer, ~0, + CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT }, [FIRMWARE_UPDATE][ACTIVATE] = { "FIRMWARE_UPDATE_ACTIVATE", - cmd_firmware_update_activate, 2, CXL_MBOX_BACKGROUND_OPERATION }, + cmd_firmware_update_activate, 2, + CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT }, [TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, @@ -2755,9 +3771,20 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 }, [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa, ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE }, + [HEALTH_INFO_ALERTS][GET_ALERT_CONFIG] = { + "HEALTH_INFO_ALERTS_GET_ALERT_CONFIG", + cmd_get_alert_config, 0, 0 }, + [HEALTH_INFO_ALERTS][SET_ALERT_CONFIG] = { + "HEALTH_INFO_ALERTS_SET_ALERT_CONFIG", + cmd_set_alert_config, 12, CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, [SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0, (CXL_MBOX_IMMEDIATE_DATA_CHANGE | CXL_MBOX_SECURITY_STATE_CHANGE | + CXL_MBOX_BACKGROUND_OPERATION | + CXL_MBOX_BACKGROUND_OPERATION_ABORT)}, + [SANITIZE][MEDIA_OPERATIONS] = { "MEDIA_OPERATIONS", cmd_media_operations, + ~0, + (CXL_MBOX_IMMEDIATE_DATA_CHANGE | CXL_MBOX_BACKGROUND_OPERATION)}, [PERSISTENT_MEM][GET_SECURITY_STATE] = { "GET_SECURITY_STATE", cmd_get_security_state, 0, 0 }, @@ -2771,7 +3798,8 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = { "MEDIA_AND_POISON_GET_SCAN_MEDIA_CAPABILITIES", cmd_media_get_scan_media_capabilities, 16, 0 }, [MEDIA_AND_POISON][SCAN_MEDIA] = { "MEDIA_AND_POISON_SCAN_MEDIA", - cmd_media_scan_media, 17, CXL_MBOX_BACKGROUND_OPERATION }, + cmd_media_scan_media, 17, + (CXL_MBOX_BACKGROUND_OPERATION | CXL_MBOX_BACKGROUND_OPERATION_ABORT)}, [MEDIA_AND_POISON][GET_SCAN_MEDIA_RESULTS] = { "MEDIA_AND_POISON_GET_SCAN_MEDIA_RESULTS", cmd_media_get_scan_media_results, 0, 0 }, @@ -2795,6 +3823,8 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { [INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 0 }, [INFOSTAT][BACKGROUND_OPERATION_STATUS] = { "BACKGROUND_OPERATION_STATUS", cmd_infostat_bg_op_sts, 0, 0 }, + [INFOSTAT][BACKGROUND_OPERATION_ABORT] = { "BACKGROUND_OPERATION_ABORT", + cmd_infostat_bg_op_abort, 0, 0 }, 
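+    /*
+     * Row format, for reference: { name, handler, expected input payload
+     * length (~0 accepts a variable-length payload), effect flags }. A
+     * command is abortable only when its entry carries
+     * CXL_MBOX_BACKGROUND_OPERATION_ABORT, which is exactly what
+     * cmd_infostat_bg_op_abort() tests.
+     */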
[TIMESTAMP][GET] = { "TIMESTAMP_GET", cmd_timestamp_get, 0, 0 }, [TIMESTAMP][SET] = { "TIMESTAMP_SET", cmd_timestamp_set, 8, CXL_MBOX_IMMEDIATE_POLICY_CHANGE }, @@ -2809,6 +3839,36 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { cmd_tunnel_management_cmd, ~0, 0 }, }; +static const struct cxl_cmd cxl_cmd_set_fm_dcd[256][256] = { + [FMAPI_DCD_MGMT][GET_DCD_INFO] = { "GET_DCD_INFO", + cmd_fm_get_dcd_info, 0, 0 }, + [FMAPI_DCD_MGMT][GET_HOST_DC_REGION_CONFIG] = { "GET_HOST_DC_REGION_CONFIG", + cmd_fm_get_host_dc_region_config, 4, 0 }, + [FMAPI_DCD_MGMT][SET_DC_REGION_CONFIG] = { "SET_DC_REGION_CONFIG", + cmd_fm_set_dc_region_config, 16, + (CXL_MBOX_CONFIG_CHANGE_COLD_RESET | + CXL_MBOX_CONFIG_CHANGE_CONV_RESET | + CXL_MBOX_CONFIG_CHANGE_CXL_RESET | + CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | + CXL_MBOX_IMMEDIATE_DATA_CHANGE) }, + [FMAPI_DCD_MGMT][GET_DC_REGION_EXTENT_LIST] = { "GET_DC_REGION_EXTENT_LIST", + cmd_fm_get_dc_region_extent_list, 12, 0 }, + [FMAPI_DCD_MGMT][INITIATE_DC_ADD] = { "INIT_DC_ADD", + cmd_fm_initiate_dc_add, ~0, + (CXL_MBOX_CONFIG_CHANGE_COLD_RESET | + CXL_MBOX_CONFIG_CHANGE_CONV_RESET | + CXL_MBOX_CONFIG_CHANGE_CXL_RESET | + CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | + CXL_MBOX_IMMEDIATE_DATA_CHANGE) }, + [FMAPI_DCD_MGMT][INITIATE_DC_RELEASE] = { "INIT_DC_RELEASE", + cmd_fm_initiate_dc_release, ~0, + (CXL_MBOX_CONFIG_CHANGE_COLD_RESET | + CXL_MBOX_CONFIG_CHANGE_CONV_RESET | + CXL_MBOX_CONFIG_CHANGE_CXL_RESET | + CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | + CXL_MBOX_IMMEDIATE_DATA_CHANGE) }, +}; + /* * While the command is executing in the background, the device should * update the percentage complete in the Background Command Status Register @@ -2881,6 +3941,7 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, cci->bg.opcode = (set << 8) | cmd; cci->bg.complete_pct = 0; + cci->bg.aborted = false; cci->bg.ret_code = 0; now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); @@ -2894,10 +3955,12 @@ int cxl_process_cci_message(CXLCCI *cci, uint8_t set, uint8_t cmd, static void bg_timercb(void *opaque) { CXLCCI *cci = opaque; - uint64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - uint64_t total_time = cci->bg.starttime + cci->bg.runtime; + uint64_t now, total_time; + + qemu_mutex_lock(&cci->bg.lock); - assert(cci->bg.runtime > 0); + now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + total_time = cci->bg.starttime + cci->bg.runtime; if (now >= total_time) { /* we are done */ uint16_t ret = CXL_MBOX_SUCCESS; @@ -2916,6 +3979,12 @@ static void bg_timercb(void *opaque) cxl_dev_enable_media(&ct3d->cxl_dstate); } break; + case 0x4402: /* Media Operations sanitize */ + { + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + __do_sanitize(ct3d); + } + break; case 0x4304: /* scan media */ { CXLType3Dev *ct3d = CXL_TYPE3(cci->d); @@ -2950,6 +4019,8 @@ static void bg_timercb(void *opaque) msi_notify(pdev, cxl_dstate->mbox_msi_n); } } + + qemu_mutex_unlock(&cci->bg.lock); } static void cxl_rebuild_cel(CXLCCI *cci) @@ -2978,12 +4049,21 @@ void cxl_init_cci(CXLCCI *cci, size_t payload_max) cci->bg.complete_pct = 0; cci->bg.starttime = 0; cci->bg.runtime = 0; + cci->bg.aborted = false; cci->bg.timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, bg_timercb, cci); + qemu_mutex_init(&cci->bg.lock); memset(&cci->fw, 0, sizeof(cci->fw)); cci->fw.active_slot = 1; cci->fw.slot[cci->fw.active_slot - 1] = true; + cci->initialized = true; +} + +void cxl_destroy_cci(CXLCCI *cci) +{ + qemu_mutex_destroy(&cci->bg.lock); + cci->initialized = false; } static void cxl_copy_cci_commands(CXLCCI *cci, const struct cxl_cmd 
(*cxl_cmds)[256]) @@ -3047,6 +4127,10 @@ void cxl_initialize_t3_ld_cci(CXLCCI *cci, DeviceState *d, DeviceState *intf, static const struct cxl_cmd cxl_cmd_set_t3_fm_owned_ld_mctp[256][256] = { [INFOSTAT][IS_IDENTIFY] = { "IDENTIFY", cmd_infostat_identify, 0, 0}, + [INFOSTAT][GET_RESPONSE_MSG_LIMIT] = { "GET_RESPONSE_MSG_LIMIT", + cmd_get_response_msg_limit, 0, 0 }, + [INFOSTAT][SET_RESPONSE_MSG_LIMIT] = { "SET_RESPONSE_MSG_LIMIT", + cmd_set_response_msg_limit, 1, 0 }, [LOGS][GET_SUPPORTED] = { "LOGS_GET_SUPPORTED", cmd_logs_get_supported, 0, 0 }, [LOGS][GET_LOG] = { "LOGS_GET_LOG", cmd_logs_get_log, 0x18, 0 }, @@ -3059,7 +4143,12 @@ void cxl_initialize_t3_fm_owned_ld_mctpcci(CXLCCI *cci, DeviceState *d, DeviceState *intf, size_t payload_max) { + CXLType3Dev *ct3d = CXL_TYPE3(d); + cxl_copy_cci_commands(cci, cxl_cmd_set_t3_fm_owned_ld_mctp); + if (ct3d->dc.num_regions) { + cxl_copy_cci_commands(cci, cxl_cmd_set_fm_dcd); + } cci->d = d; cci->intf = intf; cxl_init_cci(cci, payload_max); diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m index 2ff1c90..174d56a 100644 --- a/hw/display/apple-gfx.m +++ b/hw/display/apple-gfx.m @@ -69,7 +69,7 @@ struct PGTask_s { mach_vm_address_t address; uint64_t len; /* - * All unique MemoryRegions for which a mapping has been created in in this + * All unique MemoryRegions for which a mapping has been created in this * task, and on which we have thus called memory_region_ref(). There are * usually very few regions of system RAM in total, so we expect this array * to be very short. Therefore, no need for sorting or fancy search @@ -454,7 +454,7 @@ static void set_cursor_glyph(void *opaque) /* ------ DMA (device reading system memory) ------ */ typedef struct AppleGFXReadMemoryJob { - QemuSemaphore sem; + QemuEvent event; hwaddr physical_address; uint64_t length; void *dst; @@ -470,7 +470,7 @@ static void apple_gfx_do_read_memory(void *opaque) job->dst, job->length, MEMTXATTRS_UNSPECIFIED); job->success = (r == MEMTX_OK); - qemu_sem_post(&job->sem); + qemu_event_set(&job->event); } static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address, @@ -483,11 +483,11 @@ static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address, trace_apple_gfx_read_memory(physical_address, length, dst); /* Performing DMA requires BQL, so do it in a BH. 
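 * (The QemuEvent switch below suits this one-shot handshake: the BH signals
 * completion exactly once with qemu_event_set() and the caller blocks in
 * qemu_event_wait(); unlike a semaphore, no counting is needed.)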
*/ - qemu_sem_init(&job.sem, 0); + qemu_event_init(&job.event, 0); aio_bh_schedule_oneshot(qemu_get_aio_context(), apple_gfx_do_read_memory, &job); - qemu_sem_wait(&job.sem); - qemu_sem_destroy(&job.sem); + qemu_event_wait(&job.event); + qemu_event_destroy(&job.event); return job.success; } diff --git a/hw/display/artist.c b/hw/display/artist.c index 3fafc8a..3c884c9 100644 --- a/hw/display/artist.c +++ b/hw/display/artist.c @@ -12,6 +12,7 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qemu/units.h" +#include "qemu/bswap.h" #include "qapi/error.h" #include "hw/sysbus.h" #include "hw/loader.h" diff --git a/hw/display/ati.c b/hw/display/ati.c index 7de2773..f7c0006 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -22,6 +22,7 @@ #include "vga-access.h" #include "hw/qdev-properties.h" #include "vga_regs.h" +#include "qemu/bswap.h" #include "qemu/log.h" #include "qemu/module.h" #include "qemu/error-report.h" diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c index eda6d3d..c6a9ac1 100644 --- a/hw/display/qxl-render.c +++ b/hw/display/qxl-render.c @@ -222,6 +222,7 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl, uint32_t max_chunks = 32; size_t offset = 0; size_t bytes; + QXLPHYSICAL next_chunk_phys = 0; for (;;) { bytes = MIN(size - offset, chunk->data_size); @@ -230,7 +231,15 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl, if (offset == size) { return; } - chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id, + next_chunk_phys = chunk->next_chunk; + /* first time, only get the next chunk's data size */ + chunk = qxl_phys2virt(qxl, next_chunk_phys, group_id, + sizeof(QXLDataChunk)); + if (!chunk) { + return; + } + /* second time, check data size and get data */ + chunk = qxl_phys2virt(qxl, next_chunk_phys, group_id, + sizeof(QXLDataChunk) + chunk->data_size); + if (!chunk) { return; diff --git a/hw/display/ramfb-standalone.c b/hw/display/ramfb-standalone.c index 08f2d5d..72b2071 100644 --- a/hw/display/ramfb-standalone.c +++ b/hw/display/ramfb-standalone.c @@ -17,6 +17,7 @@ struct RAMFBStandaloneState { QemuConsole *con; RAMFBState *state; bool migrate; + bool use_legacy_x86_rom; }; static void display_update_wrapper(void *dev) @@ -39,7 +40,7 @@ static void ramfb_realizefn(DeviceState *dev, Error **errp) RAMFBStandaloneState *ramfb = RAMFB(dev); ramfb->con = graphic_console_init(dev, 0, &wrapper_ops, dev); - ramfb->state = ramfb_setup(errp); + ramfb->state = ramfb_setup(ramfb->use_legacy_x86_rom, errp); } static bool migrate_needed(void *opaque) @@ -62,6 +63,8 @@ static const VMStateDescription ramfb_dev_vmstate = { static const Property ramfb_properties[] = { DEFINE_PROP_BOOL("x-migrate", RAMFBStandaloneState, migrate, true), + DEFINE_PROP_BOOL("use-legacy-x86-rom", RAMFBStandaloneState, + use_legacy_x86_rom, false), }; static void ramfb_class_initfn(ObjectClass *klass, const void *data) diff --git a/hw/display/ramfb-stubs.c b/hw/display/ramfb-stubs.c index cf64733..b835513 100644 --- a/hw/display/ramfb-stubs.c +++ b/hw/display/ramfb-stubs.c @@ -8,7 +8,7 @@ void ramfb_display_update(QemuConsole *con, RAMFBState *s) { } -RAMFBState *ramfb_setup(Error **errp) +RAMFBState *ramfb_setup(bool romfile, Error **errp) { error_setg(errp, "ramfb support not available"); return NULL; diff --git a/hw/display/ramfb.c b/hw/display/ramfb.c index 8c0f907..9a17d97 100644 --- a/hw/display/ramfb.c +++ b/hw/display/ramfb.c @@ -135,7 +135,7 @@ const VMStateDescription ramfb_vmstate = { } }; -RAMFBState *ramfb_setup(Error **errp) 
+RAMFBState *ramfb_setup(bool romfile, Error **errp) { FWCfgState *fw_cfg = fw_cfg_find(); RAMFBState *s; @@ -147,7 +147,9 @@ RAMFBState *ramfb_setup(Error **errp) s = g_new0(RAMFBState, 1); - rom_add_vga("vgabios-ramfb.bin"); + if (romfile) { + rom_add_vga("vgabios-ramfb.bin"); + } fw_cfg_add_file_callback(fw_cfg, "etc/ramfb", NULL, ramfb_fw_cfg_write, s, &s->cfg, sizeof(s->cfg), false); diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 6d2f186..bc091b3 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/module.h" #include "hw/usb/hcd-ohci.h" diff --git a/hw/display/vga.c b/hw/display/vga.c index 20475eb..90b89cf 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -26,7 +26,7 @@ #include "qemu/units.h" #include "system/reset.h" #include "qapi/error.h" -#include "exec/tswap.h" +#include "qemu/target-info.h" #include "hw/display/vga.h" #include "hw/i386/x86.h" #include "hw/pci/pci.h" diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 43d4c08..9fc6bbc 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -516,7 +516,7 @@ vhost_user_gpu_set_config(VirtIODevice *vdev, } } -static void +static int vhost_user_gpu_set_status(VirtIODevice *vdev, uint8_t val) { VhostUserGPU *g = VHOST_USER_GPU(vdev); @@ -525,18 +525,24 @@ vhost_user_gpu_set_status(VirtIODevice *vdev, uint8_t val) if (val & VIRTIO_CONFIG_S_DRIVER_OK && vdev->vm_running) { if (!vhost_user_gpu_do_set_socket(g, &err)) { error_report_err(err); - return; + return 0; } vhost_user_backend_start(g->vhost); } else { + int ret; + /* unblock any wait and stop processing */ if (g->vhost_gpu_fd != -1) { vhost_user_gpu_update_blocked(g, true); qemu_chr_fe_deinit(&g->vhost_chr, true); g->vhost_gpu_fd = -1; } - vhost_user_backend_stop(g->vhost); + ret = vhost_user_backend_stop(g->vhost); + if (ret < 0) { + return ret; + } } + return 0; } static bool diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 9eb806b..7269477 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -19,6 +19,7 @@ #include "qemu/error-report.h" #include "hw/display/edid.h" #include "trace.h" +#include "qapi/qapi-types-virtio.h" void virtio_gpu_base_reset(VirtIOGPUBase *g) @@ -56,6 +57,8 @@ void virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout, struct virtio_gpu_resp_edid *edid) { + size_t output_idx; + VirtIOGPUOutputList *node; qemu_edid_info info = { .width_mm = g->req_state[scanout].width_mm, .height_mm = g->req_state[scanout].height_mm, @@ -64,6 +67,14 @@ virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout, .refresh_rate = g->req_state[scanout].refresh_rate, }; + for (output_idx = 0, node = g->conf.outputs; + output_idx <= scanout && node; output_idx++, node = node->next) { + if (output_idx == scanout && node->value && node->value->name) { + info.name = node->value->name; + break; + } + } + edid->size = cpu_to_le32(sizeof(edid->edid)); qemu_edid_generate(edid->edid, sizeof(edid->edid), &info); } @@ -172,6 +183,8 @@ virtio_gpu_base_device_realize(DeviceState *qdev, VirtIOHandleOutput cursor_cb, Error **errp) { + size_t output_idx; + VirtIOGPUOutputList *node; VirtIODevice *vdev = VIRTIO_DEVICE(qdev); VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev); int i; @@ -181,6 +194,20 @@ virtio_gpu_base_device_realize(DeviceState *qdev, return false; } + for (output_idx = 0, node = g->conf.outputs; + 
node; output_idx++, node = node->next) { + if (output_idx == g->conf.max_outputs) { + error_setg(errp, "invalid outputs > %d", g->conf.max_outputs); + return false; + } + if (node->value && node->value->name && + strlen(node->value->name) > EDID_NAME_MAX_LENGTH) { + error_setg(errp, "invalid output name '%s' > %d", + node->value->name, EDID_NAME_MAX_LENGTH); + return false; + } + } + if (virtio_gpu_virgl_enabled(g->conf)) { error_setg(&g->migration_blocker, "virgl is not yet migratable"); if (migrate_add_blocker(&g->migration_blocker, errp) < 0) { diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 145a0b3..94ddc01 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -970,6 +970,15 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, } trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); +#if VIRGL_VERSION_MAJOR >= 1 + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { + virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, + VIRGL_RENDERER_FENCE_FLAG_MERGEABLE, + cmd->cmd_hdr.ring_idx, + cmd->cmd_hdr.fence_id); + return; + } +#endif virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); } @@ -983,6 +992,11 @@ static void virgl_write_fence(void *opaque, uint32_t fence) * the guest can end up emitting fences out of order * so we should check all fenced cmds not just the first one. */ +#if VIRGL_VERSION_MAJOR >= 1 + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { + continue; + } +#endif if (cmd->cmd_hdr.fence_id > fence) { continue; } @@ -997,6 +1011,29 @@ static void virgl_write_fence(void *opaque, uint32_t fence) } } +#if VIRGL_VERSION_MAJOR >= 1 +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id, + uint32_t ring_idx, uint64_t fence_id) { + VirtIOGPU *g = opaque; + struct virtio_gpu_ctrl_command *cmd, *tmp; + + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX && + cmd->cmd_hdr.ctx_id == ctx_id && cmd->cmd_hdr.ring_idx == ring_idx && + cmd->cmd_hdr.fence_id <= fence_id) { + trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + g_free(cmd); + g->inflight--; + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + trace_virtio_gpu_dec_inflight_fences(g->inflight); + } + } + } +} +#endif + static virgl_renderer_gl_context virgl_create_context(void *opaque, int scanout_idx, struct virgl_renderer_gl_ctx_param *params) @@ -1031,11 +1068,18 @@ static int virgl_make_context_current(void *opaque, int scanout_idx, } static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = { +#if VIRGL_VERSION_MAJOR >= 1 + .version = 3, +#else .version = 1, +#endif .write_fence = virgl_write_fence, .create_gl_context = virgl_create_context, .destroy_gl_context = virgl_destroy_context, .make_current = virgl_make_context_current, +#if VIRGL_VERSION_MAJOR >= 1 + .write_context_fence = virgl_write_context_fence, +#endif }; static void virtio_gpu_print_stats(void *opaque) diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index 544bb65..bc1a8ed 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -618,7 +618,7 @@ static void vmsvga_fifo_run(struct vmsvga_state_s *s) uint32_t cmd, colour; int args, len, maxloop = 1024; int x, y, dx, dy, width, height; - struct vmsvga_cursor_definition_s cursor; + QEMU_UNINITIALIZED struct vmsvga_cursor_definition_s cursor; uint32_t cmd_start; len = vmsvga_fifo_length(s); 
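A note on QEMU_UNINITIALIZED in the vmware_vga hunk above: it marks a large, always-overwritten stack object so that builds using automatic stack-variable zeroing (e.g. -ftrivial-auto-var-init=zero) can skip the redundant clearing. A standalone sketch of the idea — MY_UNINITIALIZED below is a hypothetical stand-in, not QEMU's actual macro:

    #include <string.h>

    #if defined(__has_attribute)
    # if __has_attribute(uninitialized)
    #  define MY_UNINITIALIZED __attribute__((uninitialized))
    # endif
    #endif
    #ifndef MY_UNINITIALIZED
    # define MY_UNINITIALIZED
    #endif

    struct cursor_scratch {
        unsigned char image[4096];    /* big enough that zeroing shows up */
    };

    int main(void)
    {
        /* Fully written before any read, so zero-init would be wasted work */
        MY_UNINITIALIZED struct cursor_scratch cur;

        memset(cur.image, 0xff, sizeof(cur.image));
        return cur.image[0] == 0xff ? 0 : 1;
    }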
diff --git a/hw/dma/omap_dma.c b/hw/dma/omap_dma.c index 9a8c3c3..101f91f 100644 --- a/hw/dma/omap_dma.c +++ b/hw/dma/omap_dma.c @@ -131,9 +131,9 @@ struct omap_dma_s { #define LAST_FRAME_INTR (1 << 4) #define END_BLOCK_INTR (1 << 5) #define SYNC (1 << 6) -#define END_PKT_INTR (1 << 7) -#define TRANS_ERR_INTR (1 << 8) -#define MISALIGN_INTR (1 << 11) +#define END_PKT_INTR (1 << 7) +#define TRANS_ERR_INTR (1 << 8) +#define MISALIGN_INTR (1 << 11) static inline void omap_dma_interrupts_update(struct omap_dma_s *s) { @@ -526,12 +526,12 @@ static void omap_dma_transfer_setup(struct soc_dma_ch_s *dma) /* Check all the conditions that terminate the transfer starting * with those that can occur the soonest. */ -#define INTR_CHECK(cond, id, nelements) \ - if (cond) { \ - elements[id] = nelements; \ - if (elements[id] < min_elems) \ - min_elems = elements[id]; \ - } else \ +#define INTR_CHECK(cond, id, nelements) \ + if (cond) { \ + elements[id] = nelements; \ + if (elements[id] < min_elems) \ + min_elems = elements[id]; \ + } else \ elements[id] = INT_MAX; /* Elements */ @@ -740,7 +740,7 @@ static int omap_dma_ch_reg_read(struct omap_dma_s *s, struct omap_dma_channel_s *ch, int reg, uint16_t *value) { switch (reg) { - case 0x00: /* SYS_DMA_CSDP_CH0 */ + case 0x00: /* SYS_DMA_CSDP_CH0 */ *value = (ch->burst[1] << 14) | (ch->pack[1] << 13) | (ch->port[1] << 9) | @@ -750,9 +750,9 @@ static int omap_dma_ch_reg_read(struct omap_dma_s *s, (ch->data_type >> 1); break; - case 0x02: /* SYS_DMA_CCR_CH0 */ + case 0x02: /* SYS_DMA_CCR_CH0 */ if (s->model <= omap_dma_3_1) - *value = 0 << 10; /* FIFO_FLUSH reads as 0 */ + *value = 0 << 10; /* FIFO_FLUSH reads as 0 */ else *value = ch->omap_3_1_compatible_disable << 10; *value |= (ch->mode[1] << 14) | @@ -765,11 +765,11 @@ static int omap_dma_ch_reg_read(struct omap_dma_s *s, (ch->fs << 5) | ch->sync; break; - case 0x04: /* SYS_DMA_CICR_CH0 */ + case 0x04: /* SYS_DMA_CICR_CH0 */ *value = ch->interrupts; break; - case 0x06: /* SYS_DMA_CSR_CH0 */ + case 0x06: /* SYS_DMA_CSR_CH0 */ *value = ch->status; ch->status &= SYNC; if (!ch->omap_3_1_compatible_disable && ch->sibling) { @@ -779,77 +779,77 @@ static int omap_dma_ch_reg_read(struct omap_dma_s *s, qemu_irq_lower(ch->irq); break; - case 0x08: /* SYS_DMA_CSSA_L_CH0 */ + case 0x08: /* SYS_DMA_CSSA_L_CH0 */ *value = ch->addr[0] & 0x0000ffff; break; - case 0x0a: /* SYS_DMA_CSSA_U_CH0 */ + case 0x0a: /* SYS_DMA_CSSA_U_CH0 */ *value = ch->addr[0] >> 16; break; - case 0x0c: /* SYS_DMA_CDSA_L_CH0 */ + case 0x0c: /* SYS_DMA_CDSA_L_CH0 */ *value = ch->addr[1] & 0x0000ffff; break; - case 0x0e: /* SYS_DMA_CDSA_U_CH0 */ + case 0x0e: /* SYS_DMA_CDSA_U_CH0 */ *value = ch->addr[1] >> 16; break; - case 0x10: /* SYS_DMA_CEN_CH0 */ + case 0x10: /* SYS_DMA_CEN_CH0 */ *value = ch->elements; break; - case 0x12: /* SYS_DMA_CFN_CH0 */ + case 0x12: /* SYS_DMA_CFN_CH0 */ *value = ch->frames; break; - case 0x14: /* SYS_DMA_CFI_CH0 */ + case 0x14: /* SYS_DMA_CFI_CH0 */ *value = ch->frame_index[0]; break; - case 0x16: /* SYS_DMA_CEI_CH0 */ + case 0x16: /* SYS_DMA_CEI_CH0 */ *value = ch->element_index[0]; break; - case 0x18: /* SYS_DMA_CPC_CH0 or DMA_CSAC */ + case 0x18: /* SYS_DMA_CPC_CH0 or DMA_CSAC */ if (ch->omap_3_1_compatible_disable) - *value = ch->active_set.src & 0xffff; /* CSAC */ + *value = ch->active_set.src & 0xffff; /* CSAC */ else *value = ch->cpc; break; - case 0x1a: /* DMA_CDAC */ - *value = ch->active_set.dest & 0xffff; /* CDAC */ + case 0x1a: /* DMA_CDAC */ + *value = ch->active_set.dest & 0xffff; /* CDAC */ break; - case 
0x1c: /* DMA_CDEI */ + case 0x1c: /* DMA_CDEI */ *value = ch->element_index[1]; break; - case 0x1e: /* DMA_CDFI */ + case 0x1e: /* DMA_CDFI */ *value = ch->frame_index[1]; break; - case 0x20: /* DMA_COLOR_L */ + case 0x20: /* DMA_COLOR_L */ *value = ch->color & 0xffff; break; - case 0x22: /* DMA_COLOR_U */ + case 0x22: /* DMA_COLOR_U */ *value = ch->color >> 16; break; - case 0x24: /* DMA_CCR2 */ + case 0x24: /* DMA_CCR2 */ *value = (ch->bs << 2) | (ch->transparent_copy << 1) | ch->constant_fill; break; - case 0x28: /* DMA_CLNK_CTRL */ + case 0x28: /* DMA_CLNK_CTRL */ *value = (ch->link_enabled << 15) | (ch->link_next_ch & 0xf); break; - case 0x2a: /* DMA_LCH_CTRL */ + case 0x2a: /* DMA_LCH_CTRL */ *value = (ch->interleave_disabled << 15) | ch->type; break; @@ -864,7 +864,7 @@ static int omap_dma_ch_reg_write(struct omap_dma_s *s, struct omap_dma_channel_s *ch, int reg, uint16_t value) { switch (reg) { - case 0x00: /* SYS_DMA_CSDP_CH0 */ + case 0x00: /* SYS_DMA_CSDP_CH0 */ ch->burst[1] = (value & 0xc000) >> 14; ch->pack[1] = (value & 0x2000) >> 13; ch->port[1] = (enum omap_dma_port) ((value & 0x1e00) >> 9); @@ -887,7 +887,7 @@ static int omap_dma_ch_reg_write(struct omap_dma_s *s, } break; - case 0x02: /* SYS_DMA_CCR_CH0 */ + case 0x02: /* SYS_DMA_CCR_CH0 */ ch->mode[1] = (omap_dma_addressing_t) ((value & 0xc000) >> 14); ch->mode[0] = (omap_dma_addressing_t) ((value & 0x3000) >> 12); ch->end_prog = (value & 0x0800) >> 11; @@ -909,88 +909,88 @@ static int omap_dma_ch_reg_write(struct omap_dma_s *s, break; - case 0x04: /* SYS_DMA_CICR_CH0 */ + case 0x04: /* SYS_DMA_CICR_CH0 */ ch->interrupts = value & 0x3f; break; - case 0x06: /* SYS_DMA_CSR_CH0 */ + case 0x06: /* SYS_DMA_CSR_CH0 */ OMAP_RO_REG((hwaddr) reg); break; - case 0x08: /* SYS_DMA_CSSA_L_CH0 */ + case 0x08: /* SYS_DMA_CSSA_L_CH0 */ ch->addr[0] &= 0xffff0000; ch->addr[0] |= value; break; - case 0x0a: /* SYS_DMA_CSSA_U_CH0 */ + case 0x0a: /* SYS_DMA_CSSA_U_CH0 */ ch->addr[0] &= 0x0000ffff; ch->addr[0] |= (uint32_t) value << 16; break; - case 0x0c: /* SYS_DMA_CDSA_L_CH0 */ + case 0x0c: /* SYS_DMA_CDSA_L_CH0 */ ch->addr[1] &= 0xffff0000; ch->addr[1] |= value; break; - case 0x0e: /* SYS_DMA_CDSA_U_CH0 */ + case 0x0e: /* SYS_DMA_CDSA_U_CH0 */ ch->addr[1] &= 0x0000ffff; ch->addr[1] |= (uint32_t) value << 16; break; - case 0x10: /* SYS_DMA_CEN_CH0 */ + case 0x10: /* SYS_DMA_CEN_CH0 */ ch->elements = value; break; - case 0x12: /* SYS_DMA_CFN_CH0 */ + case 0x12: /* SYS_DMA_CFN_CH0 */ ch->frames = value; break; - case 0x14: /* SYS_DMA_CFI_CH0 */ + case 0x14: /* SYS_DMA_CFI_CH0 */ ch->frame_index[0] = (int16_t) value; break; - case 0x16: /* SYS_DMA_CEI_CH0 */ + case 0x16: /* SYS_DMA_CEI_CH0 */ ch->element_index[0] = (int16_t) value; break; - case 0x18: /* SYS_DMA_CPC_CH0 or DMA_CSAC */ + case 0x18: /* SYS_DMA_CPC_CH0 or DMA_CSAC */ OMAP_RO_REG((hwaddr) reg); break; - case 0x1c: /* DMA_CDEI */ + case 0x1c: /* DMA_CDEI */ ch->element_index[1] = (int16_t) value; break; - case 0x1e: /* DMA_CDFI */ + case 0x1e: /* DMA_CDFI */ ch->frame_index[1] = (int16_t) value; break; - case 0x20: /* DMA_COLOR_L */ + case 0x20: /* DMA_COLOR_L */ ch->color &= 0xffff0000; ch->color |= value; break; - case 0x22: /* DMA_COLOR_U */ + case 0x22: /* DMA_COLOR_U */ ch->color &= 0xffff; ch->color |= (uint32_t)value << 16; break; - case 0x24: /* DMA_CCR2 */ + case 0x24: /* DMA_CCR2 */ ch->bs = (value >> 2) & 0x1; ch->transparent_copy = (value >> 1) & 0x1; ch->constant_fill = value & 0x1; break; - case 0x28: /* DMA_CLNK_CTRL */ + case 0x28: /* DMA_CLNK_CTRL */ 
ch->link_enabled = (value >> 15) & 0x1; - if (value & (1 << 14)) { /* Stop_Lnk */ + if (value & (1 << 14)) { /* Stop_Lnk */ ch->link_enabled = 0; omap_dma_disable_channel(s, ch); } ch->link_next_ch = value & 0x1f; break; - case 0x2a: /* DMA_LCH_CTRL */ + case 0x2a: /* DMA_LCH_CTRL */ ch->interleave_disabled = (value >> 15) & 0x1; ch->type = value & 0xf; break; @@ -1005,7 +1005,7 @@ static int omap_dma_3_2_lcd_write(struct omap_dma_lcd_channel_s *s, int offset, uint16_t value) { switch (offset) { - case 0xbc0: /* DMA_LCD_CSDP */ + case 0xbc0: /* DMA_LCD_CSDP */ s->brust_f2 = (value >> 14) & 0x3; s->pack_f2 = (value >> 13) & 0x1; s->data_type_f2 = (1 << ((value >> 11) & 0x3)); @@ -1014,7 +1014,7 @@ static int omap_dma_3_2_lcd_write(struct omap_dma_lcd_channel_s *s, int offset, s->data_type_f1 = (1 << ((value >> 0) & 0x3)); break; - case 0xbc2: /* DMA_LCD_CCR */ + case 0xbc2: /* DMA_LCD_CCR */ s->mode_f2 = (value >> 14) & 0x3; s->mode_f1 = (value >> 12) & 0x3; s->end_prog = (value >> 11) & 0x1; @@ -1026,7 +1026,7 @@ static int omap_dma_3_2_lcd_write(struct omap_dma_lcd_channel_s *s, int offset, s->bs = (value >> 4) & 0x1; break; - case 0xbc4: /* DMA_LCD_CTRL */ + case 0xbc4: /* DMA_LCD_CTRL */ s->dst = (value >> 8) & 0x1; s->src = ((value >> 6) & 0x3) << 1; s->condition = 0; @@ -1035,91 +1035,91 @@ static int omap_dma_3_2_lcd_write(struct omap_dma_lcd_channel_s *s, int offset, s->dual = value & 1; break; - case 0xbc8: /* TOP_B1_L */ + case 0xbc8: /* TOP_B1_L */ s->src_f1_top &= 0xffff0000; s->src_f1_top |= 0x0000ffff & value; break; - case 0xbca: /* TOP_B1_U */ + case 0xbca: /* TOP_B1_U */ s->src_f1_top &= 0x0000ffff; s->src_f1_top |= (uint32_t)value << 16; break; - case 0xbcc: /* BOT_B1_L */ + case 0xbcc: /* BOT_B1_L */ s->src_f1_bottom &= 0xffff0000; s->src_f1_bottom |= 0x0000ffff & value; break; - case 0xbce: /* BOT_B1_U */ + case 0xbce: /* BOT_B1_U */ s->src_f1_bottom &= 0x0000ffff; s->src_f1_bottom |= (uint32_t) value << 16; break; - case 0xbd0: /* TOP_B2_L */ + case 0xbd0: /* TOP_B2_L */ s->src_f2_top &= 0xffff0000; s->src_f2_top |= 0x0000ffff & value; break; - case 0xbd2: /* TOP_B2_U */ + case 0xbd2: /* TOP_B2_U */ s->src_f2_top &= 0x0000ffff; s->src_f2_top |= (uint32_t) value << 16; break; - case 0xbd4: /* BOT_B2_L */ + case 0xbd4: /* BOT_B2_L */ s->src_f2_bottom &= 0xffff0000; s->src_f2_bottom |= 0x0000ffff & value; break; - case 0xbd6: /* BOT_B2_U */ + case 0xbd6: /* BOT_B2_U */ s->src_f2_bottom &= 0x0000ffff; s->src_f2_bottom |= (uint32_t) value << 16; break; - case 0xbd8: /* DMA_LCD_SRC_EI_B1 */ + case 0xbd8: /* DMA_LCD_SRC_EI_B1 */ s->element_index_f1 = value; break; - case 0xbda: /* DMA_LCD_SRC_FI_B1_L */ + case 0xbda: /* DMA_LCD_SRC_FI_B1_L */ s->frame_index_f1 &= 0xffff0000; s->frame_index_f1 |= 0x0000ffff & value; break; - case 0xbf4: /* DMA_LCD_SRC_FI_B1_U */ + case 0xbf4: /* DMA_LCD_SRC_FI_B1_U */ s->frame_index_f1 &= 0x0000ffff; s->frame_index_f1 |= (uint32_t) value << 16; break; - case 0xbdc: /* DMA_LCD_SRC_EI_B2 */ + case 0xbdc: /* DMA_LCD_SRC_EI_B2 */ s->element_index_f2 = value; break; - case 0xbde: /* DMA_LCD_SRC_FI_B2_L */ + case 0xbde: /* DMA_LCD_SRC_FI_B2_L */ s->frame_index_f2 &= 0xffff0000; s->frame_index_f2 |= 0x0000ffff & value; break; - case 0xbf6: /* DMA_LCD_SRC_FI_B2_U */ + case 0xbf6: /* DMA_LCD_SRC_FI_B2_U */ s->frame_index_f2 &= 0x0000ffff; s->frame_index_f2 |= (uint32_t) value << 16; break; - case 0xbe0: /* DMA_LCD_SRC_EN_B1 */ + case 0xbe0: /* DMA_LCD_SRC_EN_B1 */ s->elements_f1 = value; break; - case 0xbe4: /* DMA_LCD_SRC_FN_B1 */ + case 0xbe4: /* 
DMA_LCD_SRC_FN_B1 */ s->frames_f1 = value; break; - case 0xbe2: /* DMA_LCD_SRC_EN_B2 */ + case 0xbe2: /* DMA_LCD_SRC_EN_B2 */ s->elements_f2 = value; break; - case 0xbe6: /* DMA_LCD_SRC_FN_B2 */ + case 0xbe6: /* DMA_LCD_SRC_FN_B2 */ s->frames_f2 = value; break; - case 0xbea: /* DMA_LCD_LCH_CTRL */ + case 0xbea: /* DMA_LCD_LCH_CTRL */ s->lch_type = value & 0xf; break; @@ -1133,7 +1133,7 @@ static int omap_dma_3_2_lcd_read(struct omap_dma_lcd_channel_s *s, int offset, uint16_t *ret) { switch (offset) { - case 0xbc0: /* DMA_LCD_CSDP */ + case 0xbc0: /* DMA_LCD_CSDP */ *ret = (s->brust_f2 << 14) | (s->pack_f2 << 13) | ((s->data_type_f2 >> 1) << 11) | @@ -1142,7 +1142,7 @@ static int omap_dma_3_2_lcd_read(struct omap_dma_lcd_channel_s *s, int offset, ((s->data_type_f1 >> 1) << 0); break; - case 0xbc2: /* DMA_LCD_CCR */ + case 0xbc2: /* DMA_LCD_CCR */ *ret = (s->mode_f2 << 14) | (s->mode_f1 << 12) | (s->end_prog << 11) | @@ -1154,7 +1154,7 @@ static int omap_dma_3_2_lcd_read(struct omap_dma_lcd_channel_s *s, int offset, (s->bs << 4); break; - case 0xbc4: /* DMA_LCD_CTRL */ + case 0xbc4: /* DMA_LCD_CTRL */ qemu_irq_lower(s->irq); *ret = (s->dst << 8) | ((s->src & 0x6) << 5) | @@ -1163,79 +1163,79 @@ static int omap_dma_3_2_lcd_read(struct omap_dma_lcd_channel_s *s, int offset, s->dual; break; - case 0xbc8: /* TOP_B1_L */ + case 0xbc8: /* TOP_B1_L */ *ret = s->src_f1_top & 0xffff; break; - case 0xbca: /* TOP_B1_U */ + case 0xbca: /* TOP_B1_U */ *ret = s->src_f1_top >> 16; break; - case 0xbcc: /* BOT_B1_L */ + case 0xbcc: /* BOT_B1_L */ *ret = s->src_f1_bottom & 0xffff; break; - case 0xbce: /* BOT_B1_U */ + case 0xbce: /* BOT_B1_U */ *ret = s->src_f1_bottom >> 16; break; - case 0xbd0: /* TOP_B2_L */ + case 0xbd0: /* TOP_B2_L */ *ret = s->src_f2_top & 0xffff; break; - case 0xbd2: /* TOP_B2_U */ + case 0xbd2: /* TOP_B2_U */ *ret = s->src_f2_top >> 16; break; - case 0xbd4: /* BOT_B2_L */ + case 0xbd4: /* BOT_B2_L */ *ret = s->src_f2_bottom & 0xffff; break; - case 0xbd6: /* BOT_B2_U */ + case 0xbd6: /* BOT_B2_U */ *ret = s->src_f2_bottom >> 16; break; - case 0xbd8: /* DMA_LCD_SRC_EI_B1 */ + case 0xbd8: /* DMA_LCD_SRC_EI_B1 */ *ret = s->element_index_f1; break; - case 0xbda: /* DMA_LCD_SRC_FI_B1_L */ + case 0xbda: /* DMA_LCD_SRC_FI_B1_L */ *ret = s->frame_index_f1 & 0xffff; break; - case 0xbf4: /* DMA_LCD_SRC_FI_B1_U */ + case 0xbf4: /* DMA_LCD_SRC_FI_B1_U */ *ret = s->frame_index_f1 >> 16; break; - case 0xbdc: /* DMA_LCD_SRC_EI_B2 */ + case 0xbdc: /* DMA_LCD_SRC_EI_B2 */ *ret = s->element_index_f2; break; - case 0xbde: /* DMA_LCD_SRC_FI_B2_L */ + case 0xbde: /* DMA_LCD_SRC_FI_B2_L */ *ret = s->frame_index_f2 & 0xffff; break; - case 0xbf6: /* DMA_LCD_SRC_FI_B2_U */ + case 0xbf6: /* DMA_LCD_SRC_FI_B2_U */ *ret = s->frame_index_f2 >> 16; break; - case 0xbe0: /* DMA_LCD_SRC_EN_B1 */ + case 0xbe0: /* DMA_LCD_SRC_EN_B1 */ *ret = s->elements_f1; break; - case 0xbe4: /* DMA_LCD_SRC_FN_B1 */ + case 0xbe4: /* DMA_LCD_SRC_FN_B1 */ *ret = s->frames_f1; break; - case 0xbe2: /* DMA_LCD_SRC_EN_B2 */ + case 0xbe2: /* DMA_LCD_SRC_EN_B2 */ *ret = s->elements_f2; break; - case 0xbe6: /* DMA_LCD_SRC_FN_B2 */ + case 0xbe6: /* DMA_LCD_SRC_FN_B2 */ *ret = s->frames_f2; break; - case 0xbea: /* DMA_LCD_LCH_CTRL */ + case 0xbea: /* DMA_LCD_LCH_CTRL */ *ret = s->lch_type; break; @@ -1249,7 +1249,7 @@ static int omap_dma_3_1_lcd_write(struct omap_dma_lcd_channel_s *s, int offset, uint16_t value) { switch (offset) { - case 0x300: /* SYS_DMA_LCD_CTRL */ + case 0x300: /* SYS_DMA_LCD_CTRL */ s->src = (value & 0x40) ? 
imif : emiff; s->condition = 0; /* Assume no bus errors and thus no BUS_ERROR irq bits. */ @@ -1257,42 +1257,42 @@ static int omap_dma_3_1_lcd_write(struct omap_dma_lcd_channel_s *s, int offset, s->dual = value & 1; break; - case 0x302: /* SYS_DMA_LCD_TOP_F1_L */ + case 0x302: /* SYS_DMA_LCD_TOP_F1_L */ s->src_f1_top &= 0xffff0000; s->src_f1_top |= 0x0000ffff & value; break; - case 0x304: /* SYS_DMA_LCD_TOP_F1_U */ + case 0x304: /* SYS_DMA_LCD_TOP_F1_U */ s->src_f1_top &= 0x0000ffff; s->src_f1_top |= (uint32_t)value << 16; break; - case 0x306: /* SYS_DMA_LCD_BOT_F1_L */ + case 0x306: /* SYS_DMA_LCD_BOT_F1_L */ s->src_f1_bottom &= 0xffff0000; s->src_f1_bottom |= 0x0000ffff & value; break; - case 0x308: /* SYS_DMA_LCD_BOT_F1_U */ + case 0x308: /* SYS_DMA_LCD_BOT_F1_U */ s->src_f1_bottom &= 0x0000ffff; s->src_f1_bottom |= (uint32_t)value << 16; break; - case 0x30a: /* SYS_DMA_LCD_TOP_F2_L */ + case 0x30a: /* SYS_DMA_LCD_TOP_F2_L */ s->src_f2_top &= 0xffff0000; s->src_f2_top |= 0x0000ffff & value; break; - case 0x30c: /* SYS_DMA_LCD_TOP_F2_U */ + case 0x30c: /* SYS_DMA_LCD_TOP_F2_U */ s->src_f2_top &= 0x0000ffff; s->src_f2_top |= (uint32_t)value << 16; break; - case 0x30e: /* SYS_DMA_LCD_BOT_F2_L */ + case 0x30e: /* SYS_DMA_LCD_BOT_F2_L */ s->src_f2_bottom &= 0xffff0000; s->src_f2_bottom |= 0x0000ffff & value; break; - case 0x310: /* SYS_DMA_LCD_BOT_F2_U */ + case 0x310: /* SYS_DMA_LCD_BOT_F2_U */ s->src_f2_bottom &= 0x0000ffff; s->src_f2_bottom |= (uint32_t)value << 16; break; @@ -1309,7 +1309,7 @@ static int omap_dma_3_1_lcd_read(struct omap_dma_lcd_channel_s *s, int offset, int i; switch (offset) { - case 0x300: /* SYS_DMA_LCD_CTRL */ + case 0x300: /* SYS_DMA_LCD_CTRL */ i = s->condition; s->condition = 0; qemu_irq_lower(s->irq); @@ -1317,35 +1317,35 @@ static int omap_dma_3_1_lcd_read(struct omap_dma_lcd_channel_s *s, int offset, (s->interrupts << 1) | s->dual; break; - case 0x302: /* SYS_DMA_LCD_TOP_F1_L */ + case 0x302: /* SYS_DMA_LCD_TOP_F1_L */ *ret = s->src_f1_top & 0xffff; break; - case 0x304: /* SYS_DMA_LCD_TOP_F1_U */ + case 0x304: /* SYS_DMA_LCD_TOP_F1_U */ *ret = s->src_f1_top >> 16; break; - case 0x306: /* SYS_DMA_LCD_BOT_F1_L */ + case 0x306: /* SYS_DMA_LCD_BOT_F1_L */ *ret = s->src_f1_bottom & 0xffff; break; - case 0x308: /* SYS_DMA_LCD_BOT_F1_U */ + case 0x308: /* SYS_DMA_LCD_BOT_F1_U */ *ret = s->src_f1_bottom >> 16; break; - case 0x30a: /* SYS_DMA_LCD_TOP_F2_L */ + case 0x30a: /* SYS_DMA_LCD_TOP_F2_L */ *ret = s->src_f2_top & 0xffff; break; - case 0x30c: /* SYS_DMA_LCD_TOP_F2_U */ + case 0x30c: /* SYS_DMA_LCD_TOP_F2_U */ *ret = s->src_f2_top >> 16; break; - case 0x30e: /* SYS_DMA_LCD_BOT_F2_L */ + case 0x30e: /* SYS_DMA_LCD_BOT_F2_L */ *ret = s->src_f2_bottom & 0xffff; break; - case 0x310: /* SYS_DMA_LCD_BOT_F2_U */ + case 0x310: /* SYS_DMA_LCD_BOT_F2_U */ *ret = s->src_f2_bottom >> 16; break; @@ -1358,18 +1358,18 @@ static int omap_dma_3_1_lcd_read(struct omap_dma_lcd_channel_s *s, int offset, static int omap_dma_sys_write(struct omap_dma_s *s, int offset, uint16_t value) { switch (offset) { - case 0x400: /* SYS_DMA_GCR */ + case 0x400: /* SYS_DMA_GCR */ s->gcr = value; break; - case 0x404: /* DMA_GSCR */ + case 0x404: /* DMA_GSCR */ if (value & 0x8) omap_dma_disable_3_1_mapping(s); else omap_dma_enable_3_1_mapping(s); break; - case 0x408: /* DMA_GRST */ + case 0x408: /* DMA_GRST */ if (value & 0x1) omap_dma_reset(s->dma); break; @@ -1384,57 +1384,57 @@ static int omap_dma_sys_read(struct omap_dma_s *s, int offset, uint16_t *ret) { switch (offset) { - case 0x400: /* 
SYS_DMA_GCR */ + case 0x400: /* SYS_DMA_GCR */ *ret = s->gcr; break; - case 0x404: /* DMA_GSCR */ + case 0x404: /* DMA_GSCR */ *ret = s->omap_3_1_mapping_disabled << 3; break; - case 0x408: /* DMA_GRST */ + case 0x408: /* DMA_GRST */ *ret = 0; break; - case 0x442: /* DMA_HW_ID */ - case 0x444: /* DMA_PCh2_ID */ - case 0x446: /* DMA_PCh0_ID */ - case 0x448: /* DMA_PCh1_ID */ - case 0x44a: /* DMA_PChG_ID */ - case 0x44c: /* DMA_PChD_ID */ + case 0x442: /* DMA_HW_ID */ + case 0x444: /* DMA_PCh2_ID */ + case 0x446: /* DMA_PCh0_ID */ + case 0x448: /* DMA_PCh1_ID */ + case 0x44a: /* DMA_PChG_ID */ + case 0x44c: /* DMA_PChD_ID */ *ret = 1; break; - case 0x44e: /* DMA_CAPS_0_U */ + case 0x44e: /* DMA_CAPS_0_U */ *ret = (s->caps[0] >> 16) & 0xffff; break; - case 0x450: /* DMA_CAPS_0_L */ + case 0x450: /* DMA_CAPS_0_L */ *ret = (s->caps[0] >> 0) & 0xffff; break; - case 0x452: /* DMA_CAPS_1_U */ + case 0x452: /* DMA_CAPS_1_U */ *ret = (s->caps[1] >> 16) & 0xffff; break; - case 0x454: /* DMA_CAPS_1_L */ + case 0x454: /* DMA_CAPS_1_L */ *ret = (s->caps[1] >> 0) & 0xffff; break; - case 0x456: /* DMA_CAPS_2 */ + case 0x456: /* DMA_CAPS_2 */ *ret = s->caps[2]; break; - case 0x458: /* DMA_CAPS_3 */ + case 0x458: /* DMA_CAPS_3 */ *ret = s->caps[3]; break; - case 0x45a: /* DMA_CAPS_4 */ + case 0x45a: /* DMA_CAPS_4 */ *ret = s->caps[4]; break; - case 0x460: /* DMA_PCh2_SR */ - case 0x480: /* DMA_PCh0_SR */ - case 0x482: /* DMA_PCh1_SR */ - case 0x4c0: /* DMA_PChD_SR_0 */ + case 0x460: /* DMA_PCh2_SR */ + case 0x480: /* DMA_PCh0_SR */ + case 0x482: /* DMA_PCh1_SR */ + case 0x4c0: /* DMA_PChD_SR_0 */ qemu_log_mask(LOG_UNIMP, "%s: Physical Channel Status Registers not implemented\n", __func__); @@ -1582,38 +1582,38 @@ static void omap_dma_setcaps(struct omap_dma_s *s) case omap_dma_3_2: /* XXX Only available for sDMA */ s->caps[0] = - (1 << 19) | /* Constant Fill Capability */ - (1 << 18); /* Transparent BLT Capability */ + (1 << 19) | /* Constant Fill Capability */ + (1 << 18); /* Transparent BLT Capability */ s->caps[1] = - (1 << 1); /* 1-bit palettized capability (DMA 3.2 only) */ + (1 << 1); /* 1-bit palettized capability (DMA 3.2 only) */ s->caps[2] = - (1 << 8) | /* SEPARATE_SRC_AND_DST_INDEX_CPBLTY */ - (1 << 7) | /* DST_DOUBLE_INDEX_ADRS_CPBLTY */ - (1 << 6) | /* DST_SINGLE_INDEX_ADRS_CPBLTY */ - (1 << 5) | /* DST_POST_INCRMNT_ADRS_CPBLTY */ - (1 << 4) | /* DST_CONST_ADRS_CPBLTY */ - (1 << 3) | /* SRC_DOUBLE_INDEX_ADRS_CPBLTY */ - (1 << 2) | /* SRC_SINGLE_INDEX_ADRS_CPBLTY */ - (1 << 1) | /* SRC_POST_INCRMNT_ADRS_CPBLTY */ - (1 << 0); /* SRC_CONST_ADRS_CPBLTY */ + (1 << 8) | /* SEPARATE_SRC_AND_DST_INDEX_CPBLTY */ + (1 << 7) | /* DST_DOUBLE_INDEX_ADRS_CPBLTY */ + (1 << 6) | /* DST_SINGLE_INDEX_ADRS_CPBLTY */ + (1 << 5) | /* DST_POST_INCRMNT_ADRS_CPBLTY */ + (1 << 4) | /* DST_CONST_ADRS_CPBLTY */ + (1 << 3) | /* SRC_DOUBLE_INDEX_ADRS_CPBLTY */ + (1 << 2) | /* SRC_SINGLE_INDEX_ADRS_CPBLTY */ + (1 << 1) | /* SRC_POST_INCRMNT_ADRS_CPBLTY */ + (1 << 0); /* SRC_CONST_ADRS_CPBLTY */ s->caps[3] = - (1 << 6) | /* BLOCK_SYNCHR_CPBLTY (DMA 4 only) */ - (1 << 7) | /* PKT_SYNCHR_CPBLTY (DMA 4 only) */ - (1 << 5) | /* CHANNEL_CHAINING_CPBLTY */ - (1 << 4) | /* LCh_INTERLEAVE_CPBLTY */ - (1 << 3) | /* AUTOINIT_REPEAT_CPBLTY (DMA 3.2 only) */ - (1 << 2) | /* AUTOINIT_ENDPROG_CPBLTY (DMA 3.2 only) */ - (1 << 1) | /* FRAME_SYNCHR_CPBLTY */ - (1 << 0); /* ELMNT_SYNCHR_CPBLTY */ + (1 << 6) | /* BLOCK_SYNCHR_CPBLTY (DMA 4 only) */ + (1 << 7) | /* PKT_SYNCHR_CPBLTY (DMA 4 only) */ + (1 << 5) | /* CHANNEL_CHAINING_CPBLTY */ + 
(1 << 4) | /* LCh_INTERLEAVE_CPBLTY */ + (1 << 3) | /* AUTOINIT_REPEAT_CPBLTY (DMA 3.2 only) */ + (1 << 2) | /* AUTOINIT_ENDPROG_CPBLTY (DMA 3.2 only) */ + (1 << 1) | /* FRAME_SYNCHR_CPBLTY */ + (1 << 0); /* ELMNT_SYNCHR_CPBLTY */ s->caps[4] = - (1 << 7) | /* PKT_INTERRUPT_CPBLTY (DMA 4 only) */ - (1 << 6) | /* SYNC_STATUS_CPBLTY */ - (1 << 5) | /* BLOCK_INTERRUPT_CPBLTY */ - (1 << 4) | /* LAST_FRAME_INTERRUPT_CPBLTY */ - (1 << 3) | /* FRAME_INTERRUPT_CPBLTY */ - (1 << 2) | /* HALF_FRAME_INTERRUPT_CPBLTY */ - (1 << 1) | /* EVENT_DROP_INTERRUPT_CPBLTY */ - (1 << 0); /* TIMEOUT_INTERRUPT_CPBLTY (DMA 3.2 only) */ + (1 << 7) | /* PKT_INTERRUPT_CPBLTY (DMA 4 only) */ + (1 << 6) | /* SYNC_STATUS_CPBLTY */ + (1 << 5) | /* BLOCK_INTERRUPT_CPBLTY */ + (1 << 4) | /* LAST_FRAME_INTERRUPT_CPBLTY */ + (1 << 3) | /* FRAME_INTERRUPT_CPBLTY */ + (1 << 2) | /* HALF_FRAME_INTERRUPT_CPBLTY */ + (1 << 1) | /* EVENT_DROP_INTERRUPT_CPBLTY */ + (1 << 0); /* TIMEOUT_INTERRUPT_CPBLTY (DMA 3.2 only) */ break; } } diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c index 3db3904..d8c7da1 100644 --- a/hw/dma/xlnx_csu_dma.c +++ b/hw/dma/xlnx_csu_dma.c @@ -287,7 +287,7 @@ static uint32_t xlnx_csu_dma_advance(XlnxCSUDMA *s, uint32_t len) static void xlnx_csu_dma_src_notify(void *opaque) { XlnxCSUDMA *s = XLNX_CSU_DMA(opaque); - unsigned char buf[4 * 1024]; + QEMU_UNINITIALIZED unsigned char buf[4 * 1024]; size_t rlen = 0; ptimer_transaction_begin(s->src_timer); diff --git a/hw/gpio/omap_gpio.c b/hw/gpio/omap_gpio.c index 61ea786..f27806b 100644 --- a/hw/gpio/omap_gpio.c +++ b/hw/gpio/omap_gpio.c @@ -80,25 +80,25 @@ static uint64_t omap_gpio_read(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* DATA_INPUT */ + case 0x00: /* DATA_INPUT */ return s->inputs & s->pins; - case 0x04: /* DATA_OUTPUT */ + case 0x04: /* DATA_OUTPUT */ return s->outputs; - case 0x08: /* DIRECTION_CONTROL */ + case 0x08: /* DIRECTION_CONTROL */ return s->dir; - case 0x0c: /* INTERRUPT_CONTROL */ + case 0x0c: /* INTERRUPT_CONTROL */ return s->edge; - case 0x10: /* INTERRUPT_MASK */ + case 0x10: /* INTERRUPT_MASK */ return s->mask; - case 0x14: /* INTERRUPT_STATUS */ + case 0x14: /* INTERRUPT_STATUS */ return s->ints; - case 0x18: /* PIN_CONTROL (not in OMAP310) */ + case 0x18: /* PIN_CONTROL (not in OMAP310) */ OMAP_BAD_REG(addr); return s->pins; } @@ -121,11 +121,11 @@ static void omap_gpio_write(void *opaque, hwaddr addr, } switch (offset) { - case 0x00: /* DATA_INPUT */ + case 0x00: /* DATA_INPUT */ OMAP_RO_REG(addr); return; - case 0x04: /* DATA_OUTPUT */ + case 0x04: /* DATA_OUTPUT */ diff = (s->outputs ^ value) & ~s->dir; s->outputs = value; while ((ln = ctz32(diff)) != 32) { @@ -135,7 +135,7 @@ static void omap_gpio_write(void *opaque, hwaddr addr, } break; - case 0x08: /* DIRECTION_CONTROL */ + case 0x08: /* DIRECTION_CONTROL */ diff = s->outputs & (s->dir ^ value); s->dir = value; @@ -147,21 +147,21 @@ static void omap_gpio_write(void *opaque, hwaddr addr, } break; - case 0x0c: /* INTERRUPT_CONTROL */ + case 0x0c: /* INTERRUPT_CONTROL */ s->edge = value; break; - case 0x10: /* INTERRUPT_MASK */ + case 0x10: /* INTERRUPT_MASK */ s->mask = value; break; - case 0x14: /* INTERRUPT_STATUS */ + case 0x14: /* INTERRUPT_STATUS */ s->ints &= ~value; if (!s->ints) qemu_irq_lower(s->irq); break; - case 0x18: /* PIN_CONTROL (not in OMAP310 TRM) */ + case 0x18: /* PIN_CONTROL (not in OMAP310 TRM) */ OMAP_BAD_REG(addr); s->pins = value; break; diff --git a/hw/gpio/pca9552.c b/hw/gpio/pca9552.c index d65c0a2..1e10238 100644 --- 
a/hw/gpio/pca9552.c +++ b/hw/gpio/pca9552.c @@ -76,7 +76,7 @@ static void pca955x_display_pins_status(PCA955xState *s, return; } if (trace_event_get_state_backends(TRACE_PCA955X_GPIO_STATUS)) { - char *buf = g_newa(char, k->pin_count + 1); + char buf[PCA955X_PIN_COUNT_MAX + 1]; for (i = 0; i < k->pin_count; i++) { if (extract32(pins_status, i, 1)) { diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c index 94b0abb..6dbcb2d 100644 --- a/hw/hyperv/hv-balloon.c +++ b/hw/hyperv/hv-balloon.c @@ -67,10 +67,6 @@ * these requests */ -struct HvBalloonClass { - VMBusDeviceClass parent_class; -} HvBalloonClass; - typedef enum State { /* not a real state */ S_NO_CHANGE = 0, @@ -162,8 +158,9 @@ typedef struct HvBalloon { MemoryRegion *mr; } HvBalloon; -OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \ - { TYPE_MEMORY_DEVICE }, { }) +OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, \ + HV_BALLOON, VMBUS_DEVICE, \ + { TYPE_MEMORY_DEVICE }, { }) #define HV_BALLOON_SET_STATE(hvb, news) \ do { \ diff --git a/hw/hyperv/syndbg.c b/hw/hyperv/syndbg.c index 8b8a147..ac7e15f 100644 --- a/hw/hyperv/syndbg.c +++ b/hw/hyperv/syndbg.c @@ -192,7 +192,7 @@ static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa, { uint16_t ret; g_assert(MSG_BUFSZ >= qemu_target_page_size()); - uint8_t data_buf[MSG_BUFSZ]; + QEMU_UNINITIALIZED uint8_t data_buf[MSG_BUFSZ]; hwaddr out_len; void *out_data; ssize_t recv_byte_count; diff --git a/hw/i2c/omap_i2c.c b/hw/i2c/omap_i2c.c index 2e45266..751bf74 100644 --- a/hw/i2c/omap_i2c.c +++ b/hw/i2c/omap_i2c.c @@ -55,16 +55,16 @@ struct OMAPI2CState { uint16_t test; }; -#define OMAP2_INTR_REV 0x34 -#define OMAP2_GC_REV 0x34 +#define OMAP2_INTR_REV 0x34 +#define OMAP2_GC_REV 0x34 static void omap_i2c_interrupts_update(OMAPI2CState *s) { qemu_set_irq(s->irq, s->stat & s->mask); - if ((s->dma >> 15) & 1) /* RDMA_EN */ - qemu_set_irq(s->drq[0], (s->stat >> 3) & 1); /* RRDY */ - if ((s->dma >> 7) & 1) /* XDMA_EN */ - qemu_set_irq(s->drq[1], (s->stat >> 4) & 1); /* XRDY */ + if ((s->dma >> 15) & 1) /* RDMA_EN */ + qemu_set_irq(s->drq[0], (s->stat >> 3) & 1); /* RRDY */ + if ((s->dma >> 7) & 1) /* XDMA_EN */ + qemu_set_irq(s->drq[1], (s->stat >> 4) & 1); /* XRDY */ } static void omap_i2c_fifo_run(OMAPI2CState *s) @@ -74,25 +74,25 @@ static void omap_i2c_fifo_run(OMAPI2CState *s) if (!i2c_bus_busy(s->bus)) return; - if ((s->control >> 2) & 1) { /* RM */ - if ((s->control >> 1) & 1) { /* STP */ + if ((s->control >> 2) & 1) { /* RM */ + if ((s->control >> 1) & 1) { /* STP */ i2c_end_transfer(s->bus); - s->control &= ~(1 << 1); /* STP */ + s->control &= ~(1 << 1); /* STP */ s->count_cur = s->count; s->txlen = 0; - } else if ((s->control >> 9) & 1) { /* TRX */ + } else if ((s->control >> 9) & 1) { /* TRX */ while (ack && s->txlen) ack = (i2c_send(s->bus, (s->fifo >> ((-- s->txlen) << 3)) & 0xff) >= 0); - s->stat |= 1 << 4; /* XRDY */ + s->stat |= 1 << 4; /* XRDY */ } else { while (s->rxlen < 4) s->fifo |= i2c_recv(s->bus) << ((s->rxlen ++) << 3); - s->stat |= 1 << 3; /* RRDY */ + s->stat |= 1 << 3; /* RRDY */ } } else { - if ((s->control >> 9) & 1) { /* TRX */ + if ((s->control >> 9) & 1) { /* TRX */ while (ack && s->count_cur && s->txlen) { ack = (i2c_send(s->bus, (s->fifo >> ((-- s->txlen) << 3)) & @@ -100,12 +100,12 @@ static void omap_i2c_fifo_run(OMAPI2CState *s) s->count_cur --; } if (ack && s->count_cur) - s->stat |= 1 << 4; /* XRDY */ + s->stat |= 1 << 4; /* XRDY */ else - s->stat &= ~(1 << 4); /* XRDY */ + s->stat &= ~(1 << 
4); /* XRDY */ if (!s->count_cur) { - s->stat |= 1 << 2; /* ARDY */ - s->control &= ~(1 << 10); /* MST */ + s->stat |= 1 << 2; /* ARDY */ + s->control &= ~(1 << 10); /* MST */ } } else { while (s->count_cur && s->rxlen < 4) { @@ -113,26 +113,26 @@ static void omap_i2c_fifo_run(OMAPI2CState *s) s->count_cur --; } if (s->rxlen) - s->stat |= 1 << 3; /* RRDY */ + s->stat |= 1 << 3; /* RRDY */ else - s->stat &= ~(1 << 3); /* RRDY */ + s->stat &= ~(1 << 3); /* RRDY */ } if (!s->count_cur) { - if ((s->control >> 1) & 1) { /* STP */ + if ((s->control >> 1) & 1) { /* STP */ i2c_end_transfer(s->bus); - s->control &= ~(1 << 1); /* STP */ + s->control &= ~(1 << 1); /* STP */ s->count_cur = s->count; s->txlen = 0; } else { - s->stat |= 1 << 2; /* ARDY */ - s->control &= ~(1 << 10); /* MST */ + s->stat |= 1 << 2; /* ARDY */ + s->control &= ~(1 << 10); /* MST */ } } } - s->stat |= (!ack) << 1; /* NACK */ + s->stat |= (!ack) << 1; /* NACK */ if (!ack) - s->control &= ~(1 << 1); /* STP */ + s->control &= ~(1 << 1); /* STP */ } static void omap_i2c_reset(DeviceState *dev) @@ -163,16 +163,16 @@ static uint32_t omap_i2c_read(void *opaque, hwaddr addr) uint16_t ret; switch (offset) { - case 0x00: /* I2C_REV */ - return s->revision; /* REV */ + case 0x00: /* I2C_REV */ + return s->revision; /* REV */ - case 0x04: /* I2C_IE */ + case 0x04: /* I2C_IE */ return s->mask; - case 0x08: /* I2C_STAT */ + case 0x08: /* I2C_STAT */ return s->stat | (i2c_bus_busy(s->bus) << 12); - case 0x0c: /* I2C_IV */ + case 0x0c: /* I2C_IV */ if (s->revision >= OMAP2_INTR_REV) break; ret = ctz32(s->stat & s->mask); @@ -185,18 +185,18 @@ static uint32_t omap_i2c_read(void *opaque, hwaddr addr) omap_i2c_interrupts_update(s); return ret; - case 0x10: /* I2C_SYSS */ - return (s->control >> 15) & 1; /* I2C_EN */ + case 0x10: /* I2C_SYSS */ + return (s->control >> 15) & 1; /* I2C_EN */ - case 0x14: /* I2C_BUF */ + case 0x14: /* I2C_BUF */ return s->dma; - case 0x18: /* I2C_CNT */ - return s->count_cur; /* DCOUNT */ + case 0x18: /* I2C_CNT */ + return s->count_cur; /* DCOUNT */ - case 0x1c: /* I2C_DATA */ + case 0x1c: /* I2C_DATA */ ret = 0; - if (s->control & (1 << 14)) { /* BE */ + if (s->control & (1 << 14)) { /* BE */ ret |= ((s->fifo >> 0) & 0xff) << 8; ret |= ((s->fifo >> 8) & 0xff) << 0; } else { @@ -204,7 +204,7 @@ static uint32_t omap_i2c_read(void *opaque, hwaddr addr) ret |= ((s->fifo >> 0) & 0xff) << 0; } if (s->rxlen == 1) { - s->stat |= 1 << 15; /* SBD */ + s->stat |= 1 << 15; /* SBD */ s->rxlen = 0; } else if (s->rxlen > 1) { if (s->rxlen > 2) @@ -214,41 +214,41 @@ static uint32_t omap_i2c_read(void *opaque, hwaddr addr) /* XXX: remote access (qualifier) error - what's that? 
*/ } if (!s->rxlen) { - s->stat &= ~(1 << 3); /* RRDY */ - if (((s->control >> 10) & 1) && /* MST */ - ((~s->control >> 9) & 1)) { /* TRX */ - s->stat |= 1 << 2; /* ARDY */ - s->control &= ~(1 << 10); /* MST */ + s->stat &= ~(1 << 3); /* RRDY */ + if (((s->control >> 10) & 1) && /* MST */ + ((~s->control >> 9) & 1)) { /* TRX */ + s->stat |= 1 << 2; /* ARDY */ + s->control &= ~(1 << 10); /* MST */ } } - s->stat &= ~(1 << 11); /* ROVR */ + s->stat &= ~(1 << 11); /* ROVR */ omap_i2c_fifo_run(s); omap_i2c_interrupts_update(s); return ret; - case 0x20: /* I2C_SYSC */ + case 0x20: /* I2C_SYSC */ return 0; - case 0x24: /* I2C_CON */ + case 0x24: /* I2C_CON */ return s->control; - case 0x28: /* I2C_OA */ + case 0x28: /* I2C_OA */ return s->addr[0]; - case 0x2c: /* I2C_SA */ + case 0x2c: /* I2C_SA */ return s->addr[1]; - case 0x30: /* I2C_PSC */ + case 0x30: /* I2C_PSC */ return s->divider; - case 0x34: /* I2C_SCLL */ + case 0x34: /* I2C_SCLL */ return s->times[0]; - case 0x38: /* I2C_SCLH */ + case 0x38: /* I2C_SCLH */ return s->times[1]; - case 0x3c: /* I2C_SYSTEST */ - if (s->test & (1 << 15)) { /* ST_EN */ + case 0x3c: /* I2C_SYSTEST */ + if (s->test & (1 << 15)) { /* ST_EN */ s->test ^= 0xa; return s->test; } else @@ -267,17 +267,17 @@ static void omap_i2c_write(void *opaque, hwaddr addr, int nack; switch (offset) { - case 0x00: /* I2C_REV */ - case 0x0c: /* I2C_IV */ - case 0x10: /* I2C_SYSS */ + case 0x00: /* I2C_REV */ + case 0x0c: /* I2C_IV */ + case 0x10: /* I2C_SYSS */ OMAP_RO_REG(addr); return; - case 0x04: /* I2C_IE */ + case 0x04: /* I2C_IE */ s->mask = value & (s->revision < OMAP2_GC_REV ? 0x1f : 0x3f); break; - case 0x08: /* I2C_STAT */ + case 0x08: /* I2C_STAT */ if (s->revision < OMAP2_INTR_REV) { OMAP_RO_REG(addr); return; @@ -288,40 +288,40 @@ static void omap_i2c_write(void *opaque, hwaddr addr, omap_i2c_interrupts_update(s); break; - case 0x14: /* I2C_BUF */ + case 0x14: /* I2C_BUF */ s->dma = value & 0x8080; - if (value & (1 << 15)) /* RDMA_EN */ - s->mask &= ~(1 << 3); /* RRDY_IE */ - if (value & (1 << 7)) /* XDMA_EN */ - s->mask &= ~(1 << 4); /* XRDY_IE */ + if (value & (1 << 15)) /* RDMA_EN */ + s->mask &= ~(1 << 3); /* RRDY_IE */ + if (value & (1 << 7)) /* XDMA_EN */ + s->mask &= ~(1 << 4); /* XRDY_IE */ break; - case 0x18: /* I2C_CNT */ - s->count = value; /* DCOUNT */ + case 0x18: /* I2C_CNT */ + s->count = value; /* DCOUNT */ break; - case 0x1c: /* I2C_DATA */ + case 0x1c: /* I2C_DATA */ if (s->txlen > 2) { /* XXX: remote access (qualifier) error - what's that? 
*/ break; } s->fifo <<= 16; s->txlen += 2; - if (s->control & (1 << 14)) { /* BE */ + if (s->control & (1 << 14)) { /* BE */ s->fifo |= ((value >> 8) & 0xff) << 8; s->fifo |= ((value >> 0) & 0xff) << 0; } else { s->fifo |= ((value >> 0) & 0xff) << 8; s->fifo |= ((value >> 8) & 0xff) << 0; } - s->stat &= ~(1 << 10); /* XUDF */ + s->stat &= ~(1 << 10); /* XUDF */ if (s->txlen > 2) - s->stat &= ~(1 << 4); /* XRDY */ + s->stat &= ~(1 << 4); /* XRDY */ omap_i2c_fifo_run(s); omap_i2c_interrupts_update(s); break; - case 0x20: /* I2C_SYSC */ + case 0x20: /* I2C_SYSC */ if (s->revision < OMAP2_INTR_REV) { OMAP_BAD_REG(addr); return; @@ -332,9 +332,9 @@ static void omap_i2c_write(void *opaque, hwaddr addr, } break; - case 0x24: /* I2C_CON */ + case 0x24: /* I2C_CON */ s->control = value & 0xcf87; - if (~value & (1 << 15)) { /* I2C_EN */ + if (~value & (1 << 15)) { /* I2C_EN */ if (s->revision < OMAP2_INTR_REV) { omap_i2c_reset(DEVICE(s)); } @@ -351,14 +351,14 @@ static void omap_i2c_write(void *opaque, hwaddr addr, __func__); break; } - if ((value & (1 << 15)) && value & (1 << 0)) { /* STT */ - nack = !!i2c_start_transfer(s->bus, s->addr[1], /* SA */ - (~value >> 9) & 1); /* TRX */ - s->stat |= nack << 1; /* NACK */ - s->control &= ~(1 << 0); /* STT */ + if ((value & (1 << 15)) && value & (1 << 0)) { /* STT */ + nack = !!i2c_start_transfer(s->bus, s->addr[1], /* SA */ + (~value >> 9) & 1); /* TRX */ + s->stat |= nack << 1; /* NACK */ + s->control &= ~(1 << 0); /* STT */ s->fifo = 0; if (nack) - s->control &= ~(1 << 1); /* STP */ + s->control &= ~(1 << 1); /* STP */ else { s->count_cur = s->count; omap_i2c_fifo_run(s); @@ -367,34 +367,34 @@ static void omap_i2c_write(void *opaque, hwaddr addr, } break; - case 0x28: /* I2C_OA */ + case 0x28: /* I2C_OA */ s->addr[0] = value & 0x3ff; break; - case 0x2c: /* I2C_SA */ + case 0x2c: /* I2C_SA */ s->addr[1] = value & 0x3ff; break; - case 0x30: /* I2C_PSC */ + case 0x30: /* I2C_PSC */ s->divider = value; break; - case 0x34: /* I2C_SCLL */ + case 0x34: /* I2C_SCLL */ s->times[0] = value; break; - case 0x38: /* I2C_SCLH */ + case 0x38: /* I2C_SCLH */ s->times[1] = value; break; - case 0x3c: /* I2C_SYSTEST */ + case 0x3c: /* I2C_SYSTEST */ s->test = value & 0xf80f; - if (value & (1 << 11)) /* SBB */ + if (value & (1 << 11)) /* SBB */ if (s->revision >= OMAP2_INTR_REV) { s->stat |= 0x3f; omap_i2c_interrupts_update(s); } - if (value & (1 << 15)) { /* ST_EN */ + if (value & (1 << 15)) { /* ST_EN */ qemu_log_mask(LOG_UNIMP, "%s: System Test not supported\n", __func__); } @@ -413,7 +413,7 @@ static void omap_i2c_writeb(void *opaque, hwaddr addr, int offset = addr & OMAP_MPUI_REG_MASK; switch (offset) { - case 0x1c: /* I2C_DATA */ + case 0x1c: /* I2C_DATA */ if (s->txlen > 2) { /* XXX: remote access (qualifier) error - what's that? 
*/ break; @@ -421,9 +421,9 @@ static void omap_i2c_writeb(void *opaque, hwaddr addr, s->fifo <<= 8; s->txlen += 1; s->fifo |= value & 0xff; - s->stat &= ~(1 << 10); /* XUDF */ + s->stat &= ~(1 << 10); /* XUDF */ if (s->txlen > 2) - s->stat &= ~(1 << 4); /* XRDY */ + s->stat &= ~(1 << 4); /* XRDY */ omap_i2c_fifo_run(s); omap_i2c_interrupts_update(s); break; diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index d34ce07..5139d23 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -4,12 +4,17 @@ config X86_FW_OVMF config SEV bool select X86_FW_OVMF - depends on KVM + depends on KVM && X86_64 config SGX bool depends on KVM +config TDX + bool + select X86_FW_OVMF + depends on KVM && X86_64 + config PC bool imply APPLESMC @@ -26,6 +31,7 @@ config PC imply QXL imply SEV imply SGX + imply TDX imply TEST_DEVICES imply TPM_CRB imply TPM_TIS_ISA diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index f40ad06..423c495 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -338,405 +338,6 @@ build_facs(GArray *table_data) g_array_append_vals(table_data, reserved, 40); /* Reserved */ } -Aml *aml_pci_device_dsm(void) -{ - Aml *method; - - method = aml_method("_DSM", 4, AML_SERIALIZED); - { - Aml *params = aml_local(0); - Aml *pkg = aml_package(2); - aml_append(pkg, aml_int(0)); - aml_append(pkg, aml_int(0)); - aml_append(method, aml_store(pkg, params)); - aml_append(method, - aml_store(aml_name("BSEL"), aml_index(params, aml_int(0)))); - aml_append(method, - aml_store(aml_name("ASUN"), aml_index(params, aml_int(1)))); - aml_append(method, - aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1), - aml_arg(2), aml_arg(3), params)) - ); - } - return method; -} - -static void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar) -{ - Aml *UUID, *ifctx1; - uint8_t byte_list[1] = { 0 }; /* nothing supported yet */ - - aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar)); - /* - * PCI Firmware Specification 3.1 - * 4.6. _DSM Definitions for PCI - */ - UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); - ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID))); - { - /* call is for unsupported UUID, bail out */ - aml_append(ifctx1, aml_return(retvar)); - } - aml_append(ctx, ifctx1); - - ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2))); - { - /* call is for unsupported REV, bail out */ - aml_append(ifctx1, aml_return(retvar)); - } - aml_append(ctx, ifctx1); -} - -static Aml *aml_pci_edsm(void) -{ - Aml *method, *ifctx; - Aml *zero = aml_int(0); - Aml *func = aml_arg(2); - Aml *ret = aml_local(0); - Aml *aidx = aml_local(1); - Aml *params = aml_arg(4); - - method = aml_method("EDSM", 5, AML_SERIALIZED); - - /* get supported functions */ - ifctx = aml_if(aml_equal(func, zero)); - { - /* 1: have supported functions */ - /* 7: support for function 7 */ - const uint8_t caps = 1 | BIT(7); - build_append_pci_dsm_func0_common(ifctx, ret); - aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero))); - aml_append(ifctx, aml_return(ret)); - } - aml_append(method, ifctx); - - /* handle specific functions requests */ - /* - * PCI Firmware Specification 3.1 - * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under - * Operating Systems - */ - ifctx = aml_if(aml_equal(func, aml_int(7))); - { - Aml *pkg = aml_package(2); - aml_append(pkg, zero); - /* optional, if not impl. 
should return null string */ - aml_append(pkg, aml_string("%s", "")); - aml_append(ifctx, aml_store(pkg, ret)); - - /* - * IASL is fine when initializing Package with computational data, - * however it makes guest unhappy /it fails to process such AML/. - * So use runtime assignment to set acpi-index after initializer - * to make OSPM happy. - */ - aml_append(ifctx, - aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx)); - aml_append(ifctx, aml_store(aidx, aml_index(ret, zero))); - aml_append(ifctx, aml_return(ret)); - } - aml_append(method, ifctx); - - return method; -} - -static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev) -{ - Aml *method; - - g_assert(pdev->acpi_index != 0); - method = aml_method("_DSM", 4, AML_SERIALIZED); - { - Aml *params = aml_local(0); - Aml *pkg = aml_package(1); - aml_append(pkg, aml_int(pdev->acpi_index)); - aml_append(method, aml_store(pkg, params)); - aml_append(method, - aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1), - aml_arg(2), aml_arg(3), params)) - ); - } - return method; -} - -static void build_append_pcihp_notify_entry(Aml *method, int slot) -{ - Aml *if_ctx; - int32_t devfn = PCI_DEVFN(slot, 0); - - if_ctx = aml_if(aml_and(aml_arg(0), aml_int(0x1U << slot), NULL)); - aml_append(if_ctx, aml_notify(aml_name("S%.02X", devfn), aml_arg(1))); - aml_append(method, if_ctx); -} - -static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus) -{ - const PCIDevice *pdev = bus->devices[devfn]; - - if (PCI_FUNC(devfn)) { - if (IS_PCI_BRIDGE(pdev)) { - /* - * Ignore only hotplugged PCI bridges on !0 functions, but - * allow describing cold plugged bridges on all functions - */ - if (DEVICE(pdev)->hotplugged) { - return true; - } - } - } - return false; -} - -static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus) -{ - PCIDevice *pdev = bus->devices[devfn]; - if (pdev) { - return is_devfn_ignored_generic(devfn, bus) || - !DEVICE_GET_CLASS(pdev)->hotpluggable || - /* Cold plugged bridges aren't themselves hot-pluggable */ - (IS_PCI_BRIDGE(pdev) && !DEVICE(pdev)->hotplugged); - } else { /* non populated slots */ - /* - * hotplug is supported only for non-multifunction device - * so generate device description only for function 0 - */ - if (PCI_FUNC(devfn) || - (pci_bus_is_express(bus) && PCI_SLOT(devfn) > 0)) { - return true; - } - } - return false; -} - -void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus) -{ - int devfn; - Aml *dev, *notify_method = NULL, *method; - QObject *bsel = object_property_get_qobject(OBJECT(bus), - ACPI_PCIHP_PROP_BSEL, NULL); - uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel)); - qobject_unref(bsel); - - aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val))); - notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED); - - for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { - int slot = PCI_SLOT(devfn); - int adr = slot << 16 | PCI_FUNC(devfn); - - if (is_devfn_ignored_hotplug(devfn, bus)) { - continue; - } - - if (bus->devices[devfn]) { - dev = aml_scope("S%.02X", devfn); - } else { - dev = aml_device("S%.02X", devfn); - aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); - } - - /* - * Can't declare _SUN here for every device as it changes 'slot' - * enumeration order in linux kernel, so use another variable for it - */ - aml_append(dev, aml_name_decl("ASUN", aml_int(slot))); - aml_append(dev, aml_pci_device_dsm()); - - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - /* add _EJ0 to make slot hotpluggable */ - method = aml_method("_EJ0", 1, 
AML_NOTSERIALIZED); - aml_append(method, - aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) - ); - aml_append(dev, method); - - build_append_pcihp_notify_entry(notify_method, slot); - - /* device descriptor has been composed, add it into parent context */ - aml_append(parent_scope, dev); - } - aml_append(parent_scope, notify_method); -} - -void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus) -{ - int devfn; - Aml *dev; - - for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { - /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */ - int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn); - PCIDevice *pdev = bus->devices[devfn]; - - if (!pdev || is_devfn_ignored_generic(devfn, bus)) { - continue; - } - - /* start to compose PCI device descriptor */ - dev = aml_device("S%.02X", devfn); - aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); - - call_dev_aml_func(DEVICE(bus->devices[devfn]), dev); - /* add _DSM if device has acpi-index set */ - if (pdev->acpi_index && - !object_property_get_bool(OBJECT(pdev), "hotpluggable", - &error_abort)) { - aml_append(dev, aml_pci_static_endpoint_dsm(pdev)); - } - - /* device descriptor has been composed, add it into parent context */ - aml_append(parent_scope, dev); - } -} - -static bool build_append_notification_callback(Aml *parent_scope, - const PCIBus *bus) -{ - Aml *method; - PCIBus *sec; - QObject *bsel; - int nr_notifiers = 0; - GQueue *pcnt_bus_list = g_queue_new(); - - QLIST_FOREACH(sec, &bus->child, sibling) { - Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn); - if (pci_bus_is_root(sec)) { - continue; - } - nr_notifiers = nr_notifiers + - build_append_notification_callback(br_scope, sec); - /* - * add new child scope to parent - * and keep track of bus that have PCNT, - * bus list is used later to call children PCNTs from this level PCNT - */ - if (nr_notifiers) { - g_queue_push_tail(pcnt_bus_list, sec); - aml_append(parent_scope, br_scope); - } - } - - /* - * Append PCNT method to notify about events on local and child buses. - * ps: hostbridge might not have hotplug (bsel) enabled but might have - * child bridges that do have bsel. 
- */ - method = aml_method("PCNT", 0, AML_NOTSERIALIZED); - - /* If bus supports hotplug select it and notify about local events */ - bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL); - if (bsel) { - uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel)); - - aml_append(method, aml_store(aml_int(bsel_val), aml_name("BNUM"))); - aml_append(method, aml_call2("DVNT", aml_name("PCIU"), - aml_int(1))); /* Device Check */ - aml_append(method, aml_call2("DVNT", aml_name("PCID"), - aml_int(3))); /* Eject Request */ - nr_notifiers++; - } - - /* Notify about child bus events in any case */ - while ((sec = g_queue_pop_head(pcnt_bus_list))) { - aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn)); - } - - aml_append(parent_scope, method); - qobject_unref(bsel); - g_queue_free(pcnt_bus_list); - return !!nr_notifiers; -} - -static Aml *aml_pci_pdsm(void) -{ - Aml *method, *ifctx, *ifctx1; - Aml *ret = aml_local(0); - Aml *caps = aml_local(1); - Aml *acpi_index = aml_local(2); - Aml *zero = aml_int(0); - Aml *one = aml_int(1); - Aml *not_supp = aml_int(0xFFFFFFFF); - Aml *func = aml_arg(2); - Aml *params = aml_arg(4); - Aml *bnum = aml_derefof(aml_index(params, aml_int(0))); - Aml *sunum = aml_derefof(aml_index(params, aml_int(1))); - - method = aml_method("PDSM", 5, AML_SERIALIZED); - - /* get supported functions */ - ifctx = aml_if(aml_equal(func, zero)); - { - build_append_pci_dsm_func0_common(ifctx, ret); - - aml_append(ifctx, aml_store(zero, caps)); - aml_append(ifctx, - aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); - /* - * advertise function 7 if device has acpi-index - * acpi_index values: - * 0: not present (default value) - * FFFFFFFF: not supported (old QEMU without PIDX reg) - * other: device's acpi-index - */ - ifctx1 = aml_if(aml_lnot( - aml_or(aml_equal(acpi_index, zero), - aml_equal(acpi_index, not_supp), NULL) - )); - { - /* have supported functions */ - aml_append(ifctx1, aml_or(caps, one, caps)); - /* support for function 7 */ - aml_append(ifctx1, - aml_or(caps, aml_shiftleft(one, aml_int(7)), caps)); - } - aml_append(ifctx, ifctx1); - - aml_append(ifctx, aml_store(caps, aml_index(ret, zero))); - aml_append(ifctx, aml_return(ret)); - } - aml_append(method, ifctx); - - /* handle specific functions requests */ - /* - * PCI Firmware Specification 3.1 - * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under - * Operating Systems - */ - ifctx = aml_if(aml_equal(func, aml_int(7))); - { - Aml *pkg = aml_package(2); - - aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); - aml_append(ifctx, aml_store(pkg, ret)); - /* - * Windows calls func=7 without checking if it's available, - * as workaround Microsoft has suggested to return invalid for func7 - * Package, so return 2 elements package but only initialize elements - * when acpi_index is supported and leave them uninitialized, which - * leads elements to being Uninitialized ObjectType and should trip - * Windows into discarding result as an unexpected and prevent setting - * bogus 'PCI Label' on the device. - */ - ifctx1 = aml_if(aml_lnot(aml_lor( - aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp) - ))); - { - aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero))); - /* - * optional, if not impl. 
should return null string - */ - aml_append(ifctx1, aml_store(aml_string("%s", ""), - aml_index(ret, one))); - } - aml_append(ifctx, ifctx1); - - aml_append(ifctx, aml_return(ret)); - } - - aml_append(method, ifctx); - return method; -} - /* * build_prt - Define interrupt routing rules * @@ -1227,112 +828,6 @@ static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg) return dev; } -static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) -{ - Aml *scope; - Aml *field; - Aml *method; - - scope = aml_scope("_SB.PCI0"); - - aml_append(scope, - aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(pcihp_addr), 0x08)); - field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("PCIU", 32)); - aml_append(field, aml_named_field("PCID", 32)); - aml_append(scope, field); - - aml_append(scope, - aml_operation_region("SEJ", AML_SYSTEM_IO, - aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04)); - field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("B0EJ", 32)); - aml_append(scope, field); - - aml_append(scope, - aml_operation_region("BNMR", AML_SYSTEM_IO, - aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08)); - field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("BNUM", 32)); - aml_append(field, aml_named_field("PIDX", 32)); - aml_append(scope, field); - - aml_append(scope, aml_mutex("BLCK", 0)); - - method = aml_method("PCEJ", 2, AML_NOTSERIALIZED); - aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF)); - aml_append(method, aml_store(aml_arg(0), aml_name("BNUM"))); - aml_append(method, - aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("B0EJ"))); - aml_append(method, aml_release(aml_name("BLCK"))); - aml_append(method, aml_return(aml_int(0))); - aml_append(scope, method); - - method = aml_method("AIDX", 2, AML_NOTSERIALIZED); - aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF)); - aml_append(method, aml_store(aml_arg(0), aml_name("BNUM"))); - aml_append(method, - aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("PIDX"))); - aml_append(method, aml_store(aml_name("PIDX"), aml_local(0))); - aml_append(method, aml_release(aml_name("BLCK"))); - aml_append(method, aml_return(aml_local(0))); - aml_append(scope, method); - - aml_append(scope, aml_pci_pdsm()); - - aml_append(table, scope); -} - -static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) -{ - Aml *if_ctx; - Aml *if_ctx2; - Aml *else_ctx; - Aml *method; - Aml *a_cwd1 = aml_name("CDW1"); - Aml *a_ctrl = aml_local(0); - - method = aml_method("_OSC", 4, AML_NOTSERIALIZED); - aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); - - if_ctx = aml_if(aml_equal( - aml_arg(0), aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766"))); - aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); - aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); - - aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl)); - - /* - * Always allow native PME, AER (no dependencies) - * Allow SHPC (PCI bridges can have SHPC controller) - * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled. - */ - aml_append(if_ctx, aml_and(a_ctrl, - aml_int(0x1E | (enable_native_pcie_hotplug ? 
0x1 : 0x0)), a_ctrl)); - - if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); - /* Unknown revision */ - aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x08), a_cwd1)); - aml_append(if_ctx, if_ctx2); - - if_ctx2 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl))); - /* Capabilities bits were masked */ - aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x10), a_cwd1)); - aml_append(if_ctx, if_ctx2); - - /* Update DWORD3 in the buffer */ - aml_append(if_ctx, aml_store(a_ctrl, aml_name("CDW3"))); - aml_append(method, if_ctx); - - else_ctx = aml_else(); - /* Unrecognized UUID */ - aml_append(else_ctx, aml_or(a_cwd1, aml_int(4), a_cwd1)); - aml_append(method, else_ctx); - - aml_append(method, aml_return(aml_arg(3))); - return method; -} - static void build_acpi0017(Aml *table) { Aml *dev, *scope, *method; @@ -1389,12 +884,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, aml_pci_edsm()); + aml_append(dev, build_pci_bridge_edsm()); aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { - build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); + build_acpi_pci_hotplug(dsdt, AML_SYSTEM_IO, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); } else if (q35) { @@ -1403,8 +898,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en)); - aml_append(dev, aml_pci_edsm()); + aml_append(dev, build_pci_host_bridge_osc_method(!pm->pcihp_bridge_en)); + aml_append(dev, build_pci_bridge_edsm()); aml_append(sb_scope, dev); if (mcfg_valid) { aml_append(sb_scope, build_q35_dram_controller(&mcfg)); @@ -1438,7 +933,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); if (pm->pcihp_bridge_en) { - build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); + build_acpi_pci_hotplug(dsdt, AML_SYSTEM_IO, pm->pcihp_io_base); } build_q35_pci0_int(dsdt); } @@ -1525,7 +1020,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); /* Expander bridges do not have ACPI PCI Hot-plug enabled */ - aml_append(dev, build_q35_osc_method(true)); + aml_append(dev, build_pci_host_bridge_osc_method(true)); } else { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } @@ -1654,19 +1149,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, /* reserve PCIHP resources */ if (pm->pcihp_io_len && (pm->pcihp_bridge_en || pm->pcihp_root_en)) { - dev = aml_device("PHPR"); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06"))); - aml_append(dev, - aml_name_decl("_UID", aml_string("PCI Hotplug resources"))); - /* device present, functioning, decoding, not shown in UI */ - aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, pm->pcihp_io_base, pm->pcihp_io_base, 1, - pm->pcihp_io_len) - ); - aml_append(dev, aml_name_decl("_CRS", crs)); - aml_append(scope, dev); + build_append_pcihp_resources(scope, + pm->pcihp_io_base, pm->pcihp_io_len); } aml_append(dsdt, scope); @@ -2333,10 +1817,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, 
build_append_int_noprefix(table_data, ivhd_blob->len + 24, 2); /* DeviceID */ build_append_int_noprefix(table_data, - object_property_get_int(OBJECT(&s->pci), "addr", + object_property_get_int(OBJECT(s->pci), "addr", &error_abort), 2); /* Capability offset */ - build_append_int_noprefix(table_data, s->pci.capab_offset, 2); + build_append_int_noprefix(table_data, s->pci->capab_offset, 2); /* IOMMU base address */ build_append_int_noprefix(table_data, s->mr_mmio.addr, 8); /* PCI Segment Group */ @@ -2368,10 +1852,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, build_append_int_noprefix(table_data, ivhd_blob->len + 40, 2); /* DeviceID */ build_append_int_noprefix(table_data, - object_property_get_int(OBJECT(&s->pci), "addr", + object_property_get_int(OBJECT(s->pci), "addr", &error_abort), 2); /* Capability offset */ - build_append_int_noprefix(table_data, s->pci.capab_offset, 2); + build_append_int_noprefix(table_data, s->pci->capab_offset, 2); /* IOMMU base address */ build_append_int_noprefix(table_data, s->mr_mmio.addr, 8); /* PCI Segment Group */ diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h index 275ec05..8ba3c33 100644 --- a/hw/i386/acpi-build.h +++ b/hw/i386/acpi-build.h @@ -5,10 +5,6 @@ extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio; -/* PCI Hot-plug registers' base. See docs/specs/acpi_pci_hotplug.rst */ -#define ACPI_PCIHP_SEJ_BASE 0x8 -#define ACPI_PCIHP_BNMR_BASE 0x10 - void acpi_setup(void); Object *acpi_get_i386_pci_host(void); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 2cf7e24..26be69b 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -123,8 +123,13 @@ static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val) uint16_t romask = lduw_le_p(&s->romask[addr]); uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]); uint16_t oldval = lduw_le_p(&s->mmior[addr]); + + uint16_t oldval_preserved = oldval & (romask | w1cmask); + uint16_t newval_write = val & ~romask; + uint16_t newval_w1c_set = val & w1cmask; + stw_le_p(&s->mmior[addr], - ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); + (oldval_preserved | newval_write) & ~newval_w1c_set); } static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val) @@ -132,23 +137,33 @@ static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val) uint32_t romask = ldl_le_p(&s->romask[addr]); uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); uint32_t oldval = ldl_le_p(&s->mmior[addr]); + + uint32_t oldval_preserved = oldval & (romask | w1cmask); + uint32_t newval_write = val & ~romask; + uint32_t newval_w1c_set = val & w1cmask; + stl_le_p(&s->mmior[addr], - ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); + (oldval_preserved | newval_write) & ~newval_w1c_set); } static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val) { uint64_t romask = ldq_le_p(&s->romask[addr]); uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); - uint32_t oldval = ldq_le_p(&s->mmior[addr]); + uint64_t oldval = ldq_le_p(&s->mmior[addr]); + + uint64_t oldval_preserved = oldval & (romask | w1cmask); + uint64_t newval_write = val & ~romask; + uint64_t newval_w1c_set = val & w1cmask; + stq_le_p(&s->mmior[addr], - ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); + (oldval_preserved | newval_write) & ~newval_w1c_set); } -/* OR a 64-bit register with a 64-bit value */ +/* AND a 64-bit register with a 64-bit value */ static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val) { - return amdvi_readq(s, addr) | val; + return amdvi_readq(s, addr) & 
val; } /* OR a 64-bit register with a 64-bit value storing result in the register */ @@ -167,29 +182,41 @@ static void amdvi_generate_msi_interrupt(AMDVIState *s) { MSIMessage msg = {}; MemTxAttrs attrs = { - .requester_id = pci_requester_id(&s->pci.dev) + .requester_id = pci_requester_id(&s->pci->dev) }; - if (msi_enabled(&s->pci.dev)) { - msg = msi_get_message(&s->pci.dev, 0); + if (msi_enabled(&s->pci->dev)) { + msg = msi_get_message(&s->pci->dev, 0); address_space_stl_le(&address_space_memory, msg.address, msg.data, attrs, NULL); } } +static uint32_t get_next_eventlog_entry(AMDVIState *s) +{ + uint32_t evtlog_size = s->evtlog_len * AMDVI_EVENT_LEN; + return (s->evtlog_tail + AMDVI_EVENT_LEN) % evtlog_size; +} + static void amdvi_log_event(AMDVIState *s, uint64_t *evt) { + uint32_t evtlog_tail_next; + /* event logging not enabled */ if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF)) { return; } + evtlog_tail_next = get_next_eventlog_entry(s); + /* event log buffer full */ - if (s->evtlog_tail >= s->evtlog_len) { - amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF); - /* generate interrupt */ - amdvi_generate_msi_interrupt(s); + if (evtlog_tail_next == s->evtlog_head) { + /* generate overflow interrupt */ + if (s->evtlog_intr) { + amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF); + amdvi_generate_msi_interrupt(s); + } return; } @@ -198,9 +225,13 @@ static void amdvi_log_event(AMDVIState *s, uint64_t *evt) trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail); } - s->evtlog_tail += AMDVI_EVENT_LEN; - amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); - amdvi_generate_msi_interrupt(s); + s->evtlog_tail = evtlog_tail_next; + amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail); + + if (s->evtlog_intr) { + amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVENT_INT); + amdvi_generate_msi_interrupt(s); + } } static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, @@ -239,7 +270,7 @@ static void amdvi_page_fault(AMDVIState *s, uint16_t devid, info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; amdvi_encode_event(evt, devid, addr, info); amdvi_log_event(s, evt); - pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, PCI_STATUS_SIG_TARGET_ABORT); } /* @@ -256,7 +287,7 @@ static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, amdvi_encode_event(evt, devid, devtab, info); amdvi_log_event(s, evt); - pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, PCI_STATUS_SIG_TARGET_ABORT); } /* log an event trying to access command buffer @@ -269,7 +300,7 @@ static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) amdvi_encode_event(evt, 0, addr, info); amdvi_log_event(s, evt); - pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, PCI_STATUS_SIG_TARGET_ABORT); } /* log an illegal command event @@ -310,7 +341,7 @@ static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; amdvi_encode_event(evt, devid, addr, info); amdvi_log_event(s, evt); - pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + pci_word_test_and_set_mask(s->pci->dev.config + PCI_STATUS, PCI_STATUS_SIG_TARGET_ABORT); } @@ -508,7 +539,7 @@ static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd) static void iommu_inval_iotlb(AMDVIState *s, uint64_t 
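/*
 * On the event-log rework above: the log is now treated as a true ring
 * of s->evtlog_len entries. Since head == tail means "empty", one slot
 * is kept unused, and the ring is full exactly when
 *   (tail + AMDVI_EVENT_LEN) % (evtlog_len * AMDVI_EVENT_LEN) == head
 * which is what get_next_eventlog_entry() computes before each write.
 */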
*cmd) { - uint16_t devid = extract64(cmd[0], 0, 16); + uint16_t devid = cpu_to_le16(extract64(cmd[0], 0, 16)); if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) || extract64(cmd[1], 6, 6)) { amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), @@ -521,7 +552,7 @@ static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd) &devid); } else { amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12, - cpu_to_le16(extract64(cmd[1], 0, 16))); + devid); } trace_amdvi_iotlb_inval(); } @@ -592,18 +623,31 @@ static void amdvi_cmdbuf_run(AMDVIState *s) } } -static void amdvi_mmio_trace(hwaddr addr, unsigned size) +static inline uint8_t amdvi_mmio_get_index(hwaddr addr) { uint8_t index = (addr & ~0x2000) / 8; if ((addr & 0x2000)) { /* high table */ index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index; - trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07); } else { index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index; - trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07); } + + return index; +} + +static void amdvi_mmio_trace_read(hwaddr addr, unsigned size) +{ + uint8_t index = amdvi_mmio_get_index(addr); + trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07); +} + +static void amdvi_mmio_trace_write(hwaddr addr, unsigned size, uint64_t val) +{ + uint8_t index = amdvi_mmio_get_index(addr); + trace_amdvi_mmio_write(amdvi_mmio_low[index], addr, size, val, + addr & ~0x07); } static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size) @@ -623,7 +667,7 @@ static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size) } else if (size == 8) { val = amdvi_readq(s, addr); } - amdvi_mmio_trace(addr, size); + amdvi_mmio_trace_read(addr, size); return val; } @@ -633,7 +677,6 @@ static void amdvi_handle_control_write(AMDVIState *s) unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL); s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN); - s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN); s->evtlog_enabled = s->enabled && !!(control & AMDVI_MMIO_CONTROL_EVENTLOGEN); @@ -665,8 +708,8 @@ static inline void amdvi_handle_devtab_write(AMDVIState *s) uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE); s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK); - /* set device table length */ - s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 * + /* set device table length (i.e. 
number of entries table can hold) */ + s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) * (AMDVI_MMIO_DEVTAB_SIZE_UNIT / AMDVI_MMIO_DEVTAB_ENTRY_SIZE)); } @@ -704,9 +747,19 @@ static inline void amdvi_handle_excllim_write(AMDVIState *s) static inline void amdvi_handle_evtbase_write(AMDVIState *s) { uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE); + + if (amdvi_readq(s, AMDVI_MMIO_STATUS) & AMDVI_MMIO_STATUS_EVENT_INT) + /* Do not reset if eventlog interrupt bit is set*/ + return; + s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK; s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE) & AMDVI_MMIO_EVTLOG_SIZE_MASK); + + /* clear tail and head pointer to 0 when event base is updated */ + s->evtlog_tail = s->evtlog_head = 0; + amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_HEAD, s->evtlog_head); + amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail); } static inline void amdvi_handle_evttail_write(AMDVIState *s) @@ -770,7 +823,7 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val, return; } - amdvi_mmio_trace(addr, size); + amdvi_mmio_trace_write(addr, size, val); switch (addr & ~0x07) { case AMDVI_MMIO_CONTROL: amdvi_mmio_reg_write(s, size, val, addr); @@ -835,6 +888,9 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val, amdvi_mmio_reg_write(s, size, val, addr); amdvi_handle_pprtail_write(s); break; + case AMDVI_MMIO_STATUS: + amdvi_mmio_reg_write(s, size, val, addr); + break; } } @@ -848,9 +904,10 @@ static inline uint64_t amdvi_get_perms(uint64_t entry) static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid, uint64_t *dte) { - if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED) - || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED) - || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) { + if ((dte[0] & AMDVI_DTE_QUAD0_RESERVED) || + (dte[1] & AMDVI_DTE_QUAD1_RESERVED) || + (dte[2] & AMDVI_DTE_QUAD2_RESERVED) || + (dte[3] & AMDVI_DTE_QUAD3_RESERVED)) { amdvi_log_illegaldevtab_error(s, devid, s->devtab + devid * AMDVI_DEVTAB_ENTRY_SIZE, 0); @@ -1426,7 +1483,6 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVIState *s = opaque; AMDVIAddressSpace **iommu_as, *amdvi_dev_as; int bus_num = pci_bus_num(bus); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); iommu_as = s->address_spaces[bus_num]; @@ -1486,15 +1542,8 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVI_INT_ADDR_FIRST, &amdvi_dev_as->iommu_ir, 1); - if (!x86_iommu->pt_supported) { - memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); - memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), - true); - } else { - memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), - false); - memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true); - } + memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); + memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true); } return &iommu_as[devfn]->as; } @@ -1549,7 +1598,6 @@ static void amdvi_init(AMDVIState *s) s->excl_allow = false; s->mmio_enabled = false; s->enabled = false; - s->ats_enabled = false; s->cmdbuf_enabled = false; /* reset MMIO */ @@ -1607,28 +1655,92 @@ static void amdvi_sysbus_reset(DeviceState *dev) { AMDVIState *s = AMD_IOMMU_DEVICE(dev); - msi_reset(&s->pci.dev); + msi_reset(&s->pci->dev); amdvi_init(s); } +static const VMStateDescription vmstate_amdvi_sysbus_migratable = { + .name = "amd-iommu", + .version_id = 1, + .minimum_version_id = 1, + .priority = MIG_PRI_IOMMU, + .fields = (VMStateField[]) { + /* 
Updated in amdvi_handle_control_write() */ + VMSTATE_BOOL(enabled, AMDVIState), + VMSTATE_BOOL(ga_enabled, AMDVIState), + /* bool ats_enabled is obsolete */ + VMSTATE_UNUSED(1), /* was ats_enabled */ + VMSTATE_BOOL(cmdbuf_enabled, AMDVIState), + VMSTATE_BOOL(completion_wait_intr, AMDVIState), + VMSTATE_BOOL(evtlog_enabled, AMDVIState), + VMSTATE_BOOL(evtlog_intr, AMDVIState), + /* Updated in amdvi_handle_devtab_write() */ + VMSTATE_UINT64(devtab, AMDVIState), + VMSTATE_UINT64(devtab_len, AMDVIState), + /* Updated in amdvi_handle_cmdbase_write() */ + VMSTATE_UINT64(cmdbuf, AMDVIState), + VMSTATE_UINT64(cmdbuf_len, AMDVIState), + /* Updated in amdvi_handle_cmdhead_write() */ + VMSTATE_UINT32(cmdbuf_head, AMDVIState), + /* Updated in amdvi_handle_cmdtail_write() */ + VMSTATE_UINT32(cmdbuf_tail, AMDVIState), + /* Updated in amdvi_handle_evtbase_write() */ + VMSTATE_UINT64(evtlog, AMDVIState), + VMSTATE_UINT32(evtlog_len, AMDVIState), + /* Updated in amdvi_handle_evthead_write() */ + VMSTATE_UINT32(evtlog_head, AMDVIState), + /* Updated in amdvi_handle_evttail_write() */ + VMSTATE_UINT32(evtlog_tail, AMDVIState), + /* Updated in amdvi_handle_pprbase_write() */ + VMSTATE_UINT64(ppr_log, AMDVIState), + VMSTATE_UINT32(pprlog_len, AMDVIState), + /* Updated in amdvi_handle_pprhead_write() */ + VMSTATE_UINT32(pprlog_head, AMDVIState), + /* Updated in amdvi_handle_tailhead_write() */ + VMSTATE_UINT32(pprlog_tail, AMDVIState), + /* MMIO registers */ + VMSTATE_UINT8_ARRAY(mmior, AMDVIState, AMDVI_MMIO_SIZE), + VMSTATE_UINT8_ARRAY(romask, AMDVIState, AMDVI_MMIO_SIZE), + VMSTATE_UINT8_ARRAY(w1cmask, AMDVIState, AMDVI_MMIO_SIZE), + VMSTATE_END_OF_LIST() + } +}; + static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) { + DeviceClass *dc = (DeviceClass *) object_get_class(OBJECT(dev)); AMDVIState *s = AMD_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(ms); X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->pcibus; - s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, - amdvi_uint64_equal, g_free, g_free); + if (s->pci_id) { + PCIDevice *pdev = NULL; + int ret = pci_qdev_find_device(s->pci_id, &pdev); - /* This device should take care of IOMMU PCI properties */ - if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { - return; + if (ret) { + error_report("Cannot find PCI device '%s'", s->pci_id); + return; + } + + if (!object_dynamic_cast(OBJECT(pdev), TYPE_AMD_IOMMU_PCI)) { + error_report("Device '%s' must be an AMDVI-PCI device type", s->pci_id); + return; + } + + s->pci = AMD_IOMMU_PCI(pdev); + dc->vmsd = &vmstate_amdvi_sysbus_migratable; + } else { + s->pci = AMD_IOMMU_PCI(object_new(TYPE_AMD_IOMMU_PCI)); + /* This device should take care of IOMMU PCI properties */ + if (!qdev_realize(DEVICE(s->pci), &bus->qbus, errp)) { + return; + } } - /* Pseudo address space under root PCI bus. */ - x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); + s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, + amdvi_uint64_equal, g_free, g_free); /* set up MMIO */ memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s, @@ -1652,17 +1764,29 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST, &s->mr_ir, 1); + /* Pseudo address space under root PCI bus. 
*/ + x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); + if (kvm_enabled() && x86ms->apic_id_limit > 255 && !s->xtsup) { error_report("AMD IOMMU with x2APIC configuration requires xtsup=on"); exit(EXIT_FAILURE); } + if (s->xtsup) { + if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) { + error_report("AMD IOMMU xtsup=on requires x2APIC support on " + "the KVM side"); + exit(EXIT_FAILURE); + } + } + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } static const Property amdvi_properties[] = { DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false), + DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id), }; static const VMStateDescription vmstate_amdvi_sysbus = { @@ -1670,13 +1794,6 @@ static const VMStateDescription vmstate_amdvi_sysbus = { .unmigratable = 1 }; -static void amdvi_sysbus_instance_init(Object *klass) -{ - AMDVIState *s = AMD_IOMMU_DEVICE(klass); - - object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); -} - static void amdvi_sysbus_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -1696,7 +1813,6 @@ static const TypeInfo amdvi_sysbus = { .name = TYPE_AMD_IOMMU_DEVICE, .parent = TYPE_X86_IOMMU_DEVICE, .instance_size = sizeof(AMDVIState), - .instance_init = amdvi_sysbus_instance_init, .class_init = amdvi_sysbus_class_init }; diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 2812513..2476296 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -25,6 +25,8 @@ #include "hw/i386/x86-iommu.h" #include "qom/object.h" +#define GENMASK64(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l)) + /* Capability registers */ #define AMDVI_CAPAB_BAR_LOW 0x04 #define AMDVI_CAPAB_BAR_HIGH 0x08 @@ -66,34 +68,34 @@ #define AMDVI_MMIO_SIZE 0x4000 -#define AMDVI_MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1) -#define AMDVI_MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~ \ - AMDVI_MMIO_DEVTAB_SIZE_MASK) +#define AMDVI_MMIO_DEVTAB_SIZE_MASK GENMASK64(8, 0) +#define AMDVI_MMIO_DEVTAB_BASE_MASK GENMASK64(51, 12) + #define AMDVI_MMIO_DEVTAB_ENTRY_SIZE 32 #define AMDVI_MMIO_DEVTAB_SIZE_UNIT 4096 /* some of this are similar but just for readability */ #define AMDVI_MMIO_CMDBUF_SIZE_BYTE (AMDVI_MMIO_COMMAND_BASE + 7) #define AMDVI_MMIO_CMDBUF_SIZE_MASK 0x0f -#define AMDVI_MMIO_CMDBUF_BASE_MASK AMDVI_MMIO_DEVTAB_BASE_MASK -#define AMDVI_MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f) -#define AMDVI_MMIO_CMDBUF_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK +#define AMDVI_MMIO_CMDBUF_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_CMDBUF_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_CMDBUF_TAIL_MASK GENMASK64(18, 4) #define AMDVI_MMIO_EVTLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7) -#define AMDVI_MMIO_EVTLOG_SIZE_MASK AMDVI_MMIO_CMDBUF_SIZE_MASK -#define AMDVI_MMIO_EVTLOG_BASE_MASK AMDVI_MMIO_CMDBUF_BASE_MASK -#define AMDVI_MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f) -#define AMDVI_MMIO_EVTLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK +#define AMDVI_MMIO_EVTLOG_SIZE_MASK 0x0f +#define AMDVI_MMIO_EVTLOG_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_EVTLOG_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_EVTLOG_TAIL_MASK GENMASK64(18, 4) -#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7) -#define AMDVI_MMIO_PPRLOG_HEAD_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK -#define AMDVI_MMIO_PPRLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK -#define AMDVI_MMIO_PPRLOG_BASE_MASK AMDVI_MMIO_EVTLOG_BASE_MASK -#define AMDVI_MMIO_PPRLOG_SIZE_MASK AMDVI_MMIO_EVTLOG_SIZE_MASK +#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_PPR_BASE + 7) +#define 
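/*
 * GENMASK64(h, l), added above, builds a mask covering bits l..h
 * inclusive, e.g. GENMASK64(18, 4) == 0x7fff0 and GENMASK64(51, 12) ==
 * 0x000ffffffffff000, so each register mask now spells out the spec's
 * bit range instead of a hand-computed constant.
 */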
AMDVI_MMIO_PPRLOG_SIZE_MASK 0x0f +#define AMDVI_MMIO_PPRLOG_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_PPRLOG_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_PPRLOG_TAIL_MASK GENMASK64(18, 4) #define AMDVI_MMIO_EXCL_ENABLED_MASK (1ULL << 0) #define AMDVI_MMIO_EXCL_ALLOW_MASK (1ULL << 1) -#define AMDVI_MMIO_EXCL_LIMIT_MASK AMDVI_MMIO_DEVTAB_BASE_MASK +#define AMDVI_MMIO_EXCL_LIMIT_MASK GENMASK64(51, 12) #define AMDVI_MMIO_EXCL_LIMIT_LOW 0xfff /* mmio control register flags */ @@ -109,6 +111,7 @@ #define AMDVI_MMIO_STATUS_CMDBUF_RUN (1 << 4) #define AMDVI_MMIO_STATUS_EVT_RUN (1 << 3) #define AMDVI_MMIO_STATUS_COMP_INT (1 << 2) +#define AMDVI_MMIO_STATUS_EVENT_INT (1 << 1) #define AMDVI_MMIO_STATUS_EVT_OVF (1 << 0) #define AMDVI_CMDBUF_ID_BYTE 0x07 @@ -130,14 +133,14 @@ #define AMDVI_DEV_TRANSLATION_VALID (1ULL << 1) #define AMDVI_DEV_MODE_MASK 0x7 #define AMDVI_DEV_MODE_RSHIFT 9 -#define AMDVI_DEV_PT_ROOT_MASK 0xffffffffff000 +#define AMDVI_DEV_PT_ROOT_MASK GENMASK64(51, 12) #define AMDVI_DEV_PT_ROOT_RSHIFT 12 #define AMDVI_DEV_PERM_SHIFT 61 #define AMDVI_DEV_PERM_READ (1ULL << 61) #define AMDVI_DEV_PERM_WRITE (1ULL << 62) /* Device table entry bits 64:127 */ -#define AMDVI_DEV_DOMID_ID_MASK ((1ULL << 16) - 1) +#define AMDVI_DEV_DOMID_ID_MASK GENMASK64(15, 0) /* Event codes and flags, as stored in the info field */ #define AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 12) @@ -162,9 +165,10 @@ #define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */ /* reserved DTE bits */ -#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x80300000000000fc -#define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100 -#define AMDVI_DTE_UPPER_QUAD_RESERVED 0x08f0000000000000 +#define AMDVI_DTE_QUAD0_RESERVED (GENMASK64(6, 2) | GENMASK64(63, 63)) +#define AMDVI_DTE_QUAD1_RESERVED 0 +#define AMDVI_DTE_QUAD2_RESERVED GENMASK64(53, 52) +#define AMDVI_DTE_QUAD3_RESERVED (GENMASK64(14, 0) | GENMASK64(53, 48)) /* AMDVI paging mode */ #define AMDVI_GATS_MODE (2ULL << 12) @@ -194,16 +198,12 @@ #define AMDVI_PAGE_SIZE (1ULL << AMDVI_PAGE_SHIFT) #define AMDVI_PAGE_SHIFT_4K 12 -#define AMDVI_PAGE_MASK_4K (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1)) - -#define AMDVI_MAX_VA_ADDR (48UL << 5) -#define AMDVI_MAX_PH_ADDR (40UL << 8) -#define AMDVI_MAX_GVA_ADDR (48UL << 15) +#define AMDVI_PAGE_MASK_4K GENMASK64(63, 12) -/* Completion Wait data size */ -#define AMDVI_COMPLETION_DATA_SIZE 8 +#define AMDVI_MAX_GVA_ADDR (2UL << 5) +#define AMDVI_MAX_PH_ADDR (40UL << 8) +#define AMDVI_MAX_VA_ADDR (48UL << 15) -#define AMDVI_COMMAND_SIZE 16 /* Completion Wait data size */ #define AMDVI_COMPLETION_DATA_SIZE 8 @@ -228,7 +228,7 @@ #define AMDVI_IR_INTCTL_PASS 1 #define AMDVI_IR_INTCTL_REMAP 2 -#define AMDVI_IR_PHYS_ADDR_MASK (((1ULL << 45) - 1) << 6) +#define AMDVI_IR_PHYS_ADDR_MASK GENMASK64(51, 6) /* MSI data 10:0 bits (section 2.2.5.1 Fig 14) */ #define AMDVI_IRTE_OFFSET 0x7ff @@ -315,20 +315,20 @@ struct AMDVIPCIState { struct AMDVIState { X86IOMMUState iommu; /* IOMMU bus device */ - AMDVIPCIState pci; /* IOMMU PCI device */ + AMDVIPCIState *pci; /* IOMMU PCI device */ + char *pci_id; /* ID of AMDVI-PCI device, if user created */ uint32_t version; uint64_t mmio_addr; bool enabled; /* IOMMU enabled */ - bool ats_enabled; /* address translation enabled */ bool cmdbuf_enabled; /* command buffer enabled */ bool evtlog_enabled; /* event log enabled */ bool excl_enabled; hwaddr devtab; /* base address device table */ - size_t devtab_len; /* device table length */ + uint64_t devtab_len; /* device table length */ hwaddr cmdbuf; /* command buffer base address 
*/ uint64_t cmdbuf_len; /* command buffer length */ diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 5f8ed12..83c5e44 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1728,8 +1728,6 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as) static bool vtd_switch_address_space(VTDAddressSpace *as) { bool use_iommu, pt; - /* Whether we need to take the BQL on our own */ - bool take_bql = !bql_locked(); assert(as); @@ -1746,9 +1744,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) * from vtd_pt_enable_fast_path(). However the memory APIs need * it. We'd better make sure we have had it already, or, take it. */ - if (take_bql) { - bql_lock(); - } + BQL_LOCK_GUARD(); /* Turn off first then on the other */ if (use_iommu) { @@ -1801,10 +1797,6 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) memory_region_set_enabled(&as->iommu_ir_fault, false); } - if (take_bql) { - bql_unlock(); - } - return use_iommu; } @@ -1995,9 +1987,9 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, uint32_t pasid) { dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); - uint32_t level = vtd_get_iova_level(s, ce, pasid); uint32_t offset; uint64_t flpte, flag_ad = VTD_FL_A; + *flpte_level = vtd_get_iova_level(s, ce, pasid); if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) { error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 "," @@ -2006,11 +1998,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, } while (true) { - offset = vtd_iova_level_offset(iova, level); + offset = vtd_iova_level_offset(iova, *flpte_level); flpte = vtd_get_pte(addr, offset); if (flpte == (uint64_t)-1) { - if (level == vtd_get_iova_level(s, ce, pasid)) { + if (*flpte_level == vtd_get_iova_level(s, ce, pasid)) { /* Invalid programming of pasid-entry */ return -VTD_FR_PASID_ENTRY_FSPTPTR_INV; } else { @@ -2036,15 +2028,15 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, if (is_write && !(flpte & VTD_FL_RW)) { return -VTD_FR_SM_WRITE; } - if (vtd_flpte_nonzero_rsvd(flpte, level)) { + if (vtd_flpte_nonzero_rsvd(flpte, *flpte_level)) { error_report_once("%s: detected flpte reserved non-zero " "iova=0x%" PRIx64 ", level=0x%" PRIx32 "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")", - __func__, iova, level, flpte, pasid); + __func__, iova, *flpte_level, flpte, pasid); return -VTD_FR_FS_PAGING_ENTRY_RSVD; } - if (vtd_is_last_pte(flpte, level) && is_write) { + if (vtd_is_last_pte(flpte, *flpte_level) && is_write) { flag_ad |= VTD_FL_D; } @@ -2052,14 +2044,13 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, return -VTD_FR_FS_BIT_UPDATE_FAILED; } - if (vtd_is_last_pte(flpte, level)) { + if (vtd_is_last_pte(flpte, *flpte_level)) { *flptep = flpte; - *flpte_level = level; return 0; } addr = vtd_get_pte_addr(flpte, aw_bits); - level--; + (*flpte_level)--; } } @@ -2100,7 +2091,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, uint8_t bus_num = pci_bus_num(bus); VTDContextCacheEntry *cc_entry; uint64_t pte, page_mask; - uint32_t level, pasid = vtd_as->pasid; + uint32_t level = UINT32_MAX; + uint32_t pasid = vtd_as->pasid; uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; bool is_fpd_set = false; @@ -2259,14 +2251,19 @@ out: entry->iova = addr & page_mask; entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask; entry->addr_mask = ~page_mask; - entry->perm = access_flags; + entry->perm = (is_write ? 
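/*
 * On the perm downgrade here: the walk above sets the dirty bit
 * (VTD_FL_D) only for write requests, so a read lookup must not hand
 * out IOMMU_WO; a later write through such a mapping would bypass
 * dirty-bit tracking.
 */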
access_flags : (access_flags & (~IOMMU_WO))); return true; error: vtd_iommu_unlock(s); entry->iova = 0; entry->translated_addr = 0; - entry->addr_mask = 0; + /* + * Set the mask for ATS (the range must be present even when the + * translation fails : PCIe rev 5 10.2.3.5) + */ + entry->addr_mask = (level != UINT32_MAX) ? + (~vtd_pt_level_page_mask(level)) : (~VTD_PAGE_MASK_4K); entry->perm = IOMMU_NONE; return false; } @@ -2511,6 +2508,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, .translated_addr = 0, .addr_mask = size - 1, .perm = IOMMU_NONE, + .pasid = vtd_as->pasid, }, }; memory_region_notify_iommu(&vtd_as->iommu, 0, event); @@ -2830,6 +2828,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { uint64_t mask[4] = {VTD_INV_DESC_WAIT_RSVD_LO, VTD_INV_DESC_WAIT_RSVD_HI, VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + bool ret = true; if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false, __func__, "wait")) { @@ -2841,8 +2840,6 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) uint32_t status_data = (uint32_t)(inv_desc->lo >> VTD_INV_DESC_WAIT_DATA_SHIFT); - assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF)); - /* FIXME: need to be masked with HAW? */ dma_addr_t status_addr = inv_desc->hi; trace_vtd_inv_desc_wait_sw(status_addr, status_data); @@ -2851,18 +2848,22 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) &status_data, sizeof(status_data), MEMTXATTRS_UNSPECIFIED)) { trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo); - return false; + ret = false; } - } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) { + } + + if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) { /* Interrupt flag */ vtd_generate_completion_event(s); - } else { + } + + if (!(inv_desc->lo & (VTD_INV_DESC_WAIT_IF | VTD_INV_DESC_WAIT_SW))) { error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64 " (unknown type)", __func__, inv_desc->hi, inv_desc->lo); return false; } - return true; + return ret; } static bool vtd_process_context_cache_desc(IntelIOMMUState *s, @@ -3098,6 +3099,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as, event.entry.iova = addr; event.entry.perm = IOMMU_NONE; event.entry.translated_addr = 0; + event.entry.pasid = vtd_dev_as->pasid; memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); } @@ -3680,6 +3682,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr, IOMMUTLBEntry iotlb = { /* We'll fill in the rest later. */ .target_as = &address_space_memory, + .pasid = vtd_as->pasid, }; bool success; @@ -4213,9 +4216,30 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, VTDAddressSpace *vtd_dev_as; char name[128]; + vtd_iommu_lock(s); vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key); + vtd_iommu_unlock(s); + if (!vtd_dev_as) { - struct vtd_as_key *new_key = g_malloc(sizeof(*new_key)); + struct vtd_as_key *new_key; + /* Slow path */ + + /* + * memory_region_add_subregion_overlap requires the bql, + * make sure we own it. 
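+ * (BQL_LOCK_GUARD(), used the same way in vtd_switch_address_space()
+ * above, takes the lock only if the calling thread does not already
+ * hold it and releases only what it took when the scope ends.)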
+ */ + BQL_LOCK_GUARD(); + vtd_iommu_lock(s); + + /* Check again as we released the lock for a moment */ + vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key); + if (vtd_dev_as) { + vtd_iommu_unlock(s); + return vtd_dev_as; + } + + /* Still nothing, allocate a new address space */ + new_key = g_malloc(sizeof(*new_key)); new_key->bus = bus; new_key->devfn = devfn; @@ -4306,6 +4330,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, vtd_switch_address_space(vtd_dev_as); g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as); + + vtd_iommu_unlock(s); } return vtd_dev_as; } @@ -4572,7 +4598,7 @@ static void vtd_cap_init(IntelIOMMUState *s) } if (s->pasid) { - s->ecap |= VTD_ECAP_PASID; + s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS; } } @@ -4715,10 +4741,118 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &vtd_as->as; } +static IOMMUTLBEntry vtd_iommu_ats_do_translate(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flags) +{ + IOMMUTLBEntry entry; + VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); + + if (vtd_is_interrupt_addr(addr)) { + vtd_report_ir_illegal_access(vtd_as, addr, flags & IOMMU_WO); + entry.target_as = &address_space_memory; + entry.iova = 0; + entry.translated_addr = 0; + entry.addr_mask = ~VTD_PAGE_MASK_4K; + entry.perm = IOMMU_NONE; + entry.pasid = PCI_NO_PASID; + } else { + entry = vtd_iommu_translate(iommu, addr, flags, 0); + } + + return entry; +} + +static ssize_t vtd_ats_request_translation(PCIBus *bus, void *opaque, + int devfn, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count) +{ + IntelIOMMUState *s = opaque; + VTDAddressSpace *vtd_as; + IOMMUAccessFlags flags = IOMMU_ACCESS_FLAG_FULL(true, !no_write, exec_req, + priv_req, false, false); + ssize_t res_index = 0; + hwaddr target_address = addr + length; + IOMMUTLBEntry entry; + + vtd_as = vtd_find_add_as(s, bus, devfn, pasid); + *err_count = 0; + + while ((addr < target_address) && (res_index < result_length)) { + entry = vtd_iommu_ats_do_translate(&vtd_as->iommu, addr, flags); + entry.perm &= ~IOMMU_GLOBAL; /* Spec 4.1.2: Global Mapping never set */ + + if ((entry.perm & flags) != flags) { + *err_count += 1; /* Less than expected */ + } + + result[res_index] = entry; + res_index += 1; + addr = (addr & (~entry.addr_mask)) + (entry.addr_mask + 1); + } + + /* Buffer too small */ + if (addr < target_address) { + return -ENOMEM; + } + + return res_index; +} + +static void vtd_init_iotlb_notifier(PCIBus *bus, void *opaque, int devfn, + IOMMUNotifier *n, IOMMUNotify fn, + void *user_opaque) +{ + n->opaque = user_opaque; + iommu_notifier_init(n, fn, IOMMU_NOTIFIER_DEVIOTLB_EVENTS, 0, + HWADDR_MAX, 0); +} + +static void vtd_get_iotlb_info(void *opaque, uint8_t *addr_width, + uint32_t *min_page_size) +{ + IntelIOMMUState *s = opaque; + + *addr_width = s->aw_bits; + *min_page_size = VTD_PAGE_SIZE; +} + +static void vtd_register_iotlb_notifier(PCIBus *bus, void *opaque, + int devfn, uint32_t pasid, + IOMMUNotifier *n) +{ + IntelIOMMUState *s = opaque; + VTDAddressSpace *vtd_as; + + vtd_as = vtd_find_add_as(s, bus, devfn, pasid); + memory_region_register_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n, + &error_fatal); +} + +static void vtd_unregister_iotlb_notifier(PCIBus *bus, void *opaque, + int devfn, uint32_t pasid, + IOMMUNotifier *n) +{ + IntelIOMMUState *s = opaque; + VTDAddressSpace *vtd_as; + + vtd_as 
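/*
 * Caller-side sketch for the ats_request_translation() hook above
 * (hypothetical device code; res, errs and the 8 KiB range are made up
 * for illustration):
 *
 *   IOMMUTLBEntry res[2];
 *   uint32_t errs;
 *   ssize_t n = ops->ats_request_translation(bus, opaque, devfn,
 *                                            PCI_NO_PASID, false, false,
 *                                            iova, 8192, false,
 *                                            res, 2, &errs);
 *
 * n < 0 (-ENOMEM) means res[] was too small for the range; otherwise
 * each res[i] covers (iova & ~addr_mask) to (iova | addr_mask), and
 * entries lacking some requested permission are counted in errs, with
 * perm == IOMMU_NONE on fully faulted subranges.
 */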
= vtd_find_add_as(s, bus, devfn, pasid); + memory_region_unregister_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n); +} + static PCIIOMMUOps vtd_iommu_ops = { .get_address_space = vtd_host_dma_iommu, .set_iommu_device = vtd_dev_set_iommu_device, .unset_iommu_device = vtd_dev_unset_iommu_device, + .get_iotlb_info = vtd_get_iotlb_info, + .init_iotlb_notifier = vtd_init_iotlb_notifier, + .register_iotlb_notifier = vtd_register_iotlb_notifier, + .unregister_iotlb_notifier = vtd_unregister_iotlb_notifier, + .ats_request_translation = vtd_ats_request_translation, }; static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index e8b211e..360e937 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -192,6 +192,7 @@ #define VTD_ECAP_SC (1ULL << 7) #define VTD_ECAP_MHMV (15ULL << 20) #define VTD_ECAP_SRS (1ULL << 31) +#define VTD_ECAP_PSS (7ULL << 35) /* limit: MemTxAttrs::pid */ #define VTD_ECAP_PASID (1ULL << 40) #define VTD_ECAP_SMTS (1ULL << 43) #define VTD_ECAP_SLTS (1ULL << 46) diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 39035db..1be9bfe 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -17,6 +17,7 @@ #include "system/hw_accel.h" #include "system/kvm.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, int reg_id, uint32_t val) @@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data) struct kvm_lapic_state kapic; int ret; + if (is_tdx_vm()) { + return; + } + kvm_put_apicbase(s->cpu, s->apicbase); kvm_put_apic_state(s, &kapic); diff --git a/hw/i386/kvm/xen-stubs.c b/hw/i386/kvm/xen-stubs.c index d03131e..ce73119 100644 --- a/hw/i386/kvm/xen-stubs.c +++ b/hw/i386/kvm/xen-stubs.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" #include "xen_evtchn.h" #include "xen_primary_console.h" @@ -38,15 +37,3 @@ void xen_primary_console_create(void) void xen_primary_console_set_be_port(uint16_t port) { } -#ifdef TARGET_I386 -EvtchnInfoList *qmp_xen_event_list(Error **errp) -{ - error_setg(errp, "Xen event channel emulation not enabled"); - return NULL; -} - -void qmp_xen_event_inject(uint32_t port, Error **errp) -{ - error_setg(errp, "Xen event channel emulation not enabled"); -} -#endif diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index b519054..dd566c4 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -19,7 +19,7 @@ #include "monitor/monitor.h" #include "monitor/hmp.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "qobject/qdict.h" #include "qom/object.h" #include "exec/target_page.h" diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 10bdfde..7896f34 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -32,6 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files( 'port92.c')) i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'), if_false: files('pc_sysfw_ovmf-stubs.c')) +i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c')) subdir('kvm') subdir('xen') diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index e0daf0d..94d22a2 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -49,6 +49,7 @@ #include "hw/acpi/generic_event_device.h" #include "hw/pci-host/gpex.h" #include "hw/usb/xhci.h" +#include "hw/vfio/types.h" #include "elf.h" #include 
"kvm/kvm_i386.h" @@ -633,6 +634,8 @@ GlobalProperty microvm_properties[] = { * so reserving io space is not going to work. Turn it off. */ { "pcie-root-port", "io-reserve", "0" }, + { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" }, + { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" }, }; static void microvm_class_init(ObjectClass *oc, const void *data) diff --git a/hw/i386/monitor.c b/hw/i386/monitor.c index 1921e4d..79df965 100644 --- a/hw/i386/monitor.c +++ b/hw/i386/monitor.c @@ -26,7 +26,7 @@ #include "monitor/monitor.h" #include "qobject/qdict.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "hw/i386/x86.h" #include "hw/rtc/mc146818rtc.h" diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7065615..2f58e73 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -44,6 +44,7 @@ #include "system/xen.h" #include "system/reset.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #include "hw/xen/xen.h" #include "qobject/qlist.h" #include "qemu/error-report.h" @@ -80,7 +81,10 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, -GlobalProperty pc_compat_10_0[] = {}; +GlobalProperty pc_compat_10_0[] = { + { TYPE_X86_CPU, "x-consistent-cache", "false" }, + { TYPE_X86_CPU, "x-vendor-cpuid-only-v2", "false" }, +}; const size_t pc_compat_10_0_len = G_N_ELEMENTS(pc_compat_10_0); GlobalProperty pc_compat_9_2[] = {}; @@ -259,28 +263,6 @@ GlobalProperty pc_compat_2_6[] = { }; const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6); -GlobalProperty pc_compat_2_5[] = {}; -const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5); - -GlobalProperty pc_compat_2_4[] = { - PC_CPU_MODEL_IDS("2.4.0") - { "Haswell-" TYPE_X86_CPU, "abm", "off" }, - { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, - { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, - { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, - { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, - { TYPE_X86_CPU, "check", "off" }, - { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, - { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, - { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" }, - { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, - { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", } -}; -const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4); - /* * @PC_FW_DATA: * Size of the chunk of memory at the top of RAM for the BIOS ACPI tables @@ -630,7 +612,7 @@ void pc_machine_done(Notifier *notifier, void *data) &error_fatal); if (pcms->cxl_devices_state.is_enabled) { - cxl_fmws_link_targets(&pcms->cxl_devices_state, &error_fatal); + cxl_fmws_link_targets(&error_fatal); } /* set the number of CPUs */ @@ -739,20 +721,28 @@ static uint64_t pc_get_cxl_range_start(PCMachineState *pcms) return cxl_base; } -static uint64_t pc_get_cxl_range_end(PCMachineState *pcms) +static int cxl_get_fmw_end(Object *obj, void *opaque) { - uint64_t start = pc_get_cxl_range_start(pcms) + MiB; - - if (pcms->cxl_devices_state.fixed_windows) { - GList *it; + struct CXLFixedWindow *fw; + uint64_t *start = opaque; - start = ROUND_UP(start, 256 * MiB); - for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) { - CXLFixedWindow *fw = it->data; - start += fw->size; - } + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { + return 0; } + fw = CXL_FMW(obj); + + 
*start += fw->size; + + return 0; +} + +static uint64_t pc_get_cxl_range_end(PCMachineState *pcms) +{ + uint64_t start = pc_get_cxl_range_start(pcms) + MiB; + /* Ordering doesn't matter so no need to build a sorted list */ + object_child_foreach_recursive(object_get_root(), cxl_get_fmw_end, + &start); return start; } @@ -954,43 +944,31 @@ void pc_memory_init(PCMachineState *pcms, cxl_base = pc_get_cxl_range_start(pcms); memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); memory_region_add_subregion(system_memory, cxl_base, mr); - cxl_resv_end = cxl_base + cxl_size; - if (pcms->cxl_devices_state.fixed_windows) { - hwaddr cxl_fmw_base; - GList *it; - - cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB); - for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) { - CXLFixedWindow *fw = it->data; - - fw->base = cxl_fmw_base; - memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, - "cxl-fixed-memory-region", fw->size); - memory_region_add_subregion(system_memory, fw->base, &fw->mr); - cxl_fmw_base += fw->size; - cxl_resv_end = cxl_fmw_base; - } - } + cxl_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB); + cxl_resv_end = cxl_fmws_set_memmap(cxl_base, maxphysaddr); + cxl_fmws_update_mmio(); } /* Initialize PC system firmware */ pc_system_firmware_init(pcms, rom_memory); - option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - if (machine_require_guest_memfd(machine)) { - memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", - PC_ROM_SIZE, &error_fatal); - } else { - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); - if (pcmc->pci_enabled) { - memory_region_set_readonly(option_rom_mr, true); + if (!is_tdx_vm()) { + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + if (machine_require_guest_memfd(machine)) { + memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", + PC_ROM_SIZE, &error_fatal); + } else { + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); + if (pcmc->pci_enabled) { + memory_region_set_readonly(option_rom_mr, true); + } } + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, + option_rom_mr, + 1); } - memory_region_add_subregion_overlap(rom_memory, - PC_ROM_MIN_VGA, - option_rom_mr, - 1); fw_cfg = fw_cfg_arch_create(machine, x86ms->boot_cpus, x86ms->apic_id_limit); @@ -999,14 +977,13 @@ void pc_memory_init(PCMachineState *pcms, if (machine->device_memory) { uint64_t *val = g_malloc(sizeof(*val)); - uint64_t res_mem_end = machine->device_memory->base; - - if (!pcmc->broken_reserved_end) { - res_mem_end += memory_region_size(&machine->device_memory->mr); - } + uint64_t res_mem_end; if (pcms->cxl_devices_state.is_enabled) { res_mem_end = cxl_resv_end; + } else { + res_mem_end = machine->device_memory->base + + memory_region_size(&machine->device_memory->mr); } *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); @@ -1044,9 +1021,7 @@ uint64_t pc_pci_hole64_start(void) hole64_start = pc_get_cxl_range_end(pcms); } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) { pc_get_device_memory_range(pcms, &hole64_start, &size); - if (!pcmc->broken_reserved_end) { - hole64_start += size; - } + hole64_start += size; } else { hole64_start = pc_above_4g_end(pcms); } @@ -1058,7 +1033,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus) { DeviceState *dev = NULL; - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA); if (pci_bus) { PCIDevice *pcidev = 
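/*
 * The dropped rom_set_order_override()/rom_reset_order_override()
 * calls in this file only changed fw_cfg file ordering for machines
 * with legacy_fw_cfg_order set, i.e. pc-2.5 and older; with those
 * machine types removed below, the overrides were dead code, which is
 * presumably why this series deletes them.
 */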
pci_vga_init(pci_bus); dev = pcidev ? &pcidev->qdev : NULL; @@ -1066,7 +1040,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus) ISADevice *isadev = isa_vga_init(isa_bus); dev = isadev ? DEVICE(isadev) : NULL; } - rom_reset_order_override(); + return dev; } @@ -1256,8 +1230,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus) bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000); NICInfo *nd; - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC); - while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) { pc_init_ne2k_isa(isa_bus, nd, &error_fatal); } @@ -1266,8 +1238,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus) if (pci_bus) { pci_init_nic_devices(pci_bus, mc->default_nic); } - - rom_reset_order_override(); } void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs) @@ -1860,6 +1830,18 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data) object_class_property_add_bool(oc, "fd-bootchk", pc_machine_get_fd_bootchk, pc_machine_set_fd_bootchk); + +#if defined(CONFIG_IGVM) + object_class_property_add_link(oc, "igvm-cfg", + TYPE_IGVM_CFG, + offsetof(X86MachineState, igvm), + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG); + object_class_property_set_description(oc, "igvm-cfg", + "Set IGVM configuration"); +#endif + + } static const TypeInfo pc_machine_info = { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 0dce512..c033242 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -49,6 +49,7 @@ #include "hw/i2c/smbus_eeprom.h" #include "system/memory.h" #include "hw/acpi/acpi.h" +#include "hw/vfio/types.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "system/xen.h" @@ -77,6 +78,13 @@ static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 }; static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; #endif +static GlobalProperty pc_piix_compat_defaults[] = { + { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" }, + { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" }, +}; +static const size_t pc_piix_compat_defaults_len = + G_N_ELEMENTS(pc_piix_compat_defaults); + /* * Return the global irq number corresponding to a given device irq * pin. We could also use the bus number to have a more precise mapping. 
@@ -285,6 +293,8 @@ static void pc_init1(MachineState *machine, const char *pci_type) pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0"); pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1"); } else { + uint32_t irq; + isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); isa_bus_register_input_irqs(isa_bus, x86ms->gsi); @@ -292,6 +302,9 @@ static void pc_init1(MachineState *machine, const char *pci_type) x86ms->rtc = isa_new(TYPE_MC146818_RTC); qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000); isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal); + irq = object_property_get_uint(OBJECT(x86ms->rtc), "irq", + &error_fatal); + isa_connect_gpio_out(ISA_DEVICE(x86ms->rtc), 0, irq); i8257_dma_init(OBJECT(machine), isa_bus, 0); pcms->hpet_enabled = false; @@ -361,6 +374,16 @@ static void pc_init1(MachineState *machine, const char *pci_type) x86_nvdimm_acpi_dsmio, x86ms->fw_cfg, OBJECT(pcms)); } + +#if defined(CONFIG_IGVM) + /* Apply guest state from IGVM if supplied */ + if (x86ms->igvm) { + if (IGVM_CFG_GET_CLASS(x86ms->igvm) + ->process(x86ms->igvm, machine->cgs, false, &error_fatal) < 0) { + g_assert_not_reached(); + } + } +#endif } typedef enum PCSouthBridgeOption { @@ -477,6 +500,8 @@ static void pc_i440fx_machine_options(MachineClass *m) pc_set_south_bridge); object_class_property_set_description(oc, "x-south-bridge", "Use a different south bridge than PIIX3"); + compat_props_add(m->compat_props, + pc_piix_compat_defaults, pc_piix_compat_defaults_len); } static void pc_i440fx_machine_10_1_options(MachineClass *m) @@ -778,32 +803,6 @@ static void pc_i440fx_machine_2_6_options(MachineClass *m) DEFINE_I440FX_MACHINE(2, 6); -static void pc_i440fx_machine_2_5_options(MachineClass *m) -{ - X86MachineClass *x86mc = X86_MACHINE_CLASS(m); - - pc_i440fx_machine_2_6_options(m); - x86mc->save_tsc_khz = false; - m->legacy_fw_cfg_order = 1; - compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len); - compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len); -} - -DEFINE_I440FX_MACHINE(2, 5); - -static void pc_i440fx_machine_2_4_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - - pc_i440fx_machine_2_5_options(m); - m->hw_version = "2.4.0"; - pcmc->broken_reserved_end = true; - compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len); - compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len); -} - -DEFINE_I440FX_MACHINE(2, 4); - #ifdef CONFIG_ISAPC static void isapc_machine_options(MachineClass *m) { diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index c538b3d..b309b2b 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -45,6 +45,7 @@ #include "hw/i386/pc.h" #include "hw/i386/amd_iommu.h" #include "hw/i386/intel_iommu.h" +#include "hw/vfio/types.h" #include "hw/virtio/virtio-iommu.h" #include "hw/display/ramfb.h" #include "hw/ide/pci.h" @@ -67,6 +68,8 @@ static GlobalProperty pc_q35_compat_defaults[] = { { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "39" }, + { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" }, + { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" }, }; static const size_t pc_q35_compat_defaults_len = G_N_ELEMENTS(pc_q35_compat_defaults); @@ -325,6 +328,16 @@ static void pc_q35_init(MachineState *machine) x86_nvdimm_acpi_dsmio, x86ms->fw_cfg, OBJECT(pcms)); } + +#if defined(CONFIG_IGVM) + /* Apply guest state from IGVM if supplied */ + if (x86ms->igvm) { + if (IGVM_CFG_GET_CLASS(x86ms->igvm) + ->process(x86ms->igvm, machine->cgs, false, &error_fatal) < 0) { + g_assert_not_reached(); + } + } 
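+ /*
+  * With &error_fatal, any failure inside ->process() makes QEMU exit
+  * before the call can return, so the negative-return branch should be
+  * unreachable; the assertion above documents that rather than
+  * handling a real error path.
+  */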
+#endif } #define DEFINE_Q35_MACHINE(major, minor) \ @@ -672,29 +685,3 @@ static void pc_q35_machine_2_6_options(MachineClass *m) } DEFINE_Q35_MACHINE(2, 6); - -static void pc_q35_machine_2_5_options(MachineClass *m) -{ - X86MachineClass *x86mc = X86_MACHINE_CLASS(m); - - pc_q35_machine_2_6_options(m); - x86mc->save_tsc_khz = false; - m->legacy_fw_cfg_order = 1; - compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len); - compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len); -} - -DEFINE_Q35_MACHINE(2, 5); - -static void pc_q35_machine_2_4_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - - pc_q35_machine_2_5_options(m); - m->hw_version = "2.4.0"; - pcmc->broken_reserved_end = true; - compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len); - compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len); -} - -DEFINE_Q35_MACHINE(2, 4); diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 1eeb58a..1a12b63 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -37,6 +37,7 @@ #include "hw/block/flash.h" #include "system/kvm.h" #include "target/i386/sev.h" +#include "kvm/tdx.h" #define FLASH_SECTOR_SIZE 4096 @@ -219,7 +220,13 @@ void pc_system_firmware_init(PCMachineState *pcms, BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; if (!pcmc->pci_enabled) { - x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); + /* + * If an IGVM file is specified then the firmware must be provided + * in the IGVM file. + */ + if (!X86_MACHINE(pcms)->igvm) { + x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); + } return; } @@ -239,8 +246,13 @@ void pc_system_firmware_init(PCMachineState *pcms, } if (!pflash_blk[0]) { - /* Machine property pflash0 not set, use ROM mode */ - x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); + /* + * Machine property pflash0 not set, use ROM mode unless using IGVM, + * in which case the firmware must be provided by the IGVM file. + */ + if (!X86_MACHINE(pcms)->igvm) { + x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); + } } else { if (kvm_enabled() && !kvm_readonly_mem_enabled()) { /* @@ -256,6 +268,20 @@ void pc_system_firmware_init(PCMachineState *pcms, } pc_system_flash_cleanup_unused(pcms); + + /* + * The user should not have specified any pflash devices when using IGVM + * to configure the guest. 
+ */ + if (X86_MACHINE(pcms)->igvm) { + for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { + if (pcms->flash[i]) { + error_report("pflash devices cannot be configured when " + "using IGVM"); + exit(1); + } + } + } } void x86_firmware_configure(hwaddr gpa, void *ptr, int size) @@ -280,5 +306,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size) } sev_encrypt_flash(gpa, ptr, size, &error_fatal); + } else if (is_tdx_vm()) { + ret = tdx_parse_tdvf(ptr, size); + if (ret) { + error_report("failed to parse TDVF for TDX VM"); + exit(1); + } } } diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c index 38ff75e..d295e54 100644 --- a/hw/i386/sgx-stub.c +++ b/hw/i386/sgx-stub.c @@ -3,20 +3,20 @@ #include "monitor/hmp-target.h" #include "hw/i386/pc.h" #include "hw/i386/sgx-epc.h" +#include "qapi/qapi-commands-misc-i386.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" void sgx_epc_build_srat(GArray *table_data) { } -SGXInfo *qmp_query_sgx(Error **errp) +SgxInfo *qmp_query_sgx(Error **errp) { error_setg(errp, "SGX support is not compiled in"); return NULL; } -SGXInfo *qmp_query_sgx_capabilities(Error **errp) +SgxInfo *qmp_query_sgx_capabilities(Error **errp) { error_setg(errp, "SGX support is not compiled in"); return NULL; diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index 5685c4f..e280154 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -19,7 +19,7 @@ #include "monitor/hmp-target.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "system/address-spaces.h" #include "system/hw_accel.h" #include "system/reset.h" @@ -84,10 +84,10 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) ((high & MAKE_64BIT_MASK(0, 20)) << 32); } -static SGXEPCSectionList *sgx_calc_host_epc_sections(void) +static SgxEpcSectionList *sgx_calc_host_epc_sections(void) { - SGXEPCSectionList *head = NULL, **tail = &head; - SGXEPCSection *section; + SgxEpcSectionList *head = NULL, **tail = &head; + SgxEpcSection *section; uint32_t i, type; uint32_t eax, ebx, ecx, edx; uint32_t j = 0; @@ -104,7 +104,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void) break; } - section = g_new0(SGXEPCSection, 1); + section = g_new0(SgxEpcSection, 1); section->node = j++; section->size = sgx_calc_section_metric(ecx, edx); QAPI_LIST_APPEND(tail, section); @@ -153,9 +153,9 @@ static void sgx_epc_reset(void *opaque) } } -SGXInfo *qmp_query_sgx_capabilities(Error **errp) +SgxInfo *qmp_query_sgx_capabilities(Error **errp) { - SGXInfo *info = NULL; + SgxInfo *info = NULL; uint32_t eax, ebx, ecx, edx; Error *local_err = NULL; @@ -166,7 +166,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) return NULL; } - info = g_new0(SGXInfo, 1); + info = g_new0(SgxInfo, 1); host_cpuid(0x7, 0, &eax, &ebx, &ecx, &edx); info->sgx = ebx & (1U << 2) ? 
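/*
 * Worked example for sgx_calc_section_metric() above, with
 * illustrative CPUID leaf 0x12 subleaf values: ecx = 0x00800000 and
 * edx = 0x00000001 give
 *   (0x00800000 & 0xfffff000) + ((0x00000001 & 0xfffff) << 32)
 *   = 0x100800000
 * i.e. one EPC section of 4 GiB + 8 MiB. (Bit 2 of CPUID.7.0:EBX,
 * tested here, is the SGX feature flag itself.)
 */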
true : false; @@ -183,17 +183,17 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) return info; } -static SGXEPCSectionList *sgx_get_epc_sections_list(void) +static SgxEpcSectionList *sgx_get_epc_sections_list(void) { GSList *device_list = sgx_epc_get_device_list(); - SGXEPCSectionList *head = NULL, **tail = &head; - SGXEPCSection *section; + SgxEpcSectionList *head = NULL, **tail = &head; + SgxEpcSection *section; for (; device_list; device_list = device_list->next) { DeviceState *dev = device_list->data; Object *obj = OBJECT(dev); - section = g_new0(SGXEPCSection, 1); + section = g_new0(SgxEpcSection, 1); section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, &error_abort); section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, @@ -205,9 +205,9 @@ static SGXEPCSectionList *sgx_get_epc_sections_list(void) return head; } -SGXInfo *qmp_query_sgx(Error **errp) +SgxInfo *qmp_query_sgx(Error **errp) { - SGXInfo *info = NULL; + SgxInfo *info = NULL; X86MachineState *x86ms; PCMachineState *pcms = (PCMachineState *)object_dynamic_cast(qdev_get_machine(), @@ -223,7 +223,7 @@ SGXInfo *qmp_query_sgx(Error **errp) return NULL; } - info = g_new0(SGXInfo, 1); + info = g_new0(SgxInfo, 1); info->sgx = true; info->sgx1 = true; @@ -237,8 +237,8 @@ SGXInfo *qmp_query_sgx(Error **errp) void hmp_info_sgx(Monitor *mon, const QDict *qdict) { Error *err = NULL; - SGXEPCSectionList *section_list, *section; - g_autoptr(SGXInfo) info = qmp_query_sgx(&err); + SgxEpcSectionList *section_list, *section; + g_autoptr(SgxInfo) info = qmp_query_sgx(&err); uint64_t size = 0; if (err) { diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c new file mode 100644 index 0000000..782b3d1 --- /dev/null +++ b/hw/i386/tdvf-hob.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "standard-headers/uefi/uefi.h" +#include "hw/pci/pcie_host.h" +#include "tdvf-hob.h" + +typedef struct TdvfHob { + hwaddr hob_addr; + void *ptr; + int size; + + /* working area */ + void *current; + void *end; +} TdvfHob; + +static uint64_t tdvf_current_guest_addr(const TdvfHob *hob) +{ + return hob->hob_addr + (hob->current - hob->ptr); +} + +static void tdvf_align(TdvfHob *hob, size_t align) +{ + hob->current = QEMU_ALIGN_PTR_UP(hob->current, align); +} + +static void *tdvf_get_area(TdvfHob *hob, uint64_t size) +{ + void *ret; + + if (hob->current + size > hob->end) { + error_report("TD_HOB overrun, size = 0x%" PRIx64, size); + exit(1); + } + + ret = hob->current; + hob->current += size; + tdvf_align(hob, 8); + return ret; +} + +static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob) +{ + EFI_HOB_RESOURCE_DESCRIPTOR *region; + EFI_RESOURCE_ATTRIBUTE_TYPE attr; + EFI_RESOURCE_TYPE resource_type; + + TdxRamEntry *e; + int i; + + for (i = 0; i < tdx->nr_ram_entries; i++) { + e = &tdx->ram_entries[i]; + + if (e->type == TDX_RAM_UNACCEPTED) { + resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED; + attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED; + } else if (e->type == TDX_RAM_ADDED) { + resource_type = EFI_RESOURCE_SYSTEM_MEMORY; + attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE; + } else { + error_report("unknown TDX_RAM_ENTRY type %d", e->type); + exit(1); + } + + region = tdvf_get_area(hob, sizeof(*region)); + *region = (EFI_HOB_RESOURCE_DESCRIPTOR) { + .Header = { 
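/*
 * tdvf_get_area() above is a simple bump allocator over the TD-HOB
 * region: it exits on overrun and re-aligns hob->current to 8 bytes
 * after every allocation, matching the UEFI PI rule that HOBs start on
 * 8-byte boundaries. A resource descriptor HOB is 48 bytes (8-byte
 * header, 16-byte Owner GUID, two 4-byte type/attribute fields, two
 * 8-byte range fields), so this allocation needs no extra padding.
 */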
+ .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR, + .HobLength = cpu_to_le16(sizeof(*region)), + .Reserved = cpu_to_le32(0), + }, + .Owner = EFI_HOB_OWNER_ZERO, + .ResourceType = cpu_to_le32(resource_type), + .ResourceAttribute = cpu_to_le32(attr), + .PhysicalStart = cpu_to_le64(e->address), + .ResourceLength = cpu_to_le64(e->length), + }; + } +} + +void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob) +{ + TdvfHob hob = { + .hob_addr = td_hob->address, + .size = td_hob->size, + .ptr = td_hob->mem_ptr, + + .current = td_hob->mem_ptr, + .end = td_hob->mem_ptr + td_hob->size, + }; + + EFI_HOB_GENERIC_HEADER *last_hob; + EFI_HOB_HANDOFF_INFO_TABLE *hit; + + /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */ + hit = tdvf_get_area(&hob, sizeof(*hit)); + *hit = (EFI_HOB_HANDOFF_INFO_TABLE) { + .Header = { + .HobType = EFI_HOB_TYPE_HANDOFF, + .HobLength = cpu_to_le16(sizeof(*hit)), + .Reserved = cpu_to_le32(0), + }, + .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION), + .BootMode = cpu_to_le32(0), + .EfiMemoryTop = cpu_to_le64(0), + .EfiMemoryBottom = cpu_to_le64(0), + .EfiFreeMemoryTop = cpu_to_le64(0), + .EfiFreeMemoryBottom = cpu_to_le64(0), + .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */ + }; + + tdvf_hob_add_memory_resources(tdx, &hob); + + last_hob = tdvf_get_area(&hob, sizeof(*last_hob)); + *last_hob = (EFI_HOB_GENERIC_HEADER) { + .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST, + .HobLength = cpu_to_le16(sizeof(*last_hob)), + .Reserved = cpu_to_le32(0), + }; + hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob); +} diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h new file mode 100644 index 0000000..4fc6a37 --- /dev/null +++ b/hw/i386/tdvf-hob.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef HW_I386_TD_HOB_H +#define HW_I386_TD_HOB_H + +#include "hw/i386/tdvf.h" +#include "target/i386/kvm/tdx.h" + +void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob); + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_TESTED) + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_TESTED) + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE) + +#endif diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c new file mode 100644 index 0000000..645d9d1 --- /dev/null +++ b/hw/i386/tdvf.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" + +#include "hw/i386/pc.h" +#include "hw/i386/tdvf.h" +#include "system/kvm.h" + +#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2" +#define TDX_METADATA_VERSION 1 +#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */ +#define TDVF_ALIGNMENT 4096 + +/* + * the raw structs read from TDVF keeps the name convention in + * TDVF Design Guide spec. 
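+ * For example, DataOffset/RawDataSize locate a section inside the
+ * firmware image itself, while MemoryAddress/MemoryDataSize describe
+ * where, and how large, the section is in guest memory.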
+ */ +typedef struct { + uint32_t DataOffset; + uint32_t RawDataSize; + uint64_t MemoryAddress; + uint64_t MemoryDataSize; + uint32_t Type; + uint32_t Attributes; +} TdvfSectionEntry; + +typedef struct { + uint32_t Signature; + uint32_t Length; + uint32_t Version; + uint32_t NumberOfSectionEntries; + TdvfSectionEntry SectionEntries[]; +} TdvfMetadata; + +struct tdx_metadata_offset { + uint32_t offset; +}; + +static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size) +{ + TdvfMetadata *metadata; + uint32_t offset = 0; + uint8_t *data; + + if ((uint32_t) size != size) { + return NULL; + } + + if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) { + offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset); + + if (offset + sizeof(*metadata) > size) { + return NULL; + } + } else { + error_report("Cannot find TDX_METADATA_OFFSET_GUID"); + return NULL; + } + + metadata = flash_ptr + offset; + + /* Finally, verify the signature to determine if this is a TDVF image. */ + metadata->Signature = le32_to_cpu(metadata->Signature); + if (metadata->Signature != TDVF_SIGNATURE) { + error_report("Invalid TDVF signature in metadata!"); + return NULL; + } + + /* Sanity check that the TDVF doesn't overlap its own metadata. */ + metadata->Length = le32_to_cpu(metadata->Length); + if (offset + metadata->Length > size) { + return NULL; + } + + /* Only version 1 is supported/defined. */ + metadata->Version = le32_to_cpu(metadata->Version); + if (metadata->Version != TDX_METADATA_VERSION) { + return NULL; + } + + return metadata; +} + +static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src, + TdxFirmwareEntry *entry) +{ + entry->data_offset = le32_to_cpu(src->DataOffset); + entry->data_len = le32_to_cpu(src->RawDataSize); + entry->address = le64_to_cpu(src->MemoryAddress); + entry->size = le64_to_cpu(src->MemoryDataSize); + entry->type = le32_to_cpu(src->Type); + entry->attributes = le32_to_cpu(src->Attributes); + + /* sanity check */ + if (entry->size < entry->data_len) { + error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64, + entry->data_len, entry->size); + return -1; + } + if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) { + error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address); + return -1; + } + if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) { + error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size); + return -1; + } + + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + /* The sections that must be copied from firmware image to TD memory */ + if (entry->data_len == 0) { + error_report("%d section with RawDataSize == 0", entry->type); + return -1; + } + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + /* The sections that no need to be copied from firmware image */ + if (entry->data_len != 0) { + error_report("%d section with RawDataSize 0x%x != 0", + entry->type, entry->data_len); + return -1; + } + break; + default: + error_report("TDVF contains unsupported section type %d", entry->type); + return -1; + } + + return 0; +} + +int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size) +{ + g_autofree TdvfSectionEntry *sections = NULL; + TdvfMetadata *metadata; + ssize_t entries_size; + int i; + + metadata = tdvf_get_metadata(flash_ptr, size); + if (!metadata) { + return -EINVAL; + } + + /* load and parse metadata entries */ + fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries); + if (fw->nr_entries < 
2) {
+        error_report("Invalid number of fw entries (%u) in TDVF Metadata",
+                     fw->nr_entries);
+        return -EINVAL;
+    }
+
+    entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
+    if (metadata->Length != sizeof(*metadata) + entries_size) {
+        error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
+                     metadata->Length,
+                     (uint32_t)(sizeof(*metadata) + entries_size));
+        return -EINVAL;
+    }
+
+    fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
+    sections = g_new(TdvfSectionEntry, fw->nr_entries);
+
+    memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);
+
+    for (i = 0; i < fw->nr_entries; i++) {
+        if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
+            goto err;
+        }
+    }
+
+    fw->mem_ptr = flash_ptr;
+    return 0;
+
+err:
+    g_free(fw->entries);
+    fw->entries = NULL;
+    return -EINVAL;
+}
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 1b0671c..b1b5f11 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -44,6 +44,7 @@
 #include "standard-headers/asm-x86/bootparam.h"
 #include CONFIG_DEVICES
 #include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
 
 #ifdef CONFIG_XEN_EMU
 #include "hw/xen/xen.h"
@@ -1035,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
     if (machine_require_guest_memfd(MACHINE(x86ms))) {
         memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
                                            bios_size, &error_fatal);
+        if (is_tdx_vm()) {
+            tdx_set_tdvf_region(&x86ms->bios);
+        }
     } else {
         memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
                                &error_fatal);
     }
-    if (sev_enabled()) {
+    if (sev_enabled() || is_tdx_vm()) {
         /*
          * The concept of a "reset" simply doesn't exist for
          * confidential computing guests, we have to destroy and
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index e2d0409..f80533d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -382,7 +382,6 @@ static void x86_machine_class_init(ObjectClass *oc, const void *data)
     mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
     mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
     mc->kvm_type = x86_kvm_type;
-    x86mc->save_tsc_khz = true;
     x86mc->fwcfg_dma_enabled = true;
     nc->nmi_monitor_handler = x86_nmi;
diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c
index 1818cbd..a3f554f 100644
--- a/hw/input/virtio-input.c
+++ b/hw/input/virtio-input.c
@@ -189,7 +189,7 @@ static uint64_t virtio_input_get_features(VirtIODevice *vdev, uint64_t f,
     return f;
 }
 
-static void virtio_input_set_status(VirtIODevice *vdev, uint8_t val)
+static int virtio_input_set_status(VirtIODevice *vdev, uint8_t val)
 {
     VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(vdev);
     VirtIOInput *vinput = VIRTIO_INPUT(vdev);
@@ -202,6 +202,7 @@ static void virtio_input_set_status(VirtIODevice *vdev, uint8_t val)
             }
         }
     }
+    return 0;
 }
 
 static void virtio_input_reset(VirtIODevice *vdev)
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index d18bef4..899f133 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -59,7 +59,7 @@ static const uint8_t gic_id_gicv2[] = {
 static inline int gic_get_current_cpu(GICState *s)
 {
     if (!qtest_enabled() && s->num_cpu > 1) {
-        return current_cpu->cpu_index;
+        return current_cpu->cpu_index - s->first_cpu_index;
     }
     return 0;
 }
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 0f0c48d..ed5be05 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -350,6 +350,7 @@ static void arm_gic_common_linux_init(ARMLinuxBootIf *obj,
 static const Property arm_gic_common_properties[] = {
     DEFINE_PROP_UINT32("num-cpu", GICState, num_cpu, 1),
+
DEFINE_PROP_UINT32("first-cpu-index", GICState, first_cpu_index, 0), DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32), /* Revision can be 1 or 2 for GIC architecture specification * versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC. diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 1cee681..e438d8c 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -612,6 +612,7 @@ static const Property arm_gicv3_common_properties[] = { DEFINE_PROP_BOOL("has-lpi", GICv3State, lpi_enable, 0), DEFINE_PROP_BOOL("has-nmi", GICv3State, nmi_support, 0), DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0), + DEFINE_PROP_UINT32("maintenance-interrupt-id", GICv3State, maint_irq, 0), /* * Compatibility property: force 8 bits of physical priority, even * if the CPU being emulated should have fewer. diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 3be3bf6..8ed88e7 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -22,6 +22,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/intc/arm_gicv3_common.h" +#include "hw/arm/virt.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "system/kvm.h" @@ -825,6 +826,34 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) return; } + if (s->maint_irq) { + Error *kvm_nv_migration_blocker = NULL; + int ret; + + error_setg(&kvm_nv_migration_blocker, + "Live migration disabled because KVM nested virt is enabled"); + if (migrate_add_blocker(&kvm_nv_migration_blocker, errp)) { + error_free(kvm_nv_migration_blocker); + return; + } + + ret = kvm_device_check_attr(s->dev_fd, + KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ, 0); + if (!ret) { + error_setg_errno(errp, errno, + "VGICv3 setting maintenance IRQ is not " + "supported by this host kernel"); + return; + } + + ret = kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ, 0, + &s->maint_irq, true, errp); + if (ret) { + error_setg_errno(errp, errno, "Failed to set VGIC maintenance IRQ"); + return; + } + } + multiple_redist_region_allowed = kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION); diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 83ff74f..7c78961 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -988,6 +988,7 @@ static void nvic_nmi_trigger(void *opaque, int n, int level) static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) { ARMCPU *cpu = s->cpu; + ARMISARegisters *isar = &cpu->isar; uint32_t val; switch (offset) { @@ -1263,74 +1264,74 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_pfr0; + return GET_IDREG(isar, ID_PFR0); case 0xd44: /* PFR1. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_pfr1; + return GET_IDREG(isar, ID_PFR1); case 0xd48: /* DFR0. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_dfr0; + return GET_IDREG(isar, ID_DFR0); case 0xd4c: /* AFR0. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->id_afr0; + return GET_IDREG(isar, ID_AFR0); case 0xd50: /* MMFR0. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_mmfr0; + return GET_IDREG(isar, ID_MMFR0); case 0xd54: /* MMFR1. 
*/ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_mmfr1; + return GET_IDREG(isar, ID_MMFR1); case 0xd58: /* MMFR2. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_mmfr2; + return GET_IDREG(isar, ID_MMFR2); case 0xd5c: /* MMFR3. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_mmfr3; + return GET_IDREG(isar, ID_MMFR3); case 0xd60: /* ISAR0. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_isar0; + return GET_IDREG(&cpu->isar, ID_ISAR0); case 0xd64: /* ISAR1. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_isar1; + return GET_IDREG(&cpu->isar, ID_ISAR1); case 0xd68: /* ISAR2. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_isar2; + return GET_IDREG(&cpu->isar, ID_ISAR2); case 0xd6c: /* ISAR3. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_isar3; + return GET_IDREG(&cpu->isar, ID_ISAR3); case 0xd70: /* ISAR4. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_isar4; + return GET_IDREG(&cpu->isar, ID_ISAR4); case 0xd74: /* ISAR5. */ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->isar.id_isar5; + return GET_IDREG(&cpu->isar, ID_ISAR5); case 0xd78: /* CLIDR */ - return cpu->clidr; + return GET_IDREG(&cpu->isar, CLIDR); case 0xd7c: /* CTR */ return cpu->ctr; case 0xd80: /* CSSIDR */ diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c index 33fcbe7..5cd786d 100644 --- a/hw/intc/aspeed_intc.c +++ b/hw/intc/aspeed_intc.c @@ -737,6 +737,7 @@ static const MemoryRegionOps aspeed_intc_ops = { .read = aspeed_intc_read, .write = aspeed_intc_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -747,6 +748,7 @@ static const MemoryRegionOps aspeed_intcio_ops = { .read = aspeed_intcio_read, .write = aspeed_intcio_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -757,6 +759,7 @@ static const MemoryRegionOps aspeed_ssp_intc_ops = { .read = aspeed_intc_read, .write = aspeed_ssp_intc_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -767,6 +770,7 @@ static const MemoryRegionOps aspeed_ssp_intcio_ops = { .read = aspeed_intcio_read, .write = aspeed_ssp_intcio_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -777,6 +781,7 @@ static const MemoryRegionOps aspeed_tsp_intc_ops = { .read = aspeed_intc_read, .write = aspeed_tsp_intc_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -787,6 +792,7 @@ static const MemoryRegionOps aspeed_tsp_intcio_ops = { .read = aspeed_intcio_read, .write = aspeed_tsp_intcio_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -995,7 +1001,8 @@ static AspeedINTCIRQ aspeed_2700ssp_intcio_irqs[ASPEED_INTC_MAX_INPINS] = { {5, 5, 1, R_SSPINT165_EN, R_SSPINT165_STATUS}, }; -static void aspeed_2700ssp_intcio_class_init(ObjectClass *klass, const void *data) +static void aspeed_2700ssp_intcio_class_init(ObjectClass *klass, + 
const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass); @@ -1063,7 +1070,8 @@ static AspeedINTCIRQ aspeed_2700tsp_intcio_irqs[ASPEED_INTC_MAX_INPINS] = { {5, 5, 1, R_TSPINT165_EN, R_TSPINT165_STATUS}, }; -static void aspeed_2700tsp_intcio_class_init(ObjectClass *klass, const void *data) +static void aspeed_2700tsp_intcio_class_init(ObjectClass *klass, + const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass); diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c index 7c38c4c..3e9c88d 100644 --- a/hw/intc/loongarch_extioi.c +++ b/hw/intc/loongarch_extioi.c @@ -12,6 +12,7 @@ #include "hw/irq.h" #include "hw/loongarch/virt.h" #include "system/address-spaces.h" +#include "system/kvm.h" #include "hw/intc/loongarch_extioi.h" #include "trace.h" @@ -351,30 +352,29 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp) return; } - for (i = 0; i < EXTIOI_IRQS; i++) { - sysbus_init_irq(sbd, &s->irq[i]); - } - - qdev_init_gpio_in(dev, extioi_setirq, EXTIOI_IRQS); - memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops, - s, "extioi_system_mem", 0x900); - sysbus_init_mmio(sbd, &s->extioi_system_mem); - if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) { - memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops, - s, "extioi_virt", EXTIOI_VIRT_SIZE); - sysbus_init_mmio(sbd, &s->virt_extend); s->features |= EXTIOI_VIRT_HAS_FEATURES; } else { s->status |= BIT(EXTIOI_ENABLE); } -} -static void loongarch_extioi_unrealize(DeviceState *dev) -{ - LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(dev); + if (kvm_irqchip_in_kernel()) { + kvm_extioi_realize(dev, errp); + } else { + for (i = 0; i < EXTIOI_IRQS; i++) { + sysbus_init_irq(sbd, &s->irq[i]); + } - g_free(s->cpu); + qdev_init_gpio_in(dev, extioi_setirq, EXTIOI_IRQS); + memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops, + s, "extioi_system_mem", 0x900); + sysbus_init_mmio(sbd, &s->extioi_system_mem); + if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) { + memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops, + s, "extioi_virt", EXTIOI_VIRT_SIZE); + sysbus_init_mmio(sbd, &s->virt_extend); + } + } } static void loongarch_extioi_reset_hold(Object *obj, ResetType type) @@ -384,6 +384,19 @@ static void loongarch_extioi_reset_hold(Object *obj, ResetType type) if (lec->parent_phases.hold) { lec->parent_phases.hold(obj, type); } + + if (kvm_irqchip_in_kernel()) { + kvm_extioi_put(obj, 0); + } +} + +static int vmstate_extioi_pre_save(void *opaque) +{ + if (kvm_irqchip_in_kernel()) { + return kvm_extioi_get(opaque); + } + + return 0; } static int vmstate_extioi_post_load(void *opaque, int version_id) @@ -391,6 +404,10 @@ static int vmstate_extioi_post_load(void *opaque, int version_id) LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(opaque); int i, start_irq; + if (kvm_irqchip_in_kernel()) { + return kvm_extioi_put(opaque, version_id); + } + for (i = 0; i < (EXTIOI_IRQS / 4); i++) { start_irq = i * 4; extioi_update_sw_coremap(s, start_irq, s->coremap[i], false); @@ -412,10 +429,9 @@ static void loongarch_extioi_class_init(ObjectClass *klass, const void *data) device_class_set_parent_realize(dc, loongarch_extioi_realize, &lec->parent_realize); - device_class_set_parent_unrealize(dc, loongarch_extioi_unrealize, - &lec->parent_unrealize); resettable_class_set_parent_phases(rc, NULL, loongarch_extioi_reset_hold, NULL, &lec->parent_phases); + lecc->pre_save = 
vmstate_extioi_pre_save; lecc->post_load = vmstate_extioi_post_load; } diff --git a/hw/intc/loongarch_extioi_common.c b/hw/intc/loongarch_extioi_common.c index 4a904b3..ba03383 100644 --- a/hw/intc/loongarch_extioi_common.c +++ b/hw/intc/loongarch_extioi_common.c @@ -108,6 +108,13 @@ static void loongarch_extioi_common_realize(DeviceState *dev, Error **errp) } } +static void loongarch_extioi_common_unrealize(DeviceState *dev) +{ + LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(dev); + + g_free(s->cpu); +} + static void loongarch_extioi_common_reset_hold(Object *obj, ResetType type) { LoongArchExtIOICommonClass *lecc = LOONGARCH_EXTIOI_COMMON_GET_CLASS(obj); @@ -221,6 +228,8 @@ static void loongarch_extioi_common_class_init(ObjectClass *klass, device_class_set_parent_realize(dc, loongarch_extioi_common_realize, &lecc->parent_realize); + device_class_set_parent_unrealize(dc, loongarch_extioi_common_unrealize, + &lecc->parent_unrealize); resettable_class_set_parent_phases(rc, NULL, loongarch_extioi_common_reset_hold, NULL, &lecc->parent_phases); diff --git a/hw/intc/loongarch_extioi_kvm.c b/hw/intc/loongarch_extioi_kvm.c new file mode 100644 index 0000000..aa2e8c7 --- /dev/null +++ b/hw/intc/loongarch_extioi_kvm.c @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch EXTIOI interrupt kvm support + * + * Copyright (C) 2025 Loongson Technology Corporation Limited + */ + +#include "qemu/osdep.h" +#include "hw/intc/loongarch_extioi.h" +#include "linux/kvm.h" +#include "qapi/error.h" +#include "system/kvm.h" + +static void kvm_extioi_access_reg(int fd, uint64_t addr, void *val, bool write) +{ + kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS, + addr, val, write, &error_abort); +} + +static void kvm_extioi_access_sw_state(int fd, uint64_t addr, + void *val, bool write) +{ + kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS, + addr, val, write, &error_abort); +} + +static void kvm_extioi_access_sw_status(void *opaque, bool write) +{ + LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(opaque); + LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque); + int addr; + + addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE; + kvm_extioi_access_sw_state(les->dev_fd, addr, &lecs->status, write); +} + +static void kvm_extioi_access_regs(void *opaque, bool write) +{ + LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(opaque); + LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque); + int fd = les->dev_fd; + int addr, offset, cpu; + + for (addr = EXTIOI_NODETYPE_START; addr < EXTIOI_NODETYPE_END; addr += 4) { + offset = (addr - EXTIOI_NODETYPE_START) / 4; + kvm_extioi_access_reg(fd, addr, &lecs->nodetype[offset], write); + } + + for (addr = EXTIOI_IPMAP_START; addr < EXTIOI_IPMAP_END; addr += 4) { + offset = (addr - EXTIOI_IPMAP_START) / 4; + kvm_extioi_access_reg(fd, addr, &lecs->ipmap[offset], write); + } + + for (addr = EXTIOI_ENABLE_START; addr < EXTIOI_ENABLE_END; addr += 4) { + offset = (addr - EXTIOI_ENABLE_START) / 4; + kvm_extioi_access_reg(fd, addr, &lecs->enable[offset], write); + } + + for (addr = EXTIOI_BOUNCE_START; addr < EXTIOI_BOUNCE_END; addr += 4) { + offset = (addr - EXTIOI_BOUNCE_START) / 4; + kvm_extioi_access_reg(fd, addr, &lecs->bounce[offset], write); + } + + for (addr = EXTIOI_ISR_START; addr < EXTIOI_ISR_END; addr += 4) { + offset = (addr - EXTIOI_ISR_START) / 4; + kvm_extioi_access_reg(fd, addr, &lecs->isr[offset], write); + } + + for (addr = EXTIOI_COREMAP_START; addr < EXTIOI_COREMAP_END; addr += 4) { + offset = 
(addr - EXTIOI_COREMAP_START) / 4; + kvm_extioi_access_reg(fd, addr, &lecs->coremap[offset], write); + } + + for (cpu = 0; cpu < lecs->num_cpu; cpu++) { + for (addr = EXTIOI_COREISR_START; + addr < EXTIOI_COREISR_END; addr += 4) { + offset = (addr - EXTIOI_COREISR_START) / 4; + kvm_extioi_access_reg(fd, (cpu << 16) | addr, + &lecs->cpu[cpu].coreisr[offset], write); + } + } +} + +int kvm_extioi_get(void *opaque) +{ + kvm_extioi_access_regs(opaque, false); + kvm_extioi_access_sw_status(opaque, false); + return 0; +} + +int kvm_extioi_put(void *opaque, int version_id) +{ + LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque); + int fd = les->dev_fd; + + if (fd == 0) { + return 0; + } + + kvm_extioi_access_regs(opaque, true); + kvm_extioi_access_sw_status(opaque, true); + kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, + KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED, + NULL, true, &error_abort); + return 0; +} + +void kvm_extioi_realize(DeviceState *dev, Error **errp) +{ + LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(dev); + LoongArchExtIOIState *les = LOONGARCH_EXTIOI(dev); + int ret; + + ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_EIOINTC, false); + if (ret < 0) { + fprintf(stderr, "create KVM_LOONGARCH_EIOINTC failed: %s\n", + strerror(-ret)); + abort(); + } + + les->dev_fd = ret; + ret = kvm_device_access(les->dev_fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, + KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU, + &lecs->num_cpu, true, NULL); + if (ret < 0) { + fprintf(stderr, "KVM_LOONGARCH_EXTIOI_INIT_NUM_CPU failed: %s\n", + strerror(-ret)); + abort(); + } + + ret = kvm_device_access(les->dev_fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL, + KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE, + &lecs->features, true, NULL); + if (ret < 0) { + fprintf(stderr, "KVM_LOONGARCH_EXTIOI_INIT_FEATURE failed: %s\n", + strerror(-ret)); + abort(); + } +} diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c index 74372a2..fc8005c 100644 --- a/hw/intc/loongarch_ipi.c +++ b/hw/intc/loongarch_ipi.c @@ -11,6 +11,7 @@ #include "qapi/error.h" #include "hw/intc/loongarch_ipi.h" #include "hw/qdev-properties.h" +#include "system/kvm.h" #include "target/loongarch/cpu.h" static AddressSpace *get_iocsr_as(CPUState *cpu) @@ -91,6 +92,10 @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp) lics->cpu[i].ipi = lics; qdev_init_gpio_out(dev, &lics->cpu[i].irq, 1); } + + if (kvm_irqchip_in_kernel()) { + kvm_ipi_realize(dev, errp); + } } static void loongarch_ipi_reset_hold(Object *obj, ResetType type) @@ -117,6 +122,10 @@ static void loongarch_ipi_reset_hold(Object *obj, ResetType type) core->clear = 0; memset(core->buf, 0, sizeof(core->buf)); } + + if (kvm_irqchip_in_kernel()) { + kvm_ipi_put(obj, 0); + } } static void loongarch_ipi_cpu_plug(HotplugHandler *hotplug_dev, @@ -166,6 +175,24 @@ static void loongarch_ipi_cpu_unplug(HotplugHandler *hotplug_dev, core->cpu = NULL; } +static int loongarch_ipi_pre_save(void *opaque) +{ + if (kvm_irqchip_in_kernel()) { + return kvm_ipi_get(opaque); + } + + return 0; +} + +static int loongarch_ipi_post_load(void *opaque, int version_id) +{ + if (kvm_irqchip_in_kernel()) { + return kvm_ipi_put(opaque, version_id); + } + + return 0; +} + static void loongarch_ipi_class_init(ObjectClass *klass, const void *data) { LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass); @@ -182,6 +209,8 @@ static void loongarch_ipi_class_init(ObjectClass *klass, const void *data) licc->cpu_by_arch_id = loongarch_cpu_by_arch_id; hc->plug = 
loongarch_ipi_cpu_plug; hc->unplug = loongarch_ipi_cpu_unplug; + licc->pre_save = loongarch_ipi_pre_save; + licc->post_load = loongarch_ipi_post_load; } static const TypeInfo loongarch_ipi_types[] = { diff --git a/hw/intc/loongarch_ipi_kvm.c b/hw/intc/loongarch_ipi_kvm.c new file mode 100644 index 0000000..dd4c367 --- /dev/null +++ b/hw/intc/loongarch_ipi_kvm.c @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch IPI interrupt KVM support + * + * Copyright (C) 2025 Loongson Technology Corporation Limited + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/intc/loongarch_ipi.h" +#include "system/kvm.h" +#include "target/loongarch/cpu.h" + +static void kvm_ipi_access_reg(int fd, uint64_t addr, uint32_t *val, bool write) +{ + kvm_device_access(fd, KVM_DEV_LOONGARCH_IPI_GRP_REGS, + addr, val, write, &error_abort); +} + +static void kvm_ipi_access_regs(void *opaque, bool write) +{ + LoongsonIPICommonState *ipi = (LoongsonIPICommonState *)opaque; + LoongarchIPIState *lis = LOONGARCH_IPI(opaque); + IPICore *core; + uint64_t attr; + int i, cpu_index, fd = lis->dev_fd; + + if (fd == 0) { + return; + } + + for (i = 0; i < ipi->num_cpu; i++) { + core = &ipi->cpu[i]; + if (core->cpu == NULL) { + continue; + } + cpu_index = i; + + attr = (cpu_index << 16) | CORE_STATUS_OFF; + kvm_ipi_access_reg(fd, attr, &core->status, write); + + attr = (cpu_index << 16) | CORE_EN_OFF; + kvm_ipi_access_reg(fd, attr, &core->en, write); + + attr = (cpu_index << 16) | CORE_SET_OFF; + kvm_ipi_access_reg(fd, attr, &core->set, write); + + attr = (cpu_index << 16) | CORE_CLEAR_OFF; + kvm_ipi_access_reg(fd, attr, &core->clear, write); + + attr = (cpu_index << 16) | CORE_BUF_20; + kvm_ipi_access_reg(fd, attr, &core->buf[0], write); + + attr = (cpu_index << 16) | CORE_BUF_28; + kvm_ipi_access_reg(fd, attr, &core->buf[2], write); + + attr = (cpu_index << 16) | CORE_BUF_30; + kvm_ipi_access_reg(fd, attr, &core->buf[4], write); + + attr = (cpu_index << 16) | CORE_BUF_38; + kvm_ipi_access_reg(fd, attr, &core->buf[6], write); + } +} + +int kvm_ipi_get(void *opaque) +{ + kvm_ipi_access_regs(opaque, false); + return 0; +} + +int kvm_ipi_put(void *opaque, int version_id) +{ + kvm_ipi_access_regs(opaque, true); + return 0; +} + +void kvm_ipi_realize(DeviceState *dev, Error **errp) +{ + LoongarchIPIState *lis = LOONGARCH_IPI(dev); + int ret; + + ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_IPI, false); + if (ret < 0) { + fprintf(stderr, "IPI KVM_CREATE_DEVICE failed: %s\n", + strerror(-ret)); + abort(); + } + + lis->dev_fd = ret; +} diff --git a/hw/intc/loongarch_pch_msi.c b/hw/intc/loongarch_pch_msi.c index 06eb944..f6d1631 100644 --- a/hw/intc/loongarch_pch_msi.c +++ b/hw/intc/loongarch_pch_msi.c @@ -13,6 +13,7 @@ #include "hw/pci/msi.h" #include "hw/misc/unimp.h" #include "migration/vmstate.h" +#include "system/kvm.h" #include "trace.h" static uint64_t loongarch_msi_mem_read(void *opaque, hwaddr addr, unsigned size) @@ -26,6 +27,15 @@ static void loongarch_msi_mem_write(void *opaque, hwaddr addr, LoongArchPCHMSI *s = (LoongArchPCHMSI *)opaque; int irq_num; + if (kvm_irqchip_in_kernel()) { + MSIMessage msg; + + msg.address = addr; + msg.data = val; + kvm_irqchip_send_msi(kvm_state, msg); + return; + } + /* * vector number is irq number from upper extioi intc * need subtract irq base to get msi vector offset diff --git a/hw/intc/loongarch_pch_pic.c b/hw/intc/loongarch_pch_pic.c index 8340962..c4b242d 100644 --- a/hw/intc/loongarch_pch_pic.c +++ b/hw/intc/loongarch_pch_pic.c 
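The loongarch_pch_pic.c rewrite below collapses the old 32-bit low/high and 8-bit register regions into a single byte-addressable MMIO window over 64-bit registers: addr & 7 selects a byte offset within the containing register, and the guest access size selects a field mask, so every access reduces to a shift-and-mask read or a read-modify-write. A minimal standalone sketch of that pattern follows; the helper names are illustrative only and not part of QEMU.

/* Sketch (not QEMU code): sub-register access on a 64-bit register. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t reg64_read(uint64_t reg, uint64_t addr, uint64_t field_mask)
{
    unsigned offset = addr & 7;   /* byte offset within the 64-bit register */

    return (reg >> (offset * 8)) & field_mask;
}

static void reg64_write(uint64_t *reg, uint64_t addr, uint64_t value,
                        uint64_t field_mask)
{
    unsigned offset = addr & 7;
    uint64_t mask = field_mask << (offset * 8);
    uint64_t data = (value & field_mask) << (offset * 8);

    *reg = (*reg & ~mask) | data; /* read-modify-write only that field */
}

int main(void)
{
    uint64_t int_mask = UINT64_MAX;  /* reset value: all sources masked */

    /* A 4-byte write of 0 at byte offset 4 clears only the high word. */
    reg64_write(&int_mask, 4, 0, UINT32_MAX);
    printf("int_mask  = 0x%016" PRIx64 "\n", int_mask);
    printf("high word = 0x%" PRIx64 "\n",
           reg64_read(int_mask, 4, UINT32_MAX));
    return 0;
}

In the device code below, the field mask is UCHAR_MAX, USHRT_MAX, UINT_MAX or UINT64_MAX depending on the access size, dispatched by loongarch_pch_pic_read() and loongarch_pch_pic_write().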
@@ -7,8 +7,10 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" +#include "qemu/log.h" #include "hw/irq.h" #include "hw/intc/loongarch_pch_pic.h" +#include "system/kvm.h" #include "trace.h" #include "qapi/error.h" @@ -47,6 +49,11 @@ static void pch_pic_irq_handler(void *opaque, int irq, int level) assert(irq < s->irq_num); trace_loongarch_pch_pic_irq_handler(irq, level); + if (kvm_irqchip_in_kernel()) { + kvm_set_irq(kvm_state, irq, !!level); + return; + } + if (s->intedge & mask) { /* Edge triggered */ if (level) { @@ -71,285 +78,181 @@ static void pch_pic_irq_handler(void *opaque, int irq, int level) pch_pic_update_irq(s, mask, level); } -static uint64_t loongarch_pch_pic_low_readw(void *opaque, hwaddr addr, - unsigned size) +static uint64_t pch_pic_read(void *opaque, hwaddr addr, uint64_t field_mask) { LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque); uint64_t val = 0; - uint32_t offset = addr & 0xfff; + uint32_t offset; - switch (offset) { - case PCH_PIC_INT_ID_LO: - val = PCH_PIC_INT_ID_VAL; + offset = addr & 7; + addr -= offset; + switch (addr) { + case PCH_PIC_INT_ID: + val = cpu_to_le64(s->id.data); break; - case PCH_PIC_INT_ID_HI: - /* - * With 7A1000 manual - * bit 0-15 pch irqchip version - * bit 16-31 irq number supported with pch irqchip - */ - val = deposit32(PCH_PIC_INT_ID_VER, 16, 16, s->irq_num - 1); + case PCH_PIC_INT_MASK: + val = s->int_mask; break; - case PCH_PIC_INT_MASK_LO: - val = (uint32_t)s->int_mask; + case PCH_PIC_INT_EDGE: + val = s->intedge; break; - case PCH_PIC_INT_MASK_HI: - val = s->int_mask >> 32; + case PCH_PIC_HTMSI_EN: + val = s->htmsi_en; break; - case PCH_PIC_INT_EDGE_LO: - val = (uint32_t)s->intedge; + case PCH_PIC_AUTO_CTRL0: + case PCH_PIC_AUTO_CTRL1: + /* PCH PIC connect to EXTIOI always, discard auto_ctrl access */ break; - case PCH_PIC_INT_EDGE_HI: - val = s->intedge >> 32; + case PCH_PIC_INT_STATUS: + val = s->intisr & (~s->int_mask); break; - case PCH_PIC_HTMSI_EN_LO: - val = (uint32_t)s->htmsi_en; + case PCH_PIC_INT_POL: + val = s->int_polarity; break; - case PCH_PIC_HTMSI_EN_HI: - val = s->htmsi_en >> 32; + case PCH_PIC_HTMSI_VEC ... PCH_PIC_HTMSI_VEC_END: + val = *(uint64_t *)(s->htmsi_vector + addr - PCH_PIC_HTMSI_VEC); break; - case PCH_PIC_AUTO_CTRL0_LO: - case PCH_PIC_AUTO_CTRL0_HI: - case PCH_PIC_AUTO_CTRL1_LO: - case PCH_PIC_AUTO_CTRL1_HI: + case PCH_PIC_ROUTE_ENTRY ... PCH_PIC_ROUTE_ENTRY_END: + val = *(uint64_t *)(s->route_entry + addr - PCH_PIC_ROUTE_ENTRY); break; default: + qemu_log_mask(LOG_GUEST_ERROR, + "pch_pic_read: Bad address 0x%"PRIx64"\n", addr); break; } - trace_loongarch_pch_pic_low_readw(size, addr, val); - return val; + return (val >> (offset * 8)) & field_mask; } -static uint64_t get_writew_val(uint64_t value, uint32_t target, bool hi) -{ - uint64_t mask = 0xffffffff00000000; - uint64_t data = target; - - return hi ? 
(value & ~mask) | (data << 32) : (value & mask) | data; -} - -static void loongarch_pch_pic_low_writew(void *opaque, hwaddr addr, - uint64_t value, unsigned size) +static void pch_pic_write(void *opaque, hwaddr addr, uint64_t value, + uint64_t field_mask) { LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque); - uint32_t offset, old_valid, data = (uint32_t)value; - uint64_t old, int_mask; - offset = addr & 0xfff; - - trace_loongarch_pch_pic_low_writew(size, addr, data); - - switch (offset) { - case PCH_PIC_INT_MASK_LO: - old = s->int_mask; - s->int_mask = get_writew_val(old, data, 0); - old_valid = (uint32_t)old; - if (old_valid & ~data) { - pch_pic_update_irq(s, (old_valid & ~data), 1); - } - if (~old_valid & data) { - pch_pic_update_irq(s, (~old_valid & data), 0); - } - break; - case PCH_PIC_INT_MASK_HI: + uint32_t offset; + uint64_t old, mask, data, *ptemp; + + offset = addr & 7; + addr -= offset; + mask = field_mask << (offset * 8); + data = (value & field_mask) << (offset * 8); + switch (addr) { + case PCH_PIC_INT_MASK: old = s->int_mask; - s->int_mask = get_writew_val(old, data, 1); - old_valid = (uint32_t)(old >> 32); - int_mask = old_valid & ~data; - if (int_mask) { - pch_pic_update_irq(s, int_mask << 32, 1); + s->int_mask = (old & ~mask) | data; + if (old & ~data) { + pch_pic_update_irq(s, old & ~data, 1); } - int_mask = ~old_valid & data; - if (int_mask) { - pch_pic_update_irq(s, int_mask << 32, 0); + + if (~old & data) { + pch_pic_update_irq(s, ~old & data, 0); } break; - case PCH_PIC_INT_EDGE_LO: - s->intedge = get_writew_val(s->intedge, data, 0); - break; - case PCH_PIC_INT_EDGE_HI: - s->intedge = get_writew_val(s->intedge, data, 1); + case PCH_PIC_INT_EDGE: + s->intedge = (s->intedge & ~mask) | data; break; - case PCH_PIC_INT_CLEAR_LO: + case PCH_PIC_INT_CLEAR: if (s->intedge & data) { - s->intirr &= (~data); + s->intirr &= ~data; pch_pic_update_irq(s, data, 0); - s->intisr &= (~data); + s->intisr &= ~data; } break; - case PCH_PIC_INT_CLEAR_HI: - value <<= 32; - if (s->intedge & value) { - s->intirr &= (~value); - pch_pic_update_irq(s, value, 0); - s->intisr &= (~value); - } + case PCH_PIC_HTMSI_EN: + s->htmsi_en = (s->htmsi_en & ~mask) | data; break; - case PCH_PIC_HTMSI_EN_LO: - s->htmsi_en = get_writew_val(s->htmsi_en, data, 0); + case PCH_PIC_AUTO_CTRL0: + case PCH_PIC_AUTO_CTRL1: + /* Discard auto_ctrl access */ break; - case PCH_PIC_HTMSI_EN_HI: - s->htmsi_en = get_writew_val(s->htmsi_en, data, 1); + case PCH_PIC_INT_POL: + s->int_polarity = (s->int_polarity & ~mask) | data; break; - case PCH_PIC_AUTO_CTRL0_LO: - case PCH_PIC_AUTO_CTRL0_HI: - case PCH_PIC_AUTO_CTRL1_LO: - case PCH_PIC_AUTO_CTRL1_HI: + case PCH_PIC_HTMSI_VEC ... PCH_PIC_HTMSI_VEC_END: + ptemp = (uint64_t *)(s->htmsi_vector + addr - PCH_PIC_HTMSI_VEC); + *ptemp = (*ptemp & ~mask) | data; + break; + case PCH_PIC_ROUTE_ENTRY ... 
PCH_PIC_ROUTE_ENTRY_END: + ptemp = (uint64_t *)(s->route_entry + addr - PCH_PIC_ROUTE_ENTRY); + *ptemp = (*ptemp & ~mask) | data; break; default: + qemu_log_mask(LOG_GUEST_ERROR, + "pch_pic_write: Bad address 0x%"PRIx64"\n", addr); break; } } -static uint64_t loongarch_pch_pic_high_readw(void *opaque, hwaddr addr, - unsigned size) +static uint64_t loongarch_pch_pic_read(void *opaque, hwaddr addr, + unsigned size) { - LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque); uint64_t val = 0; - uint32_t offset = addr & 0xfff; - switch (offset) { - case STATUS_LO_START: - val = (uint32_t)(s->intisr & (~s->int_mask)); + switch (size) { + case 1: + val = pch_pic_read(opaque, addr, UCHAR_MAX); break; - case STATUS_HI_START: - val = (s->intisr & (~s->int_mask)) >> 32; + case 2: + val = pch_pic_read(opaque, addr, USHRT_MAX); break; - case POL_LO_START: - val = (uint32_t)s->int_polarity; + case 4: + val = pch_pic_read(opaque, addr, UINT_MAX); break; - case POL_HI_START: - val = s->int_polarity >> 32; + case 8: + val = pch_pic_read(opaque, addr, UINT64_MAX); break; default: + qemu_log_mask(LOG_GUEST_ERROR, + "loongarch_pch_pic_read: Bad size %d\n", size); break; } - trace_loongarch_pch_pic_high_readw(size, addr, val); + trace_loongarch_pch_pic_read(size, addr, val); return val; } -static void loongarch_pch_pic_high_writew(void *opaque, hwaddr addr, - uint64_t value, unsigned size) +static void loongarch_pch_pic_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) { - LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque); - uint32_t offset, data = (uint32_t)value; - offset = addr & 0xfff; - - trace_loongarch_pch_pic_high_writew(size, addr, data); + trace_loongarch_pch_pic_write(size, addr, value); - switch (offset) { - case STATUS_LO_START: - s->intisr = get_writew_val(s->intisr, data, 0); - break; - case STATUS_HI_START: - s->intisr = get_writew_val(s->intisr, data, 1); + switch (size) { + case 1: + pch_pic_write(opaque, addr, value, UCHAR_MAX); break; - case POL_LO_START: - s->int_polarity = get_writew_val(s->int_polarity, data, 0); + case 2: + pch_pic_write(opaque, addr, value, USHRT_MAX); break; - case POL_HI_START: - s->int_polarity = get_writew_val(s->int_polarity, data, 1); break; - default: + case 4: + pch_pic_write(opaque, addr, value, UINT_MAX); break; - } -} - -static uint64_t loongarch_pch_pic_readb(void *opaque, hwaddr addr, - unsigned size) -{ - LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque); - uint64_t val = 0; - uint32_t offset = (addr & 0xfff) + PCH_PIC_ROUTE_ENTRY_OFFSET; - int64_t offset_tmp; - - switch (offset) { - case PCH_PIC_HTMSI_VEC_OFFSET ... PCH_PIC_HTMSI_VEC_END: - offset_tmp = offset - PCH_PIC_HTMSI_VEC_OFFSET; - if (offset_tmp >= 0 && offset_tmp < 64) { - val = s->htmsi_vector[offset_tmp]; - } - break; - case PCH_PIC_ROUTE_ENTRY_OFFSET ... 
PCH_PIC_ROUTE_ENTRY_END: - offset_tmp = offset - PCH_PIC_ROUTE_ENTRY_OFFSET; - if (offset_tmp >= 0 && offset_tmp < 64) { - val = s->route_entry[offset_tmp]; - } + case 8: + pch_pic_write(opaque, addr, value, UINT64_MAX); break; default: + qemu_log_mask(LOG_GUEST_ERROR, + "loongarch_pch_pic_write: Bad size %d\n", size); break; } - - trace_loongarch_pch_pic_readb(size, addr, val); - return val; } -static void loongarch_pch_pic_writeb(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque); - int32_t offset_tmp; - uint32_t offset = (addr & 0xfff) + PCH_PIC_ROUTE_ENTRY_OFFSET; - - trace_loongarch_pch_pic_writeb(size, addr, data); - - switch (offset) { - case PCH_PIC_HTMSI_VEC_OFFSET ... PCH_PIC_HTMSI_VEC_END: - offset_tmp = offset - PCH_PIC_HTMSI_VEC_OFFSET; - if (offset_tmp >= 0 && offset_tmp < 64) { - s->htmsi_vector[offset_tmp] = (uint8_t)(data & 0xff); - } - break; - case PCH_PIC_ROUTE_ENTRY_OFFSET ... PCH_PIC_ROUTE_ENTRY_END: - offset_tmp = offset - PCH_PIC_ROUTE_ENTRY_OFFSET; - if (offset_tmp >= 0 && offset_tmp < 64) { - s->route_entry[offset_tmp] = (uint8_t)(data & 0xff); - } - break; - default: - break; - } -} - -static const MemoryRegionOps loongarch_pch_pic_reg32_low_ops = { - .read = loongarch_pch_pic_low_readw, - .write = loongarch_pch_pic_low_writew, - .valid = { - .min_access_size = 4, - .max_access_size = 8, - }, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static const MemoryRegionOps loongarch_pch_pic_reg32_high_ops = { - .read = loongarch_pch_pic_high_readw, - .write = loongarch_pch_pic_high_writew, - .valid = { - .min_access_size = 4, - .max_access_size = 8, - }, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static const MemoryRegionOps loongarch_pch_pic_reg8_ops = { - .read = loongarch_pch_pic_readb, - .write = loongarch_pch_pic_writeb, +static const MemoryRegionOps loongarch_pch_pic_ops = { + .read = loongarch_pch_pic_read, + .write = loongarch_pch_pic_write, .valid = { .min_access_size = 1, - .max_access_size = 1, + .max_access_size = 8, + /* + * PCH PIC device would not work correctly if the guest was doing + * unaligned access. This might not be a limitation on the real + * device but in practice there is no reason for a guest to access + * this device unaligned. 
+ */ + .unaligned = false, }, .impl = { .min_access_size = 1, - .max_access_size = 1, + .max_access_size = 8, }, .endianness = DEVICE_LITTLE_ENDIAN, }; @@ -361,6 +264,10 @@ static void loongarch_pic_reset_hold(Object *obj, ResetType type) if (lpc->parent_phases.hold) { lpc->parent_phases.hold(obj, type); } + + if (kvm_irqchip_in_kernel()) { + kvm_pic_put(obj, 0); + } } static void loongarch_pic_realize(DeviceState *dev, Error **errp) @@ -378,30 +285,49 @@ static void loongarch_pic_realize(DeviceState *dev, Error **errp) qdev_init_gpio_out(dev, s->parent_irq, s->irq_num); qdev_init_gpio_in(dev, pch_pic_irq_handler, s->irq_num); - memory_region_init_io(&s->iomem32_low, OBJECT(dev), - &loongarch_pch_pic_reg32_low_ops, - s, PCH_PIC_NAME(.reg32_part1), 0x100); - memory_region_init_io(&s->iomem8, OBJECT(dev), &loongarch_pch_pic_reg8_ops, - s, PCH_PIC_NAME(.reg8), 0x2a0); - memory_region_init_io(&s->iomem32_high, OBJECT(dev), - &loongarch_pch_pic_reg32_high_ops, - s, PCH_PIC_NAME(.reg32_part2), 0xc60); - sysbus_init_mmio(sbd, &s->iomem32_low); - sysbus_init_mmio(sbd, &s->iomem8); - sysbus_init_mmio(sbd, &s->iomem32_high); + if (kvm_irqchip_in_kernel()) { + kvm_pic_realize(dev, errp); + } else { + memory_region_init_io(&s->iomem, OBJECT(dev), + &loongarch_pch_pic_ops, + s, TYPE_LOONGARCH_PIC, VIRT_PCH_REG_SIZE); + sysbus_init_mmio(sbd, &s->iomem); + } +} + +static int loongarch_pic_pre_save(LoongArchPICCommonState *opaque) +{ + if (kvm_irqchip_in_kernel()) { + return kvm_pic_get(opaque); + } + + return 0; +} + +static int loongarch_pic_post_load(LoongArchPICCommonState *opaque, + int version_id) +{ + if (kvm_irqchip_in_kernel()) { + return kvm_pic_put(opaque, version_id); + } + + return 0; } static void loongarch_pic_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); LoongarchPICClass *lpc = LOONGARCH_PIC_CLASS(klass); + LoongArchPICCommonClass *lpcc = LOONGARCH_PIC_COMMON_CLASS(klass); ResettableClass *rc = RESETTABLE_CLASS(klass); resettable_class_set_parent_phases(rc, NULL, loongarch_pic_reset_hold, NULL, &lpc->parent_phases); device_class_set_parent_realize(dc, loongarch_pic_realize, &lpc->parent_realize); + lpcc->pre_save = loongarch_pic_pre_save; + lpcc->post_load = loongarch_pic_post_load; } static const TypeInfo loongarch_pic_types[] = { diff --git a/hw/intc/loongarch_pic_common.c b/hw/intc/loongarch_pic_common.c index 6dccacc..de17050 100644 --- a/hw/intc/loongarch_pic_common.c +++ b/hw/intc/loongarch_pic_common.c @@ -49,6 +49,19 @@ static void loongarch_pic_common_reset_hold(Object *obj, ResetType type) LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(obj); int i; + /* + * With Loongson 7A1000 user manual + * Chapter 5.2 "Description of Interrupt-related Registers" + * + * Interrupt controller identification register 1 + * Bit 24-31 Interrupt Controller ID + * Interrupt controller identification register 2 + * Bit 0-7 Interrupt Controller version number + * Bit 16-23 The number of interrupt sources supported + */ + s->id.desc.id = PCH_PIC_INT_ID_VAL; + s->id.desc.version = PCH_PIC_INT_ID_VER; + s->id.desc.irq_num = s->irq_num - 1; s->int_mask = UINT64_MAX; s->htmsi_en = 0x0; s->intedge = 0x0; diff --git a/hw/intc/loongarch_pic_kvm.c b/hw/intc/loongarch_pic_kvm.c new file mode 100644 index 0000000..dd504ec --- /dev/null +++ b/hw/intc/loongarch_pic_kvm.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch kvm pch pic interrupt support + * + * Copyright (C) 2025 Loongson Technology Corporation Limited + */ + +#include 
"qemu/osdep.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "hw/intc/loongarch_pch_pic.h" +#include "hw/loongarch/virt.h" +#include "hw/pci-host/ls7a.h" +#include "system/kvm.h" + +static void kvm_pch_pic_access_reg(int fd, uint64_t addr, void *val, bool write) +{ + kvm_device_access(fd, KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS, + addr, val, write, &error_abort); +} + +static void kvm_pch_pic_access(void *opaque, bool write) +{ + LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque); + LoongarchPICState *lps = LOONGARCH_PIC(opaque); + int fd = lps->dev_fd; + int addr, offset; + + if (fd == 0) { + return; + } + + kvm_pch_pic_access_reg(fd, PCH_PIC_INT_MASK, &s->int_mask, write); + kvm_pch_pic_access_reg(fd, PCH_PIC_HTMSI_EN, &s->htmsi_en, write); + kvm_pch_pic_access_reg(fd, PCH_PIC_INT_EDGE, &s->intedge, write); + kvm_pch_pic_access_reg(fd, PCH_PIC_AUTO_CTRL0, &s->auto_crtl0, write); + kvm_pch_pic_access_reg(fd, PCH_PIC_AUTO_CTRL1, &s->auto_crtl1, write); + + for (addr = PCH_PIC_ROUTE_ENTRY; + addr < PCH_PIC_ROUTE_ENTRY_END; addr++) { + offset = addr - PCH_PIC_ROUTE_ENTRY; + kvm_pch_pic_access_reg(fd, addr, &s->route_entry[offset], write); + } + + for (addr = PCH_PIC_HTMSI_VEC; addr < PCH_PIC_HTMSI_VEC_END; addr++) { + offset = addr - PCH_PIC_HTMSI_VEC; + kvm_pch_pic_access_reg(fd, addr, &s->htmsi_vector[offset], write); + } + + kvm_pch_pic_access_reg(fd, PCH_PIC_INT_REQUEST, &s->intirr, write); + kvm_pch_pic_access_reg(fd, PCH_PIC_INT_STATUS, &s->intisr, write); + kvm_pch_pic_access_reg(fd, PCH_PIC_INT_POL, &s->int_polarity, write); +} + +int kvm_pic_get(void *opaque) +{ + kvm_pch_pic_access(opaque, false); + return 0; +} + +int kvm_pic_put(void *opaque, int version_id) +{ + kvm_pch_pic_access(opaque, true); + return 0; +} + +void kvm_pic_realize(DeviceState *dev, Error **errp) +{ + LoongarchPICState *lps = LOONGARCH_PIC(dev); + uint64_t pch_pic_base = VIRT_PCH_REG_BASE; + int ret; + + ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_PCHPIC, false); + if (ret < 0) { + fprintf(stderr, "Create KVM_LOONGARCH_PCHPIC failed: %s\n", + strerror(-ret)); + abort(); + } + + lps->dev_fd = ret; + ret = kvm_device_access(lps->dev_fd, KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL, + KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT, + &pch_pic_base, true, NULL); + if (ret < 0) { + fprintf(stderr, "KVM_LOONGARCH_PCH_PIC_INIT failed: %s\n", + strerror(-ret)); + abort(); + } +} diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c index f32661c..8cd78d4 100644 --- a/hw/intc/loongson_ipi_common.c +++ b/hw/intc/loongson_ipi_common.c @@ -11,6 +11,7 @@ #include "hw/irq.h" #include "qemu/log.h" #include "migration/vmstate.h" +#include "system/kvm.h" #include "trace.h" MemTxResult loongson_ipi_core_readl(void *opaque, hwaddr addr, uint64_t *data, @@ -255,6 +256,10 @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp) LoongsonIPICommonState *s = LOONGSON_IPI_COMMON(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + if (kvm_irqchip_in_kernel()) { + return; + } + memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev), &loongson_ipi_iocsr_ops, s, "loongson_ipi_iocsr", 0x48); @@ -277,10 +282,38 @@ static void loongson_ipi_common_unrealize(DeviceState *dev) g_free(s->cpu); } +static int loongson_ipi_common_pre_save(void *opaque) +{ + IPICore *ipicore = (IPICore *)opaque; + LoongsonIPICommonState *s = ipicore->ipi; + LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_GET_CLASS(s); + + if (licc->pre_save) { + return licc->pre_save(s); + } + + return 0; +} + +static int 
loongson_ipi_common_post_load(void *opaque, int version_id) +{ + IPICore *ipicore = (IPICore *)opaque; + LoongsonIPICommonState *s = ipicore->ipi; + LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_GET_CLASS(s); + + if (licc->post_load) { + return licc->post_load(s, version_id); + } + + return 0; +} + static const VMStateDescription vmstate_ipi_core = { .name = "ipi-single", .version_id = 2, .minimum_version_id = 2, + .pre_save = loongson_ipi_common_pre_save, + .post_load = loongson_ipi_common_post_load, .fields = (const VMStateField[]) { VMSTATE_UINT32(status, IPICore), VMSTATE_UINT32(en, IPICore), diff --git a/hw/intc/meson.build b/hw/intc/meson.build index 602da30..3137521 100644 --- a/hw/intc/meson.build +++ b/hw/intc/meson.build @@ -71,6 +71,12 @@ specific_ss.add(when: 'CONFIG_M68K_IRQC', if_true: files('m68k_irqc.c')) specific_ss.add(when: 'CONFIG_LOONGSON_IPI_COMMON', if_true: files('loongson_ipi_common.c')) specific_ss.add(when: 'CONFIG_LOONGSON_IPI', if_true: files('loongson_ipi.c')) specific_ss.add(when: 'CONFIG_LOONGARCH_IPI', if_true: files('loongarch_ipi.c')) +specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_IPI'], + if_true: files('loongarch_ipi_kvm.c')) specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_PIC', if_true: files('loongarch_pch_pic.c', 'loongarch_pic_common.c')) +specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_PCH_PIC'], + if_true: files('loongarch_pic_kvm.c')) specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_MSI', if_true: files('loongarch_pch_msi.c')) specific_ss.add(when: 'CONFIG_LOONGARCH_EXTIOI', if_true: files('loongarch_extioi.c', 'loongarch_extioi_common.c')) +specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_EXTIOI'], + if_true: files('loongarch_extioi_kvm.c')) diff --git a/hw/intc/omap_intc.c b/hw/intc/omap_intc.c index 9e8737b..c61158b 100644 --- a/hw/intc/omap_intc.c +++ b/hw/intc/omap_intc.c @@ -102,8 +102,8 @@ static inline void omap_inth_update(OMAPIntcState *s, int is_fiq) } } -#define INT_FALLING_EDGE 0 -#define INT_LOW_LEVEL 1 +#define INT_FALLING_EDGE 0 +#define INT_LOW_LEVEL 1 static void omap_set_intr(void *opaque, int irq, int req) { @@ -142,13 +142,13 @@ static uint64_t omap_inth_read(void *opaque, hwaddr addr, offset &= 0xff; switch (offset) { - case 0x00: /* ITR */ + case 0x00: /* ITR */ return bank->irqs; - case 0x04: /* MIR */ + case 0x04: /* MIR */ return bank->mask; - case 0x10: /* SIR_IRQ_CODE */ + case 0x10: /* SIR_IRQ_CODE */ case 0x14: /* SIR_FIQ_CODE */ if (bank_no != 0) break; @@ -159,49 +159,49 @@ static uint64_t omap_inth_read(void *opaque, hwaddr addr, bank->irqs &= ~(1 << i); return line_no; - case 0x18: /* CONTROL_REG */ + case 0x18: /* CONTROL_REG */ if (bank_no != 0) break; return 0; - case 0x1c: /* ILR0 */ - case 0x20: /* ILR1 */ - case 0x24: /* ILR2 */ - case 0x28: /* ILR3 */ - case 0x2c: /* ILR4 */ - case 0x30: /* ILR5 */ - case 0x34: /* ILR6 */ - case 0x38: /* ILR7 */ - case 0x3c: /* ILR8 */ - case 0x40: /* ILR9 */ - case 0x44: /* ILR10 */ - case 0x48: /* ILR11 */ - case 0x4c: /* ILR12 */ - case 0x50: /* ILR13 */ - case 0x54: /* ILR14 */ - case 0x58: /* ILR15 */ - case 0x5c: /* ILR16 */ - case 0x60: /* ILR17 */ - case 0x64: /* ILR18 */ - case 0x68: /* ILR19 */ - case 0x6c: /* ILR20 */ - case 0x70: /* ILR21 */ - case 0x74: /* ILR22 */ - case 0x78: /* ILR23 */ - case 0x7c: /* ILR24 */ - case 0x80: /* ILR25 */ - case 0x84: /* ILR26 */ - case 0x88: /* ILR27 */ - case 0x8c: /* ILR28 */ - case 0x90: /* ILR29 */ - case 0x94: /* ILR30 */ - case 0x98: /* ILR31 */ + case 0x1c: /* ILR0 */ + case 0x20: /* ILR1 */ + case 
0x24: /* ILR2 */ + case 0x28: /* ILR3 */ + case 0x2c: /* ILR4 */ + case 0x30: /* ILR5 */ + case 0x34: /* ILR6 */ + case 0x38: /* ILR7 */ + case 0x3c: /* ILR8 */ + case 0x40: /* ILR9 */ + case 0x44: /* ILR10 */ + case 0x48: /* ILR11 */ + case 0x4c: /* ILR12 */ + case 0x50: /* ILR13 */ + case 0x54: /* ILR14 */ + case 0x58: /* ILR15 */ + case 0x5c: /* ILR16 */ + case 0x60: /* ILR17 */ + case 0x64: /* ILR18 */ + case 0x68: /* ILR19 */ + case 0x6c: /* ILR20 */ + case 0x70: /* ILR21 */ + case 0x74: /* ILR22 */ + case 0x78: /* ILR23 */ + case 0x7c: /* ILR24 */ + case 0x80: /* ILR25 */ + case 0x84: /* ILR26 */ + case 0x88: /* ILR27 */ + case 0x8c: /* ILR28 */ + case 0x90: /* ILR29 */ + case 0x94: /* ILR30 */ + case 0x98: /* ILR31 */ i = (offset - 0x1c) >> 2; return (bank->priority[i] << 2) | (((bank->sens_edge >> i) & 1) << 1) | ((bank->fiq >> i) & 1); - case 0x9c: /* ISR */ + case 0x9c: /* ISR */ return 0x00000000; } @@ -219,24 +219,24 @@ static void omap_inth_write(void *opaque, hwaddr addr, offset &= 0xff; switch (offset) { - case 0x00: /* ITR */ + case 0x00: /* ITR */ /* Important: ignore the clearing if the IRQ is level-triggered and the input bit is 1 */ bank->irqs &= value | (bank->inputs & bank->sens_edge); return; - case 0x04: /* MIR */ + case 0x04: /* MIR */ bank->mask = value; omap_inth_update(s, 0); omap_inth_update(s, 1); return; - case 0x10: /* SIR_IRQ_CODE */ - case 0x14: /* SIR_FIQ_CODE */ + case 0x10: /* SIR_IRQ_CODE */ + case 0x14: /* SIR_FIQ_CODE */ OMAP_RO_REG(addr); break; - case 0x18: /* CONTROL_REG */ + case 0x18: /* CONTROL_REG */ if (bank_no != 0) break; if (value & 2) { @@ -251,38 +251,38 @@ static void omap_inth_write(void *opaque, hwaddr addr, } return; - case 0x1c: /* ILR0 */ - case 0x20: /* ILR1 */ - case 0x24: /* ILR2 */ - case 0x28: /* ILR3 */ - case 0x2c: /* ILR4 */ - case 0x30: /* ILR5 */ - case 0x34: /* ILR6 */ - case 0x38: /* ILR7 */ - case 0x3c: /* ILR8 */ - case 0x40: /* ILR9 */ - case 0x44: /* ILR10 */ - case 0x48: /* ILR11 */ - case 0x4c: /* ILR12 */ - case 0x50: /* ILR13 */ - case 0x54: /* ILR14 */ - case 0x58: /* ILR15 */ - case 0x5c: /* ILR16 */ - case 0x60: /* ILR17 */ - case 0x64: /* ILR18 */ - case 0x68: /* ILR19 */ - case 0x6c: /* ILR20 */ - case 0x70: /* ILR21 */ - case 0x74: /* ILR22 */ - case 0x78: /* ILR23 */ - case 0x7c: /* ILR24 */ - case 0x80: /* ILR25 */ - case 0x84: /* ILR26 */ - case 0x88: /* ILR27 */ - case 0x8c: /* ILR28 */ - case 0x90: /* ILR29 */ - case 0x94: /* ILR30 */ - case 0x98: /* ILR31 */ + case 0x1c: /* ILR0 */ + case 0x20: /* ILR1 */ + case 0x24: /* ILR2 */ + case 0x28: /* ILR3 */ + case 0x2c: /* ILR4 */ + case 0x30: /* ILR5 */ + case 0x34: /* ILR6 */ + case 0x38: /* ILR7 */ + case 0x3c: /* ILR8 */ + case 0x40: /* ILR9 */ + case 0x44: /* ILR10 */ + case 0x48: /* ILR11 */ + case 0x4c: /* ILR12 */ + case 0x50: /* ILR13 */ + case 0x54: /* ILR14 */ + case 0x58: /* ILR15 */ + case 0x5c: /* ILR16 */ + case 0x60: /* ILR17 */ + case 0x64: /* ILR18 */ + case 0x68: /* ILR19 */ + case 0x6c: /* ILR20 */ + case 0x70: /* ILR21 */ + case 0x74: /* ILR22 */ + case 0x78: /* ILR23 */ + case 0x7c: /* ILR24 */ + case 0x80: /* ILR25 */ + case 0x84: /* ILR26 */ + case 0x88: /* ILR27 */ + case 0x8c: /* ILR28 */ + case 0x90: /* ILR29 */ + case 0x94: /* ILR30 */ + case 0x98: /* ILR31 */ i = (offset - 0x1c) >> 2; bank->priority[i] = (value >> 2) & 0x1f; bank->sens_edge &= ~(1 << i); @@ -291,7 +291,7 @@ static void omap_inth_write(void *opaque, hwaddr addr, bank->fiq |= (value & 1) << i; return; - case 0x9c: /* ISR */ + case 0x9c: /* ISR */ for (i = 0; i < 
32; i ++) if (value & (1 << i)) { omap_set_intr(s, 32 * bank_no + i, 1); diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index 935c0e4..c2ca40b 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -470,14 +470,13 @@ static bool pnv_xive_is_cpu_enabled(PnvXive *xive, PowerPCCPU *cpu) return xive->regs[reg >> 3] & PPC_BIT(bit); } -static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, XiveTCTXMatch *match) +static bool pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) { PnvXive *xive = PNV_XIVE(xptr); PnvChip *chip = xive->chip; - int count = 0; int i, j; for (i = 0; i < chip->nr_cores; i++) { @@ -510,17 +509,18 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a " "thread context NVT %x/%x\n", nvt_blk, nvt_idx); - return -1; + match->count++; + continue; } match->ring = ring; match->tctx = tctx; - count++; + match->count++; } } } - return count; + return !!match->count; } static uint32_t pnv_xive_presenter_get_config(XivePresenter *xptr) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index ec8b0c6..e019cad 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -101,12 +101,10 @@ static uint32_t pnv_xive2_block_id(PnvXive2 *xive) } /* - * Remote access to controllers. HW uses MMIOs. For now, a simple scan - * of the chips is good enough. - * - * TODO: Block scope support + * Remote access to INT controllers. HW uses MMIOs(?). For now, a simple + * scan of all the chips INT controller is good enough. */ -static PnvXive2 *pnv_xive2_get_remote(uint8_t blk) +static PnvXive2 *pnv_xive2_get_remote(uint32_t vsd_type, hwaddr fwd_addr) { PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); int i; @@ -115,10 +113,23 @@ static PnvXive2 *pnv_xive2_get_remote(uint8_t blk) Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]); PnvXive2 *xive = &chip10->xive; - if (pnv_xive2_block_id(xive) == blk) { + /* + * Is this the XIVE matching the forwarded VSD address is for this + * VSD type + */ + if ((vsd_type == VST_ESB && fwd_addr == xive->esb_base) || + (vsd_type == VST_END && fwd_addr == xive->end_base) || + ((vsd_type == VST_NVP || + vsd_type == VST_NVG) && fwd_addr == xive->nvpg_base) || + (vsd_type == VST_NVC && fwd_addr == xive->nvc_base)) { return xive; } } + + qemu_log_mask(LOG_GUEST_ERROR, + "XIVE: >>>>> %s vsd_type %u fwd_addr 0x%"HWADDR_PRIx + " NOT FOUND\n", + __func__, vsd_type, fwd_addr); return NULL; } @@ -251,8 +262,7 @@ static uint64_t pnv_xive2_vst_addr(PnvXive2 *xive, uint32_t type, uint8_t blk, /* Remote VST access */ if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { - xive = pnv_xive2_get_remote(blk); - + xive = pnv_xive2_get_remote(type, (vsd & VSD_ADDRESS_MASK)); return xive ? 
pnv_xive2_vst_addr(xive, type, blk, idx) : 0; } @@ -595,20 +605,28 @@ static uint32_t pnv_xive2_get_config(Xive2Router *xrtr) { PnvXive2 *xive = PNV_XIVE2(xrtr); uint32_t cfg = 0; + uint64_t reg = xive->cq_regs[CQ_XIVE_CFG >> 3]; - if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS) { + if (reg & CQ_XIVE_CFG_GEN1_TIMA_OS) { cfg |= XIVE2_GEN1_TIMA_OS; } - if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_EN_VP_SAVE_RESTORE) { + if (reg & CQ_XIVE_CFG_EN_VP_SAVE_RESTORE) { cfg |= XIVE2_VP_SAVE_RESTORE; } - if (GETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, - xive->cq_regs[CQ_XIVE_CFG >> 3]) == CQ_XIVE_CFG_THREADID_8BITS) { + if (GETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, reg) == + CQ_XIVE_CFG_THREADID_8BITS) { cfg |= XIVE2_THREADID_8BITS; } + if (reg & CQ_XIVE_CFG_EN_VP_GRP_PRIORITY) { + cfg |= XIVE2_EN_VP_GRP_PRIORITY; + } + + cfg = SETFIELD(XIVE2_VP_INT_PRIO, cfg, + GETFIELD(CQ_XIVE_CFG_VP_INT_PRIO, reg)); + return cfg; } @@ -622,24 +640,28 @@ static bool pnv_xive2_is_cpu_enabled(PnvXive2 *xive, PowerPCCPU *cpu) return xive->tctxt_regs[reg >> 3] & PPC_BIT(bit); } -static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, XiveTCTXMatch *match) +static bool pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) { PnvXive2 *xive = PNV_XIVE2(xptr); PnvChip *chip = xive->chip; - int count = 0; int i, j; bool gen1_tima_os = xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; + static int next_start_core; + static int next_start_thread; + int start_core = next_start_core; + int start_thread = next_start_thread; for (i = 0; i < chip->nr_cores; i++) { - PnvCore *pc = chip->cores[i]; + PnvCore *pc = chip->cores[(i + start_core) % chip->nr_cores]; CPUCore *cc = CPU_CORE(pc); for (j = 0; j < cc->nr_threads; j++) { - PowerPCCPU *cpu = pc->threads[j]; + /* Start search for match with different thread each call */ + PowerPCCPU *cpu = pc->threads[(j + start_thread) % cc->nr_threads]; XiveTCTX *tctx; int ring; @@ -669,7 +691,8 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, "thread context NVT %x/%x\n", nvt_blk, nvt_idx); /* Should set a FIR if we ever model it */ - return -1; + match->count++; + continue; } /* * For a group notification, we need to know if the @@ -684,14 +707,23 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, if (!match->tctx) { match->ring = ring; match->tctx = tctx; + + next_start_thread = j + start_thread + 1; + if (next_start_thread >= cc->nr_threads) { + next_start_thread = 0; + next_start_core = i + start_core + 1; + if (next_start_core >= chip->nr_cores) { + next_start_core = 0; + } + } } - count++; + match->count++; } } } } - return count; + return !!match->count; } static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr) @@ -1173,7 +1205,8 @@ static void pnv_xive2_ic_cq_write(void *opaque, hwaddr offset, case CQ_FIRMASK_OR: /* FIR error reporting */ break; default: - xive2_error(xive, "CQ: invalid write 0x%"HWADDR_PRIx, offset); + xive2_error(xive, "CQ: invalid write 0x%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); return; } @@ -1304,7 +1337,6 @@ static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset, case VC_ENDC_WATCH2_SPEC: case VC_ENDC_WATCH3_SPEC: watch_engine = (offset - VC_ENDC_WATCH0_SPEC) >> 6; - xive->vc_regs[reg] &= ~(VC_ENDC_WATCH_FULL | 
VC_ENDC_WATCH_CONFLICT); pnv_xive2_endc_cache_watch_release(xive, watch_engine); val = xive->vc_regs[reg]; break; @@ -1315,10 +1347,11 @@ static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset, case VC_ENDC_WATCH3_DATA0: /* * Load DATA registers from cache with data requested by the - * SPEC register + * SPEC register. Clear gen_flipped bit in word 1. */ watch_engine = (offset - VC_ENDC_WATCH0_DATA0) >> 6; pnv_xive2_end_cache_load(xive, watch_engine); + xive->vc_regs[reg] &= ~(uint64_t)END2_W1_GEN_FLIPPED; val = xive->vc_regs[reg]; break; @@ -1386,7 +1419,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, /* * ESB cache updates (not modeled) */ - /* case VC_ESBC_FLUSH_CTRL: */ + case VC_ESBC_FLUSH_CTRL: + if (val & VC_ESBC_FLUSH_CTRL_WANT_CACHE_DISABLE) { + xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx + " value 0x%"PRIx64" bit[2] poll_want_cache_disable", + offset, val); + return; + } + break; case VC_ESBC_FLUSH_POLL: xive->vc_regs[VC_ESBC_FLUSH_CTRL >> 3] |= VC_ESBC_FLUSH_CTRL_POLL_VALID; /* ESB update */ @@ -1402,7 +1442,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, /* * EAS cache updates (not modeled) */ - /* case VC_EASC_FLUSH_CTRL: */ + case VC_EASC_FLUSH_CTRL: + if (val & VC_EASC_FLUSH_CTRL_WANT_CACHE_DISABLE) { + xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx + " value 0x%"PRIx64" bit[2] poll_want_cache_disable", + offset, val); + return; + } + break; case VC_EASC_FLUSH_POLL: xive->vc_regs[VC_EASC_FLUSH_CTRL >> 3] |= VC_EASC_FLUSH_CTRL_POLL_VALID; /* EAS update */ @@ -1441,7 +1488,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, break; - /* case VC_ENDC_FLUSH_CTRL: */ + case VC_ENDC_FLUSH_CTRL: + if (val & VC_ENDC_FLUSH_CTRL_WANT_CACHE_DISABLE) { + xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx + " value 0x%"PRIx64" bit[2] poll_want_cache_disable", + offset, val); + return; + } + break; case VC_ENDC_FLUSH_POLL: xive->vc_regs[VC_ENDC_FLUSH_CTRL >> 3] |= VC_ENDC_FLUSH_CTRL_POLL_VALID; break; @@ -1470,7 +1524,8 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, break; default: - xive2_error(xive, "VC: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "VC: invalid write @0x%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); return; } @@ -1661,7 +1716,14 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, pnv_xive2_nxc_update(xive, watch_engine); break; - /* case PC_NXC_FLUSH_CTRL: */ + case PC_NXC_FLUSH_CTRL: + if (val & PC_NXC_FLUSH_CTRL_WANT_CACHE_DISABLE) { + xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx + " value 0x%"PRIx64" bit[2] poll_want_cache_disable", + offset, val); + return; + } + break; case PC_NXC_FLUSH_POLL: xive->pc_regs[PC_NXC_FLUSH_CTRL >> 3] |= PC_NXC_FLUSH_CTRL_POLL_VALID; break; @@ -1678,7 +1740,8 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, break; default: - xive2_error(xive, "PC: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "PC: invalid write @0x%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); return; } @@ -1765,7 +1828,8 @@ static void pnv_xive2_ic_tctxt_write(void *opaque, hwaddr offset, xive->tctxt_regs[reg] = val; break; default: - xive2_error(xive, "TCTXT: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "TCTXT: invalid write @0x%"HWADDR_PRIx + " data 0x%"PRIx64, offset, val); return; } } @@ -1836,7 +1900,8 @@ static void pnv_xive2_xscom_write(void *opaque, hwaddr offset, pnv_xive2_ic_tctxt_write(opaque, mmio_offset, val, size); break; default: - xive2_error(xive, 
"XSCOM: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "XSCOM: invalid write @%"HWADDR_PRIx + " value 0x%"PRIx64, offset, val); } } @@ -1904,7 +1969,8 @@ static void pnv_xive2_ic_notify_write(void *opaque, hwaddr offset, break; default: - xive2_error(xive, "NOTIFY: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "NOTIFY: invalid write @%"HWADDR_PRIx + " value 0x%"PRIx64, offset, val); } } @@ -1946,7 +2012,8 @@ static void pnv_xive2_ic_lsi_write(void *opaque, hwaddr offset, { PnvXive2 *xive = PNV_XIVE2(opaque); - xive2_error(xive, "LSI: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "LSI: invalid write @%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); } static const MemoryRegionOps pnv_xive2_ic_lsi_ops = { @@ -2049,7 +2116,8 @@ static void pnv_xive2_ic_sync_write(void *opaque, hwaddr offset, inject_type = PNV_XIVE2_QUEUE_NXC_ST_RMT_CI; break; default: - xive2_error(xive, "SYNC: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "SYNC: invalid write @%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); return; } diff --git a/hw/intc/pnv_xive2_regs.h b/hw/intc/pnv_xive2_regs.h index e8b87b3..d53300f 100644 --- a/hw/intc/pnv_xive2_regs.h +++ b/hw/intc/pnv_xive2_regs.h @@ -66,6 +66,7 @@ #define CQ_XIVE_CFG_GEN1_TIMA_HYP_BLK0 PPC_BIT(26) /* 0 if bit[25]=0 */ #define CQ_XIVE_CFG_GEN1_TIMA_CROWD_DIS PPC_BIT(27) /* 0 if bit[25]=0 */ #define CQ_XIVE_CFG_GEN1_END_ESX PPC_BIT(28) +#define CQ_XIVE_CFG_EN_VP_GRP_PRIORITY PPC_BIT(32) /* 0 if bit[25]=1 */ #define CQ_XIVE_CFG_EN_VP_SAVE_RESTORE PPC_BIT(38) /* 0 if bit[25]=1 */ #define CQ_XIVE_CFG_EN_VP_SAVE_REST_STRICT PPC_BIT(39) /* 0 if bit[25]=1 */ diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index b0139f0..4623cfa0 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -28,6 +28,7 @@ #include "qemu/module.h" #include "hw/sysbus.h" #include "target/riscv/cpu.h" +#include "target/riscv/time_helper.h" #include "hw/qdev-properties.h" #include "hw/intc/riscv_aclint.h" #include "qemu/timer.h" @@ -240,6 +241,10 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), mtimer->hartid_base + i, mtimer->timecmp[i]); + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } return; } diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 8bcd9f4..a1d9fa5 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -628,7 +628,7 @@ static void riscv_aplic_request(void *opaque, int irq, int level) static uint64_t riscv_aplic_read(void *opaque, hwaddr addr, unsigned size) { - uint32_t irq, word, idc; + uint32_t irq, word, idc, sm; RISCVAPLICState *aplic = opaque; /* Reads must be 4 byte words */ @@ -696,6 +696,10 @@ static uint64_t riscv_aplic_read(void *opaque, hwaddr addr, unsigned size) } else if ((APLIC_TARGET_BASE <= addr) && (addr < (APLIC_TARGET_BASE + (aplic->num_irqs - 1) * 4))) { irq = ((addr - APLIC_TARGET_BASE) >> 2) + 1; + sm = aplic->sourcecfg[irq] & APLIC_SOURCECFG_SM_MASK; + if (sm == APLIC_SOURCECFG_SM_INACTIVE) { + return 0; + } return aplic->target[irq]; } else if (!aplic->msimode && (APLIC_IDC_BASE <= addr) && (addr < (APLIC_IDC_BASE + aplic->num_harts * APLIC_IDC_SIZE))) { @@ -962,10 +966,18 @@ static const Property riscv_aplic_properties[] = { DEFINE_PROP_BOOL("mmode", RISCVAPLICState, mmode, 0), }; +static bool riscv_aplic_state_needed(void *opaque) +{ + RISCVAPLICState *aplic = 
opaque; + + return riscv_use_emulated_aplic(aplic->msimode); +} + static const VMStateDescription vmstate_riscv_aplic = { .name = "riscv_aplic", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, + .needed = riscv_aplic_state_needed, .fields = (const VMStateField[]) { VMSTATE_UINT32(domaincfg, RISCVAPLICState), VMSTATE_UINT32(mmsicfgaddr, RISCVAPLICState), diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index 2169988..6174e1a 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -398,10 +398,16 @@ static const Property riscv_imsic_properties[] = { DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0), }; +static bool riscv_imsic_state_needed(void *opaque) +{ + return !kvm_irqchip_in_kernel(); +} + static const VMStateDescription vmstate_riscv_imsic = { .name = "riscv_imsic", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, + .needed = riscv_imsic_state_needed, .fields = (const VMStateField[]) { VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState, num_pages, 0, diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 440edb9..e393f5d 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -428,14 +428,13 @@ static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, g_assert_not_reached(); } -static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, - uint8_t priority, - uint32_t logic_serv, XiveTCTXMatch *match) +static bool spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, + uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) { CPUState *cs; - int count = 0; CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -463,16 +462,17 @@ static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, if (match->tctx) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread " "context NVT %x/%x\n", nvt_blk, nvt_idx); - return -1; + match->count++; + continue; } match->ring = ring; match->tctx = tctx; - count++; + match->count++; } } - return count; + return !!match->count; } static uint32_t spapr_xive_presenter_get_config(XivePresenter *xptr) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 0ba9a02..018c609 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -274,11 +274,13 @@ kvm_xive_cpu_connect(uint32_t id) "connect CPU%d to KVM device" kvm_xive_source_reset(uint32_t srcno) "IRQ 0x%x" # xive.c -xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x ACK" -xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x raise !" -xive_tctx_set_cppr(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x new CPPR=0x%02x NSR=0x%02x" +xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x ACK" +xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x raise !" 
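These trace points print IPB, PIPR, CPPR and NSR side by side; the first two are tied together by a fixed encoding in which IPB keeps one pending bit per priority and PIPR reports the most favored (numerically lowest) pending priority. A minimal standalone sketch of that relationship, using hypothetical helper names (the tree's xive_priority_to_ipb() and xive_ipb_to_pipr() implement the same idea):

#include <stdint.h>
#include <stdio.h>

/* One pending bit per priority: bit 7 is priority 0, bit 0 is priority 7 */
static uint8_t prio_to_ipb(uint8_t priority)
{
    return priority <= 7 ? 0x80 >> priority : 0;
}

/* PIPR reports the most favored (lowest-numbered) priority pending in IPB */
static uint8_t ipb_to_pipr(uint8_t ipb)
{
    uint8_t p;

    for (p = 0; p <= 7; p++) {
        if (ipb & (0x80 >> p)) {
            return p;
        }
    }
    return 0xff; /* nothing pending */
}

int main(void)
{
    uint8_t ipb = prio_to_ipb(2) | prio_to_ipb(5);

    /* Prints PIPR=2: priority 2 is presented ahead of priority 5 */
    printf("PIPR=%u\n", ipb_to_pipr(ipb));
    return 0;
}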
+xive_tctx_set_cppr(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x new CPPR=0x%02x NSR=0x%02x" xive_source_esb_read(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64 xive_source_esb_write(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64 +xive_source_notify(uint32_t srcno) "Processing notification for queued IRQ 0x%x" +xive_source_blocked(uint32_t srcno) "No action needed for IRQ 0x%x currently" xive_router_end_notify(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "END 0x%02x/0x%04x -> enqueue 0x%08x" xive_router_end_escalate(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t end_data) "END 0x%02x/0x%04x -> escalate END 0x%02x/0x%04x data 0x%08x" xive_tctx_tm_write(uint32_t index, uint64_t offset, unsigned int size, uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64 @@ -289,6 +291,10 @@ xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x # xive2.c xive_nvp_backlog_op(uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint8_t rc) "NVP 0x%x/0x%x operation=%d priority=%d rc=%d" xive_nvgc_backlog_op(bool c, uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint32_t rc) "NVGC crowd=%d 0x%x/0x%x operation=%d priority=%d rc=%d" +xive_redistribute(uint32_t index, uint8_t ring, uint8_t end_blk, uint32_t end_idx) "Redistribute from target=%d ring=0x%x NVP 0x%x/0x%x" +xive_end_enqueue(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "Queue event for END 0x%x/0x%x data=0x%x" +xive_escalate_end(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t esc_data) "Escalate from END 0x%x/0x%x to END 0x%x/0x%x data=0x%x" +xive_escalate_esb(uint8_t end_blk, uint32_t end_idx, uint32_t lisn) "Escalate from END 0x%x/0x%x to LISN=0x%x" # pnv_xive.c pnv_xive_ic_hw_trigger(uint64_t addr, uint64_t val) "@0x%"PRIx64" val=0x%"PRIx64 @@ -314,12 +320,8 @@ loongson_ipi_read(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x loongson_ipi_write(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%"PRIx64 # loongarch_pch_pic.c loongarch_pch_pic_irq_handler(int irq, int level) "irq %d level %d" -loongarch_pch_pic_low_readw(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%" PRIx64 -loongarch_pch_pic_low_writew(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%" PRIx64 -loongarch_pch_pic_high_readw(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%" PRIx64 -loongarch_pch_pic_high_writew(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%" PRIx64 -loongarch_pch_pic_readb(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%" PRIx64 -loongarch_pch_pic_writeb(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%" PRIx64 +loongarch_pch_pic_read(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%" PRIx64 +loongarch_pch_pic_write(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%" PRIx64 # loongarch_pch_msi.c loongarch_msi_set_irq(int irq_num) "set msi irq %d" diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 27b473e..e0ffcf8 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -25,6 +25,58 @@ /* * XIVE Thread Interrupt Management context */ +bool xive_ring_valid(XiveTCTX *tctx, uint8_t ring) +{ + 
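/* A ring is valid only when its own WORD2 valid bit (0x80) and those of + * all more privileged rings above it (POOL, PHYS) are set; the loop below + * walks up in XIVE_TM_RING_SIZE steps and fails on the first cleared bit. + */ +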
uint8_t cur_ring; + + for (cur_ring = ring; cur_ring <= TM_QW3_HV_PHYS; + cur_ring += XIVE_TM_RING_SIZE) { + if (!(tctx->regs[cur_ring + TM_WORD2] & 0x80)) { + return false; + } + } + return true; +} + +bool xive_nsr_indicates_exception(uint8_t ring, uint8_t nsr) +{ + switch (ring) { + case TM_QW1_OS: + return !!(nsr & TM_QW1_NSR_EO); + case TM_QW2_HV_POOL: + case TM_QW3_HV_PHYS: + return !!(nsr & TM_QW3_NSR_HE); + default: + g_assert_not_reached(); + } +} + +bool xive_nsr_indicates_group_exception(uint8_t ring, uint8_t nsr) +{ + if ((nsr & TM_NSR_GRP_LVL) > 0) { + g_assert(xive_nsr_indicates_exception(ring, nsr)); + return true; + } + return false; +} + +uint8_t xive_nsr_exception_ring(uint8_t ring, uint8_t nsr) +{ + /* NSR determines if pool/phys ring is for phys or pool interrupt */ + if ((ring == TM_QW3_HV_PHYS) || (ring == TM_QW2_HV_POOL)) { + uint8_t he = (nsr & TM_QW3_NSR_HE) >> 6; + + if (he == TM_QW3_NSR_HE_PHYS) { + return TM_QW3_HV_PHYS; + } else if (he == TM_QW3_NSR_HE_POOL) { + return TM_QW2_HV_POOL; + } else { + /* Don't support LSI mode */ + g_assert_not_reached(); + } + } + return ring; +} static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring) { @@ -41,74 +93,83 @@ static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring) } } -static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) +/* + * interrupt is accepted on the presentation ring, for PHYS ring the NSR + * directs it to the PHYS or POOL rings. + */ +uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t sig_ring) { - uint8_t *regs = &tctx->regs[ring]; - uint8_t nsr = regs[TM_NSR]; + uint8_t *sig_regs = &tctx->regs[sig_ring]; + uint8_t nsr = sig_regs[TM_NSR]; - qemu_irq_lower(xive_tctx_output(tctx, ring)); + g_assert(sig_ring == TM_QW1_OS || sig_ring == TM_QW3_HV_PHYS); - if (regs[TM_NSR] != 0) { - uint8_t cppr = regs[TM_PIPR]; - uint8_t alt_ring; - uint8_t *alt_regs; + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0); - /* POOL interrupt uses IPB in QW2, POOL ring */ - if ((ring == TM_QW3_HV_PHYS) && (nsr & (TM_QW3_NSR_HE_POOL << 6))) { - alt_ring = TM_QW2_HV_POOL; - } else { - alt_ring = ring; - } - alt_regs = &tctx->regs[alt_ring]; + if (xive_nsr_indicates_exception(sig_ring, nsr)) { + uint8_t cppr = sig_regs[TM_PIPR]; + uint8_t ring; + uint8_t *regs; + + ring = xive_nsr_exception_ring(sig_ring, nsr); + regs = &tctx->regs[ring]; - regs[TM_CPPR] = cppr; + sig_regs[TM_CPPR] = cppr; /* * If the interrupt was for a specific VP, reset the pending * buffer bit, otherwise clear the logical server indicator */ - if (regs[TM_NSR] & TM_NSR_GRP_LVL) { - regs[TM_NSR] &= ~TM_NSR_GRP_LVL; - } else { - alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); + if (!xive_nsr_indicates_group_exception(sig_ring, nsr)) { + regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); } - /* Drop the exception bit and any group/crowd */ - regs[TM_NSR] = 0; + /* Clear the exception from NSR */ + sig_regs[TM_NSR] = 0; + qemu_irq_lower(xive_tctx_output(tctx, sig_ring)); - trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring, - alt_regs[TM_IPB], regs[TM_PIPR], - regs[TM_CPPR], regs[TM_NSR]); + trace_xive_tctx_accept(tctx->cs->cpu_index, ring, + regs[TM_IPB], sig_regs[TM_PIPR], + sig_regs[TM_CPPR], sig_regs[TM_NSR]); } - return ((uint64_t)nsr << 8) | regs[TM_CPPR]; + return ((uint64_t)nsr << 8) | sig_regs[TM_CPPR]; } -void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level) +/* Change PIPR and calculate NSR and irq based on 
PIPR, CPPR, group */ +void xive_tctx_pipr_set(XiveTCTX *tctx, uint8_t ring, uint8_t pipr, + uint8_t group_level) { - /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ - uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring; - uint8_t *alt_regs = &tctx->regs[alt_ring]; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); uint8_t *regs = &tctx->regs[ring]; - if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) { + g_assert(!xive_nsr_indicates_group_exception(ring, sig_regs[TM_NSR])); + + sig_regs[TM_PIPR] = pipr; + + if (pipr < sig_regs[TM_CPPR]) { switch (ring) { case TM_QW1_OS: - regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F); + sig_regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F); break; case TM_QW2_HV_POOL: - alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F); + sig_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F); break; case TM_QW3_HV_PHYS: - regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F); + sig_regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F); break; default: g_assert_not_reached(); } trace_xive_tctx_notify(tctx->cs->cpu_index, ring, - regs[TM_IPB], alt_regs[TM_PIPR], - alt_regs[TM_CPPR], alt_regs[TM_NSR]); + regs[TM_IPB], pipr, + sig_regs[TM_CPPR], sig_regs[TM_NSR]); qemu_irq_raise(xive_tctx_output(tctx, ring)); + } else { + sig_regs[TM_NSR] = 0; + qemu_irq_lower(xive_tctx_output(tctx, ring)); } } @@ -124,25 +185,32 @@ void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring) static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) { - uint8_t *regs = &tctx->regs[ring]; + uint8_t *sig_regs = &tctx->regs[ring]; uint8_t pipr_min; uint8_t ring_min; + g_assert(ring == TM_QW1_OS || ring == TM_QW3_HV_PHYS); + + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0); + + /* XXX: should show pool IPB for PHYS ring */ trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring, - regs[TM_IPB], regs[TM_PIPR], - cppr, regs[TM_NSR]); + sig_regs[TM_IPB], sig_regs[TM_PIPR], + cppr, sig_regs[TM_NSR]); if (cppr > XIVE_PRIORITY_MAX) { cppr = 0xff; } - tctx->regs[ring + TM_CPPR] = cppr; + sig_regs[TM_CPPR] = cppr; /* * Recompute the PIPR based on local pending interrupts. The PHYS * ring must take the minimum of both the PHYS and POOL PIPR values. 
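* For example, PHYS IPB 0x20 (priority 2 pending) together with POOL * IPB 0x40 (priority 1 pending) resolves to a PHYS-side PIPR of 1.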
*/ - pipr_min = xive_ipb_to_pipr(regs[TM_IPB]); + pipr_min = xive_ipb_to_pipr(sig_regs[TM_IPB]); ring_min = ring; /* PHYS updates also depend on POOL values */ @@ -151,7 +219,6 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) /* POOL values only matter if POOL ctx is valid */ if (pool_regs[TM_WORD2] & 0x80) { - uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]); /* @@ -165,30 +232,39 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) } } - regs[TM_PIPR] = pipr_min; + /* CPPR has changed, this may present or preclude a pending exception */ + xive_tctx_pipr_set(tctx, ring_min, pipr_min, 0); +} + +static void xive_tctx_pipr_recompute_from_ipb(XiveTCTX *tctx, uint8_t ring) +{ + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); + uint8_t *regs = &tctx->regs[ring]; - /* CPPR has changed, check if we need to raise a pending exception */ - xive_tctx_notify(tctx, ring_min, 0); + /* Does not support a presented group interrupt */ + g_assert(!xive_nsr_indicates_group_exception(ring, sig_regs[TM_NSR])); + + xive_tctx_pipr_set(tctx, ring, xive_ipb_to_pipr(regs[TM_IPB]), 0); } -void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority, - uint8_t group_level) - { - /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ - uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring; - uint8_t *alt_regs = &tctx->regs[alt_ring]; +void xive_tctx_pipr_present(XiveTCTX *tctx, uint8_t ring, uint8_t priority, + uint8_t group_level) +{ + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); uint8_t *regs = &tctx->regs[ring]; + uint8_t pipr = xive_priority_to_pipr(priority); if (group_level == 0) { - /* VP-specific */ regs[TM_IPB] |= xive_priority_to_ipb(priority); - alt_regs[TM_PIPR] = xive_ipb_to_pipr(regs[TM_IPB]); - } else { - /* VP-group */ - alt_regs[TM_PIPR] = xive_priority_to_pipr(priority); + if (pipr >= sig_regs[TM_PIPR]) { + /* VP interrupts can come here with lower priority than PIPR */ + return; + } } - xive_tctx_notify(tctx, ring, group_level); - } + g_assert(pipr <= xive_ipb_to_pipr(regs[TM_IPB])); + g_assert(pipr < sig_regs[TM_PIPR]); + xive_tctx_pipr_set(tctx, ring, pipr, group_level); +} /* * XIVE Thread Interrupt Management Area (TIMA) @@ -206,25 +282,78 @@ static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx, return xive_tctx_accept(tctx, TM_QW3_HV_PHYS); } +static void xive_pool_cam_decode(uint32_t cam, uint8_t *nvt_blk, + uint32_t *nvt_idx, bool *vp) +{ + if (nvt_blk) { + *nvt_blk = xive_nvt_blk(cam); + } + if (nvt_idx) { + *nvt_idx = xive_nvt_idx(cam); + } + if (vp) { + *vp = !!(cam & TM_QW2W2_VP); + } +} + +static uint32_t xive_tctx_get_pool_cam(XiveTCTX *tctx, uint8_t *nvt_blk, + uint32_t *nvt_idx, bool *vp) +{ + uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]); + uint32_t cam = be32_to_cpu(qw2w2); + + xive_pool_cam_decode(cam, nvt_blk, nvt_idx, vp); + return qw2w2; +} + +static void xive_tctx_set_pool_cam(XiveTCTX *tctx, uint32_t qw2w2) +{ + memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4); +} + static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, unsigned size) { - uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]); uint32_t qw2w2; + uint32_t qw2w2_new; + uint8_t nvt_blk; + uint32_t nvt_idx; + bool vp; + + qw2w2 = xive_tctx_get_pool_cam(tctx, &nvt_blk, &nvt_idx, &vp); + + if (!vp) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pull invalid POOL NVT %x/%x !?\n", + nvt_blk, nvt_idx); + } + + /* Invalidate CAM 
line */ + qw2w2_new = xive_set_field32(TM_QW2W2_VP, qw2w2, 0); + xive_tctx_set_pool_cam(tctx, qw2w2_new); + + xive_tctx_reset_signal(tctx, TM_QW1_OS); + xive_tctx_reset_signal(tctx, TM_QW2_HV_POOL); + /* Re-check phys for interrupts if pool was disabled */ + xive_tctx_pipr_recompute_from_ipb(tctx, TM_QW3_HV_PHYS); - qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0); - memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4); return qw2w2; } static uint64_t xive_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, unsigned size) { - uint8_t qw3b8_prev = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2]; - uint8_t qw3b8; + uint8_t qw3b8 = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2]; + uint8_t qw3b8_new; + + qw3b8 = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2]; + if (!(qw3b8 & TM_QW3B8_VT)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid PHYS thread!?\n"); + } + qw3b8_new = qw3b8 & ~TM_QW3B8_VT; + tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8_new; - qw3b8 = qw3b8_prev & ~TM_QW3B8_VT; - tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8; + xive_tctx_reset_signal(tctx, TM_QW1_OS); + xive_tctx_reset_signal(tctx, TM_QW3_HV_PHYS); return qw3b8; } @@ -255,14 +384,14 @@ static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx, static const uint8_t xive_tm_hw_view[] = { 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-0 User */ - 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-1 OS */ + 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 3, /* QW-1 OS */ 0, 0, 3, 3, 0, 3, 3, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-2 POOL */ 3, 3, 3, 3, 0, 3, 0, 2, 3, 0, 0, 3, 3, 3, 3, 0, /* QW-3 PHYS */ }; static const uint8_t xive_tm_hv_view[] = { 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-0 User */ - 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-1 OS */ + 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 3, /* QW-1 OS */ 0, 0, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 0, 0, 0, 0, /* QW-2 POOL */ 3, 3, 3, 3, 0, 3, 0, 2, 3, 0, 0, 3, 0, 0, 0, 0, /* QW-3 PHYS */ }; @@ -326,7 +455,7 @@ static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value, */ if (size < 4 || !mask || ring_offset == TM_QW0_USER) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%" - HWADDR_PRIx"\n", offset); + HWADDR_PRIx" size %d\n", offset, size); return; } @@ -357,7 +486,7 @@ static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size) */ if (size < 4 || !mask || ring_offset == TM_QW0_USER) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%" - HWADDR_PRIx"\n", offset); + HWADDR_PRIx" size %d\n", offset, size); return -1; } @@ -403,6 +532,12 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx, xive_tctx_set_lgs(tctx, TM_QW1_OS, value & 0xff); } +static void xive_tm_set_pool_lgs(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive_tctx_set_lgs(tctx, TM_QW2_HV_POOL, value & 0xff); +} + /* * Adjust the PIPR to allow a CPU to process event queues of other * priorities during one physical interrupt cycle. @@ -410,7 +545,12 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx, static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size) { - xive_tctx_pipr_update(tctx, TM_QW1_OS, value & 0xff, 0); + uint8_t ring = TM_QW1_OS; + uint8_t *regs = &tctx->regs[ring]; + + /* XXX: how should this work exactly? 
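(Treated here as a priority write: the bit for the requested priority is merged into the OS IPB and PIPR is rederived from the result.)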
*/ + regs[TM_IPB] |= xive_priority_to_ipb(value & 0xff); + xive_tctx_pipr_recompute_from_ipb(tctx, ring); } static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk, @@ -454,7 +594,7 @@ static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, qw1w2 = xive_tctx_get_os_cam(tctx, &nvt_blk, &nvt_idx, &vo); if (!vo) { - qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVT %x/%x !?\n", + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pull invalid OS NVT %x/%x !?\n", nvt_blk, nvt_idx); } @@ -466,7 +606,7 @@ static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, return qw1w2; } -static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx, +static void xive_tctx_restore_nvp(XiveRouter *xrtr, XiveTCTX *tctx, uint8_t nvt_blk, uint32_t nvt_idx) { XiveNVT nvt; @@ -492,16 +632,6 @@ static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx, uint8_t *regs = &tctx->regs[TM_QW1_OS]; regs[TM_IPB] |= ipb; } - - /* - * Always call xive_tctx_pipr_update(). Even if there were no - * escalation triggered, there could be a pending interrupt which - * was saved when the context was pulled and that we need to take - * into account by recalculating the PIPR (which is not - * saved/restored). - * It will also raise the External interrupt signal if needed. - */ - xive_tctx_pipr_update(tctx, TM_QW1_OS, 0xFF, 0); /* fxb */ } /* @@ -523,7 +653,17 @@ static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, /* Check the interrupt pending bits */ if (vo) { - xive_tctx_need_resend(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx); + xive_tctx_restore_nvp(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx); + + /* + * Always call xive_tctx_pipr_recompute_from_ipb(). Even if there were no + * escalation triggered, there could be a pending interrupt which + * was saved when the context was pulled and that we need to take + * into account by recalculating the PIPR (which is not + * saved/restored). + * It will also raise the External interrupt signal if needed.
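+ * (P9 has no VP-group notification, so recomputing from the IPB alone is sufficient here.)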
+ */ + xive_tctx_pipr_recompute_from_ipb(tctx, TM_QW1_OS); /* fxb */ } } @@ -542,6 +682,8 @@ typedef struct XiveTmOp { uint8_t page_offset; uint32_t op_offset; unsigned size; + bool hw_ok; + bool sw_ok; void (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size); @@ -554,34 +696,34 @@ static const XiveTmOp xive_tm_operations[] = { * MMIOs below 2K : raw values and special operations without side * effects */ - { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive_tm_push_os_ctx, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, - xive_tm_vt_poll }, + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, true, true, + xive_tm_set_os_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, true, true, + xive_tm_push_os_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, true, true, + xive_tm_set_hv_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, false, true, + xive_tm_vt_push, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, true, true, + NULL, xive_tm_vt_poll }, /* MMIOs above 2K : special operations with side effects */ - { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, - xive_tm_ack_os_reg }, - { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, - NULL }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL, - xive_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL, - xive_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL, - xive_tm_ack_hv_reg }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, NULL, - xive_tm_pull_phys_ctx }, + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, true, false, + NULL, xive_tm_ack_os_reg }, + { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, true, false, + xive_tm_set_os_pending, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, true, false, + NULL, xive_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, true, false, + NULL, xive_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, true, false, + NULL, xive_tm_ack_hv_reg }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, true, false, + NULL, xive_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, true, false, + NULL, xive_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, true, false, + NULL, xive_tm_pull_phys_ctx }, }; static const XiveTmOp xive2_tm_operations[] = { @@ -589,50 +731,58 @@ static const XiveTmOp xive2_tm_operations[] = { * MMIOs below 2K : raw values and special operations without side * effects */ - { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive2_tm_set_os_cppr, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive2_tm_push_os_ctx, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 8, xive2_tm_push_os_ctx, - NULL }, - { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS, 1, xive_tm_set_os_lgs, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive2_tm_set_hv_cppr, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, - xive_tm_vt_poll }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T, 1, xive2_tm_set_hv_target, - NULL }, + { XIVE_TM_OS_PAGE, TM_QW1_OS + 
TM_CPPR, 1, true, true, + xive2_tm_set_os_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, true, true, + xive2_tm_push_os_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 8, true, true, + xive2_tm_push_os_ctx, NULL }, + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS, 1, true, true, + xive_tm_set_os_lgs, NULL }, + { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_WORD2, 4, true, true, + xive2_tm_push_pool_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_WORD2, 8, true, true, + xive2_tm_push_pool_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_LGS, 1, true, true, + xive_tm_set_pool_lgs, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, true, true, + xive2_tm_set_hv_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, false, true, + xive2_tm_push_phys_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, true, true, + NULL, xive_tm_vt_poll }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T, 1, true, true, + xive2_tm_set_hv_target, NULL }, /* MMIOs above 2K : special operations with side effects */ - { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, - xive_tm_ack_os_reg }, - { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, - NULL }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2, 4, NULL, - xive2_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL, - xive2_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL, - xive2_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL, - xive_tm_ack_hv_reg }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2, 4, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL, 1, xive2_tm_pull_os_ctx_ol, - NULL }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2, 4, NULL, - xive_tm_pull_phys_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, NULL, - xive_tm_pull_phys_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL, 1, xive2_tm_pull_phys_ctx_ol, - NULL }, + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, true, false, + NULL, xive_tm_ack_os_reg }, + { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, true, false, + xive2_tm_set_os_pending, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2, 4, true, false, + NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, true, false, + NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, true, false, + NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, true, false, + NULL, xive_tm_ack_hv_reg }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2, 4, true, false, + NULL, xive2_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, true, false, + NULL, xive2_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, true, false, + NULL, xive2_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL, 1, true, false, + xive2_tm_pull_os_ctx_ol, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2, 4, true, false, + NULL, xive2_tm_pull_phys_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, true, false, + NULL, xive2_tm_pull_phys_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL, 1, true, false, + xive2_tm_pull_phys_ctx_ol, NULL }, + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_EL, 1, true, false, + xive2_tm_ack_os_el, NULL }, }; static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset, @@ -674,21 +824,31 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, 
unsigned size) { const XiveTmOp *xto; + uint8_t ring = offset & TM_RING_OFFSET; + bool is_valid = xive_ring_valid(tctx, ring); + bool hw_owned = is_valid; trace_xive_tctx_tm_write(tctx->cs->cpu_index, offset, size, value); /* - * TODO: check V bit in Q[0-3]W2 - */ - - /* * First, check for special operations in the 2K region */ + xto = xive_tm_find_op(tctx->xptr, offset, size, true); + if (xto) { + if (hw_owned && !xto->hw_ok) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to HW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } + if (!hw_owned && !xto->sw_ok) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to SW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } + } + if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA " - "@%"HWADDR_PRIx"\n", offset); + "@%"HWADDR_PRIx" size %d\n", offset, size); } else { xto->write_handler(xptr, tctx, offset, value, size); } @@ -698,7 +858,6 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. */ - xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (xto) { xto->write_handler(xptr, tctx, offset, value, size); return; @@ -707,6 +866,11 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Finish with raw access to the register values */ + if (hw_owned) { + /* Store context operations are dangerous when context is valid */ + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to HW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } xive_tm_raw_write(tctx, offset, value, size); } @@ -714,20 +878,30 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, unsigned size) { const XiveTmOp *xto; + uint8_t ring = offset & TM_RING_OFFSET; + bool is_valid = xive_ring_valid(tctx, ring); + bool hw_owned = is_valid; uint64_t ret; - /* - * TODO: check V bit in Q[0-3]W2 - */ + xto = xive_tm_find_op(tctx->xptr, offset, size, false); + if (xto) { + if (hw_owned && !xto->hw_ok) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined read to HW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } + if (!hw_owned && !xto->sw_ok) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined read to SW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } + } /* * First, check for special operations in the 2K region */ if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA" - "@%"HWADDR_PRIx"\n", offset); + "@%"HWADDR_PRIx" size %d\n", offset, size); return -1; } ret = xto->read_handler(xptr, tctx, offset, size); @@ -737,7 +911,6 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. 
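* (Reads that get here have already been checked against the hw_ok/sw_ok * ownership flags above.)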
*/ - xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (xto) { ret = xto->read_handler(xptr, tctx, offset, size); goto out; @@ -1191,6 +1364,7 @@ static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size) /* Forward the source event notification for routing */ if (ret) { + trace_xive_source_notify(srcno); xive_source_notify(xsrc, srcno); } break; @@ -1286,6 +1460,8 @@ out: /* Forward the source event notification for routing */ if (notify) { xive_source_notify(xsrc, srcno); + } else { + trace_xive_source_blocked(srcno); } } @@ -1672,8 +1848,8 @@ uint32_t xive_get_vpgroup_size(uint32_t nvp_index) return 1U << (first_zero + 1); } -static uint8_t xive_get_group_level(bool crowd, bool ignore, - uint32_t nvp_blk, uint32_t nvp_index) +uint8_t xive_get_group_level(bool crowd, bool ignore, + uint32_t nvp_blk, uint32_t nvp_index) { int first_zero; uint8_t level; @@ -1791,15 +1967,14 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, * This is our simple Xive Presenter Engine model. It is merged in the * Router as it does not require an extra object. */ -bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, +bool xive_presenter_match(XiveFabric *xfb, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, bool *precluded) + uint32_t logic_serv, XiveTCTXMatch *match) { XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb); - XiveTCTXMatch match = { .tctx = NULL, .ring = 0, .precluded = false }; - uint8_t group_level; - int count; + + memset(match, 0, sizeof(*match)); /* * Ask the machine to scan the interrupt controllers for a match. @@ -1824,22 +1999,8 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, * a new command to the presenters (the equivalent of the "assign" * power bus command in the documented full notify sequence. */ - count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore, - priority, logic_serv, &match); - if (count < 0) { - return false; - } - - /* handle CPU exception delivery */ - if (count) { - group_level = xive_get_group_level(crowd, cam_ignore, nvt_blk, nvt_idx); - trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, group_level); - xive_tctx_pipr_update(match.tctx, match.ring, priority, group_level); - } else { - *precluded = match.precluded; - } - - return !!count; + return xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore, + priority, logic_serv, match); } /* @@ -1876,7 +2037,7 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas) uint8_t nvt_blk; uint32_t nvt_idx; XiveNVT nvt; - bool found, precluded; + XiveTCTXMatch match; uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); @@ -1956,16 +2117,16 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas) return; } - found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx, - false /* crowd */, - xive_get_field32(END_W7_F0_IGNORE, end.w7), - priority, - xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7), - &precluded); - /* we don't support VP-group notification on P9, so precluded is not used */ /* TODO: Auto EOI. 
*/ - - if (found) { + /* we don't support VP-group notification on P9, so precluded is not used */ + if (xive_presenter_match(xrtr->xfb, format, nvt_blk, nvt_idx, + false /* crowd */, + xive_get_field32(END_W7_F0_IGNORE, end.w7), + priority, + xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7), + &match)) { + trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, 0); + xive_tctx_pipr_present(match.tctx, match.ring, priority, 0); return; } diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c index a08cf90..ee5fa26 100644 --- a/hw/intc/xive2.c +++ b/hw/intc/xive2.c @@ -19,6 +19,13 @@ #include "hw/ppc/xive2_regs.h" #include "trace.h" +static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, + uint32_t end_idx, uint32_t end_data, + bool redistribute); + +static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring, + uint8_t *nvp_blk, uint32_t *nvp_idx); + uint32_t xive2_router_get_config(Xive2Router *xrtr) { Xive2RouterClass *xrc = XIVE2_ROUTER_GET_CLASS(xrtr); @@ -188,12 +195,27 @@ void xive2_eas_pic_print_info(Xive2Eas *eas, uint32_t lisn, GString *buf) (uint32_t) xive_get_field64(EAS2_END_DATA, eas->w)); } +#define XIVE2_QSIZE_CHUNK_CL 128 +#define XIVE2_QSIZE_CHUNK_4k 4096 +/* Calculate max number of queue entries for an END */ +static uint32_t xive2_end_get_qentries(Xive2End *end) +{ + uint32_t w3 = end->w3; + uint32_t qsize = xive_get_field32(END2_W3_QSIZE, w3); + if (xive_get_field32(END2_W3_CL, w3)) { + g_assert(qsize <= 4); + return (XIVE2_QSIZE_CHUNK_CL << qsize) / sizeof(uint32_t); + } else { + g_assert(qsize <= 12); + return (XIVE2_QSIZE_CHUNK_4k << qsize) / sizeof(uint32_t); + } +} + void xive2_end_queue_pic_print_info(Xive2End *end, uint32_t width, GString *buf) { uint64_t qaddr_base = xive2_end_qaddr(end); - uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3); uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1); - uint32_t qentries = 1 << (qsize + 10); + uint32_t qentries = xive2_end_get_qentries(end); int i; /* @@ -223,8 +245,7 @@ void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf) uint64_t qaddr_base = xive2_end_qaddr(end); uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1); uint32_t qgen = xive_get_field32(END2_W1_GENERATION, end->w1); - uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3); - uint32_t qentries = 1 << (qsize + 10); + uint32_t qentries = xive2_end_get_qentries(end); uint32_t nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end->w6); uint32_t nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end->w6); @@ -341,13 +362,12 @@ void xive2_nvgc_pic_print_info(Xive2Nvgc *nvgc, uint32_t nvgc_idx, GString *buf) static void xive2_end_enqueue(Xive2End *end, uint32_t data) { uint64_t qaddr_base = xive2_end_qaddr(end); - uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3); uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1); uint32_t qgen = xive_get_field32(END2_W1_GENERATION, end->w1); uint64_t qaddr = qaddr_base + (qindex << 2); uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff)); - uint32_t qentries = 1 << (qsize + 10); + uint32_t qentries = xive2_end_get_qentries(end); if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) { @@ -361,8 +381,8 @@ static void xive2_end_enqueue(Xive2End *end, uint32_t data) qgen ^= 1; end->w1 = xive_set_field32(END2_W1_GENERATION, end->w1, qgen); - /* TODO(PowerNV): reset GF bit on a cache watch operation */ - end->w1 = xive_set_field32(END2_W1_GEN_FLIPPED, end->w1, qgen); + /* Set gen flipped to 1, it 
gets reset on a cache watch operation */ + end->w1 = xive_set_field32(END2_W1_GEN_FLIPPED, end->w1, 1); } end->w1 = xive_set_field32(END2_W1_PAGE_OFF, end->w1, qindex); } @@ -492,12 +512,13 @@ static void xive2_presenter_backlog_decr(XivePresenter *xptr, */ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx, - uint8_t nvp_blk, uint32_t nvp_idx, - uint8_t ring) + uint8_t ring, + uint8_t nvp_blk, uint32_t nvp_idx) { CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env; uint32_t pir = env->spr_cb[SPR_PIR].default_value; Xive2Nvp nvp; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); uint8_t *regs = &tctx->regs[ring]; if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) { @@ -533,7 +554,14 @@ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx, } nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, regs[TM_IPB]); - nvp.w2 = xive_set_field32(NVP2_W2_CPPR, nvp.w2, regs[TM_CPPR]); + + if ((nvp.w0 & NVP2_W0_P) || ring != TM_QW2_HV_POOL) { + /* + * Non-pool contexts always save CPPR (ignore p bit). XXX: Clarify + * whether that is the correct behaviour. + */ + nvp.w2 = xive_set_field32(NVP2_W2_CPPR, nvp.w2, sig_regs[TM_CPPR]); + } if (nvp.w0 & NVP2_W0_L) { /* * Typically not used. If LSMFB is restored with 0, it will @@ -555,6 +583,7 @@ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx, xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 1); } +/* POOL cam is the same as OS cam encoding */ static void xive2_cam_decode(uint32_t cam, uint8_t *nvp_blk, uint32_t *nvp_idx, bool *valid, bool *hw) { @@ -584,6 +613,79 @@ static uint32_t xive2_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx) return xive2_nvp_cam_line(blk, 1 << tid_shift | (pir & tid_mask)); } +static void xive2_redistribute(Xive2Router *xrtr, XiveTCTX *tctx, uint8_t ring) +{ + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); + uint8_t nsr = sig_regs[TM_NSR]; + uint8_t pipr = sig_regs[TM_PIPR]; + uint8_t crowd = NVx_CROWD_LVL(nsr); + uint8_t group = NVx_GROUP_LVL(nsr); + uint8_t nvgc_blk, end_blk, nvp_blk; + uint32_t nvgc_idx, end_idx, nvp_idx; + Xive2Nvgc nvgc; + uint8_t prio_limit; + uint32_t cfg; + + /* redistribution is only for group/crowd interrupts */ + if (!xive_nsr_indicates_group_exception(ring, nsr)) { + return; + } + + /* Don't check return code since ring is expected to be invalidated */ + xive2_tctx_get_nvp_indexes(tctx, ring, &nvp_blk, &nvp_idx); + + trace_xive_redistribute(tctx->cs->cpu_index, ring, nvp_blk, nvp_idx); + /* convert crowd/group to blk/idx */ + if (group > 0) { + nvgc_idx = (nvp_idx & (0xffffffff << group)) | + ((1 << (group - 1)) - 1); + } else { + nvgc_idx = nvp_idx; + } + + if (crowd > 0) { + crowd = (crowd == 3) ? 4 : crowd; + nvgc_blk = (nvp_blk & (0xffffffff << crowd)) | + ((1 << (crowd - 1)) - 1); + } else { + nvgc_blk = nvp_blk; + } + + /* Use blk/idx to retrieve the NVGC */ + if (xive2_router_get_nvgc(xrtr, crowd, nvgc_blk, nvgc_idx, &nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no %s %x/%x\n", + crowd ?
"NVC" : "NVG", nvgc_blk, nvgc_idx); + return; + } + + /* retrieve the END blk/idx from the NVGC */ + end_blk = xive_get_field32(NVGC2_W1_END_BLK, nvgc.w1); + end_idx = xive_get_field32(NVGC2_W1_END_IDX, nvgc.w1); + + /* determine number of priorities being used */ + cfg = xive2_router_get_config(xrtr); + if (cfg & XIVE2_EN_VP_GRP_PRIORITY) { + prio_limit = 1 << GETFIELD(NVGC2_W1_PSIZE, nvgc.w1); + } else { + prio_limit = 1 << GETFIELD(XIVE2_VP_INT_PRIO, cfg); + } + + /* add priority offset to end index */ + end_idx += pipr % prio_limit; + + /* trigger the group END */ + xive2_router_end_notify(xrtr, end_blk, end_idx, 0, true); + + /* clear interrupt indication for the context */ + sig_regs[TM_NSR] = 0; + sig_regs[TM_PIPR] = sig_regs[TM_CPPR]; + xive_tctx_reset_signal(tctx, ring); +} + +static void xive2_tctx_process_pending(XiveTCTX *tctx, uint8_t sig_ring); + static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, unsigned size, uint8_t ring) { @@ -595,10 +697,11 @@ static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx, uint8_t cur_ring; bool valid; bool do_save; + uint8_t nsr; xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &valid, &do_save); - if (!valid) { + if (xive2_tctx_get_nvp_indexes(tctx, ring, &nvp_blk, &nvp_idx)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVP %x/%x !?\n", nvp_blk, nvp_idx); } @@ -608,21 +711,53 @@ static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx, cur_ring += XIVE_TM_RING_SIZE) { uint32_t ringw2 = xive_tctx_word2(&tctx->regs[cur_ring]); uint32_t ringw2_new = xive_set_field32(TM2_QW1W2_VO, ringw2, 0); + bool is_valid = !!(xive_get_field32(TM2_QW1W2_VO, ringw2)); + uint8_t *sig_regs; + memcpy(&tctx->regs[cur_ring + TM_WORD2], &ringw2_new, 4); + + /* Skip the rest for USER or invalid contexts */ + if ((cur_ring == TM_QW0_USER) || !is_valid) { + continue; + } + + /* Active group/crowd interrupts need to be redistributed */ + sig_regs = xive_tctx_signal_regs(tctx, ring); + nsr = sig_regs[TM_NSR]; + if (xive_nsr_indicates_group_exception(cur_ring, nsr)) { + /* Ensure ring matches NSR (for HV NSR POOL vs PHYS rings) */ + if (cur_ring == xive_nsr_exception_ring(cur_ring, nsr)) { + xive2_redistribute(xrtr, tctx, cur_ring); + } + } + + /* + * Lower external interrupt line of requested ring and below except for + * USER, which doesn't exist. + */ + if (xive_nsr_indicates_exception(cur_ring, nsr)) { + if (cur_ring == xive_nsr_exception_ring(cur_ring, nsr)) { + xive_tctx_reset_signal(tctx, cur_ring); + } + } } - if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_save) { - xive2_tctx_save_ctx(xrtr, tctx, nvp_blk, nvp_idx, ring); + if (ring == TM_QW2_HV_POOL) { + /* Re-check phys for interrupts if pool was disabled */ + nsr = tctx->regs[TM_QW3_HV_PHYS + TM_NSR]; + if (xive_nsr_indicates_exception(TM_QW3_HV_PHYS, nsr)) { + /* Ring must be PHYS because POOL would have been redistributed */ + g_assert(xive_nsr_exception_ring(TM_QW3_HV_PHYS, nsr) == + TM_QW3_HV_PHYS); + } else { + xive2_tctx_process_pending(tctx, TM_QW3_HV_PHYS); + } } - /* - * Lower external interrupt line of requested ring and below except for - * USER, which doesn't exist. 
- */ - for (cur_ring = TM_QW1_OS; cur_ring <= ring; - cur_ring += XIVE_TM_RING_SIZE) { - xive_tctx_reset_signal(tctx, cur_ring); + if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_save) { + xive2_tctx_save_ctx(xrtr, tctx, ring, nvp_blk, nvp_idx); } + return target_ringw2; } @@ -632,6 +767,18 @@ uint64_t xive2_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW1_OS); } +uint64_t xive2_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, unsigned size) +{ + return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW2_HV_POOL); +} + +uint64_t xive2_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, unsigned size) +{ + return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW3_HV_PHYS); +} + #define REPORT_LINE_GEN1_SIZE 16 static void xive2_tm_report_line_gen1(XiveTCTX *tctx, uint8_t *data, @@ -741,12 +888,15 @@ void xive2_tm_pull_phys_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx, xive2_tm_pull_ctx_ol(xptr, tctx, offset, value, size, TM_QW3_HV_PHYS); } -static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, - uint8_t nvp_blk, uint32_t nvp_idx, - Xive2Nvp *nvp) +static uint8_t xive2_tctx_restore_ctx(Xive2Router *xrtr, XiveTCTX *tctx, + uint8_t ring, + uint8_t nvp_blk, uint32_t nvp_idx, + Xive2Nvp *nvp) { CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env; uint32_t pir = env->spr_cb[SPR_PIR].default_value; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); + uint8_t *regs = &tctx->regs[ring]; uint8_t cppr; if (!xive2_nvp_is_hw(nvp)) { @@ -759,10 +909,10 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, nvp->w2 = xive_set_field32(NVP2_W2_CPPR, nvp->w2, 0); xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, nvp, 2); - tctx->regs[TM_QW1_OS + TM_CPPR] = cppr; - tctx->regs[TM_QW1_OS + TM_LSMFB] = xive_get_field32(NVP2_W2_LSMFB, nvp->w2); - tctx->regs[TM_QW1_OS + TM_LGS] = xive_get_field32(NVP2_W2_LGS, nvp->w2); - tctx->regs[TM_QW1_OS + TM_T] = xive_get_field32(NVP2_W2_T, nvp->w2); + sig_regs[TM_CPPR] = cppr; + regs[TM_LSMFB] = xive_get_field32(NVP2_W2_LSMFB, nvp->w2); + regs[TM_LGS] = xive_get_field32(NVP2_W2_LGS, nvp->w2); + regs[TM_T] = xive_get_field32(NVP2_W2_T, nvp->w2); nvp->w1 = xive_set_field32(NVP2_W1_CO, nvp->w1, 1); nvp->w1 = xive_set_field32(NVP2_W1_CO_THRID_VALID, nvp->w1, 1); @@ -771,9 +921,18 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, /* * Checkout privilege: 0:OS, 1:Pool, 2:Hard * - * TODO: we only support OS push/pull + * TODO: we don't support hard push/pull */ - nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 0); + switch (ring) { + case TM_QW1_OS: + nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 0); + break; + case TM_QW2_HV_POOL: + nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 1); + break; + default: + g_assert_not_reached(); + } xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, nvp, 1); @@ -781,18 +940,14 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, return cppr; } -static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx, +/* Restore TIMA VP context from NVP backlog */ +static void xive2_tctx_restore_nvp(Xive2Router *xrtr, XiveTCTX *tctx, + uint8_t ring, uint8_t nvp_blk, uint32_t nvp_idx, bool do_restore) { - XivePresenter *xptr = XIVE_PRESENTER(xrtr); + uint8_t *regs = &tctx->regs[ring]; uint8_t ipb; - uint8_t backlog_level; - uint8_t group_level; - uint8_t first_group; - uint8_t backlog_prio; - uint8_t group_prio; - uint8_t *regs = 
&tctx->regs[TM_QW1_OS]; Xive2Nvp nvp; /* @@ -812,9 +967,8 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx, } /* Automatically restore thread context registers */ - if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && - do_restore) { - xive2_tctx_restore_os_ctx(xrtr, tctx, nvp_blk, nvp_idx, &nvp); + if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_restore) { + xive2_tctx_restore_ctx(xrtr, tctx, ring, nvp_blk, nvp_idx, &nvp); } ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2); @@ -822,143 +976,230 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx, nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, 0); xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2); } + /* IPB bits in the backlog are merged with the TIMA IPB bits */ regs[TM_IPB] |= ipb; - backlog_prio = xive_ipb_to_pipr(ipb); - backlog_level = 0; - - first_group = xive_get_field32(NVP2_W0_PGOFIRST, nvp.w0); - if (first_group && regs[TM_LSMFB] < backlog_prio) { - group_prio = xive2_presenter_backlog_scan(xptr, nvp_blk, nvp_idx, - first_group, &group_level); - regs[TM_LSMFB] = group_prio; - if (regs[TM_LGS] && group_prio < backlog_prio) { - /* VP can take a group interrupt */ - xive2_presenter_backlog_decr(xptr, nvp_blk, nvp_idx, - group_prio, group_level); - backlog_prio = group_prio; - backlog_level = group_level; - } - } - - /* - * Compute the PIPR based on the restored state. - * It will raise the External interrupt signal if needed. - */ - xive_tctx_pipr_update(tctx, TM_QW1_OS, backlog_prio, backlog_level); } /* - * Updating the OS CAM line can trigger a resend of interrupt + * Updating the ring CAM line can trigger a resend of interrupt */ -void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, - hwaddr offset, uint64_t value, unsigned size) +static void xive2_tm_push_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size, + uint8_t ring) { uint32_t cam; - uint32_t qw1w2; - uint64_t qw1dw1; + uint32_t w2; + uint64_t dw1; uint8_t nvp_blk; uint32_t nvp_idx; - bool vo; + bool v; bool do_restore; + if (xive_ring_valid(tctx, ring)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Attempt to push VP to enabled" + " ring 0x%02x\n", ring); + return; + } + /* First update the thread context */ switch (size) { + case 1: + tctx->regs[ring + TM_WORD2] = value & 0xff; + cam = xive2_tctx_hw_cam_line(xptr, tctx); + cam |= ((value & 0xc0) << 24); /* V and H bits */ + break; case 4: cam = value; - qw1w2 = cpu_to_be32(cam); - memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1w2, 4); + w2 = cpu_to_be32(cam); + memcpy(&tctx->regs[ring + TM_WORD2], &w2, 4); break; case 8: cam = value >> 32; - qw1dw1 = cpu_to_be64(value); - memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1dw1, 8); + dw1 = cpu_to_be64(value); + memcpy(&tctx->regs[ring + TM_WORD2], &dw1, 8); break; default: g_assert_not_reached(); } - xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &vo, &do_restore); + xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &v, &do_restore); /* Check the interrupt pending bits */ - if (vo) { - xive2_tctx_need_resend(XIVE2_ROUTER(xptr), tctx, nvp_blk, nvp_idx, - do_restore); + if (v) { + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint8_t cur_ring; + + xive2_tctx_restore_nvp(xrtr, tctx, ring, + nvp_blk, nvp_idx, do_restore); + + for (cur_ring = TM_QW1_OS; cur_ring <= ring; + cur_ring += XIVE_TM_RING_SIZE) { + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, cur_ring); + uint8_t nsr = sig_regs[TM_NSR]; + + if (!xive_ring_valid(tctx, cur_ring)) { + continue; + } + + if (cur_ring ==
TM_QW2_HV_POOL) { + if (xive_nsr_indicates_exception(cur_ring, nsr)) { + g_assert(xive_nsr_exception_ring(cur_ring, nsr) == + TM_QW3_HV_PHYS); + xive2_redistribute(xrtr, tctx, + xive_nsr_exception_ring(ring, nsr)); + } + xive2_tctx_process_pending(tctx, TM_QW3_HV_PHYS); + break; + } + xive2_tctx_process_pending(tctx, cur_ring); + } } } +void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW1_OS); +} + +void xive2_tm_push_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW2_HV_POOL); +} + +void xive2_tm_push_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW3_HV_PHYS); +} + +/* returns -1 if ring is invalid, but still populates block and index */ static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring, - uint32_t *nvp_blk, uint32_t *nvp_idx) + uint8_t *nvp_blk, uint32_t *nvp_idx) { - uint32_t w2, cam; + uint32_t w2; + uint32_t cam = 0; + int rc = 0; w2 = xive_tctx_word2(&tctx->regs[ring]); switch (ring) { case TM_QW1_OS: if (!(be32_to_cpu(w2) & TM2_QW1W2_VO)) { - return -1; + rc = -1; } cam = xive_get_field32(TM2_QW1W2_OS_CAM, w2); break; case TM_QW2_HV_POOL: if (!(be32_to_cpu(w2) & TM2_QW2W2_VP)) { - return -1; + rc = -1; } cam = xive_get_field32(TM2_QW2W2_POOL_CAM, w2); break; case TM_QW3_HV_PHYS: if (!(be32_to_cpu(w2) & TM2_QW3W2_VT)) { - return -1; + rc = -1; } cam = xive2_tctx_hw_cam_line(tctx->xptr, tctx); break; default: - return -1; + rc = -1; } *nvp_blk = xive2_nvp_blk(cam); *nvp_idx = xive2_nvp_idx(cam); - return 0; + return rc; } -static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) +static void xive2_tctx_accept_el(XivePresenter *xptr, XiveTCTX *tctx, + uint8_t ring, uint8_t cl_ring) { - uint8_t *regs = &tctx->regs[ring]; - Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr); - uint8_t old_cppr, backlog_prio, first_group, group_level = 0; - uint8_t pipr_min, lsmfb_min, ring_min; - bool group_enabled; - uint32_t nvp_blk, nvp_idx; + uint64_t rd; + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint32_t nvp_idx, xive2_cfg; + uint8_t nvp_blk; Xive2Nvp nvp; - int rc; + uint64_t phys_addr; + uint8_t OGen = 0; - trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring, - regs[TM_IPB], regs[TM_PIPR], - cppr, regs[TM_NSR]); + xive2_tctx_get_nvp_indexes(tctx, cl_ring, &nvp_blk, &nvp_idx); - if (cppr > XIVE_PRIORITY_MAX) { - cppr = 0xff; + if (xive2_router_get_nvp(xrtr, (uint8_t)nvp_blk, nvp_idx, &nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n", + nvp_blk, nvp_idx); + return; + } + + if (!xive2_nvp_is_valid(&nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n", + nvp_blk, nvp_idx); + return; + } + + + rd = xive_tctx_accept(tctx, ring); + + if (ring == TM_QW1_OS) { + OGen = tctx->regs[ring + TM_OGEN]; + } + xive2_cfg = xive2_router_get_config(xrtr); + phys_addr = xive2_nvp_reporting_addr(&nvp); + uint8_t report_data[REPORT_LINE_GEN1_SIZE]; + memset(report_data, 0xff, sizeof(report_data)); + if ((OGen == 1) || (xive2_cfg & XIVE2_GEN1_TIMA_OS)) { + report_data[8] = (rd >> 8) & 0xff; + report_data[9] = rd & 0xff; + } else { + report_data[0] = (rd >> 8) & 0xff; + report_data[1] = rd & 0xff; } + cpu_physical_memory_write(phys_addr, report_data, REPORT_LINE_GEN1_SIZE); +} + +void xive2_tm_ack_os_el(XivePresenter *xptr, XiveTCTX 
*tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tctx_accept_el(xptr, tctx, TM_QW1_OS, TM_QW1_OS); +} + +/* Re-calculate and present pending interrupts */ +static void xive2_tctx_process_pending(XiveTCTX *tctx, uint8_t sig_ring) +{ + uint8_t *sig_regs = &tctx->regs[sig_ring]; + Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr); + uint8_t backlog_prio; + uint8_t first_group; + uint8_t group_level; + uint8_t pipr_min; + uint8_t lsmfb_min; + uint8_t ring_min; + uint8_t cppr = sig_regs[TM_CPPR]; + bool group_enabled; + Xive2Nvp nvp; + int rc; - old_cppr = regs[TM_CPPR]; - regs[TM_CPPR] = cppr; + g_assert(sig_ring == TM_QW3_HV_PHYS || sig_ring == TM_QW1_OS); + g_assert(sig_regs[TM_WORD2] & 0x80); + g_assert(!xive_nsr_indicates_group_exception(sig_ring, sig_regs[TM_NSR])); /* * Recompute the PIPR based on local pending interrupts. It will * be adjusted below if needed in case of pending group interrupts. */ - pipr_min = xive_ipb_to_pipr(regs[TM_IPB]); - group_enabled = !!regs[TM_LGS]; - lsmfb_min = (group_enabled) ? regs[TM_LSMFB] : 0xff; - ring_min = ring; +again: + pipr_min = xive_ipb_to_pipr(sig_regs[TM_IPB]); + group_enabled = !!sig_regs[TM_LGS]; + lsmfb_min = group_enabled ? sig_regs[TM_LSMFB] : 0xff; + ring_min = sig_ring; + group_level = 0; /* PHYS updates also depend on POOL values */ - if (ring == TM_QW3_HV_PHYS) { - uint8_t *pregs = &tctx->regs[TM_QW2_HV_POOL]; + if (sig_ring == TM_QW3_HV_PHYS) { + uint8_t *pool_regs = &tctx->regs[TM_QW2_HV_POOL]; /* POOL values only matter if POOL ctx is valid */ - if (pregs[TM_WORD2] & 0x80) { - - uint8_t pool_pipr = xive_ipb_to_pipr(pregs[TM_IPB]); - uint8_t pool_lsmfb = pregs[TM_LSMFB]; + if (pool_regs[TM_WORD2] & 0x80) { + uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]); + uint8_t pool_lsmfb = pool_regs[TM_LSMFB]; /* * Determine highest priority interrupt and @@ -972,7 +1213,7 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) } /* Values needed for group priority calculation */ - if (pregs[TM_LGS] && (pool_lsmfb < lsmfb_min)) { + if (pool_regs[TM_LGS] && (pool_lsmfb < lsmfb_min)) { group_enabled = true; lsmfb_min = pool_lsmfb; if (lsmfb_min < pipr_min) { @@ -981,32 +1222,26 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) } } } - regs[TM_PIPR] = pipr_min; - - rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx); - if (rc) { - qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid context\n"); - return; - } - - if (cppr < old_cppr) { - /* - * FIXME: check if there's a group interrupt being presented - * and if the new cppr prevents it. If so, then the group - * interrupt needs to be re-added to the backlog and - * re-triggered (see re-trigger END info in the NVGC - * structure) - */ - } if (group_enabled && lsmfb_min < cppr && - lsmfb_min < regs[TM_PIPR]) { + lsmfb_min < pipr_min) { + + uint8_t nvp_blk; + uint32_t nvp_idx; + /* * Thread has seen a group interrupt with a higher priority * than the new cppr or pending local interrupt. 
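For reference, the PIPR recomputation above leans on a compact encoding: priority p is pending when IPB bit (7 - p) is set, and the PIPR is simply the most favored (numerically lowest) pending priority. A minimal sketch of the two conversions, assuming the semantics of the xive_priority_to_ipb()/xive_ipb_to_pipr() helpers this hunk calls (definitions paraphrased, not copied from the tree):

#include <stdint.h>

/* Sketch only: priority 0 (most favored) maps to IPB bit 7, priority 7 to bit 0. */
static uint8_t priority_to_ipb(uint8_t priority)
{
    return priority <= 7 ? 1 << (7 - priority) : 0;
}

/* PIPR is the most favored priority with a pending IPB bit, else 0xff. */
static uint8_t ipb_to_pipr(uint8_t ipb)
{
    return ipb ? __builtin_clz((uint32_t)ipb << 24) : 0xff;
}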
Check the * backlog */ + rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx); + if (rc) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid " + "context\n"); + return; + } + if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n", nvp_blk, nvp_idx); @@ -1030,14 +1265,85 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) nvp_blk, nvp_idx, first_group, &group_level); tctx->regs[ring_min + TM_LSMFB] = backlog_prio; - if (backlog_prio != 0xFF) { - xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx, - backlog_prio, group_level); - regs[TM_PIPR] = backlog_prio; + if (backlog_prio != lsmfb_min) { + /* + * If the group backlog scan finds a less favored or no interrupt, + * then re-do the processing which may turn up a more favored + * interrupt from IPB or the other pool. Backlog should not + * find a priority < LSMFB. + */ + g_assert(backlog_prio >= lsmfb_min); + goto again; + } + + xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx, + backlog_prio, group_level); + pipr_min = backlog_prio; + } + + if (pipr_min > cppr) { + pipr_min = cppr; + } + xive_tctx_pipr_set(tctx, ring_min, pipr_min, group_level); +} + +/* NOTE: CPPR only exists for TM_QW1_OS and TM_QW3_HV_PHYS */ +static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t sig_ring, uint8_t cppr) +{ + uint8_t *sig_regs = &tctx->regs[sig_ring]; + Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr); + uint8_t old_cppr; + uint8_t nsr = sig_regs[TM_NSR]; + + g_assert(sig_ring == TM_QW1_OS || sig_ring == TM_QW3_HV_PHYS); + + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0); + + /* XXX: should show pool IPB for PHYS ring */ + trace_xive_tctx_set_cppr(tctx->cs->cpu_index, sig_ring, + sig_regs[TM_IPB], sig_regs[TM_PIPR], + cppr, nsr); + + if (cppr > XIVE_PRIORITY_MAX) { + cppr = 0xff; + } + + old_cppr = sig_regs[TM_CPPR]; + sig_regs[TM_CPPR] = cppr; + + /* Handle increased CPPR priority (lower value) */ + if (cppr < old_cppr) { + if (cppr <= sig_regs[TM_PIPR]) { + /* CPPR lowered below PIPR, must un-present interrupt */ + if (xive_nsr_indicates_exception(sig_ring, nsr)) { + if (xive_nsr_indicates_group_exception(sig_ring, nsr)) { + /* redistribute precluded active grp interrupt */ + xive2_redistribute(xrtr, tctx, + xive_nsr_exception_ring(sig_ring, nsr)); + return; + } + } + + /* interrupt is VP directed, pending in IPB */ + xive_tctx_pipr_set(tctx, sig_ring, cppr, 0); + return; + } else { + /* CPPR was lowered, but still above PIPR. No action needed. */ + return; } } - /* CPPR has changed, check if we need to raise a pending exception */ - xive_tctx_notify(tctx, ring_min, group_level); + + /* CPPR didn't change, nothing needs to be done */ + if (cppr == old_cppr) { + return; + } + + /* CPPR priority decreased (higher value) */ + if (!xive_nsr_indicates_exception(sig_ring, nsr)) { + xive2_tctx_process_pending(tctx, sig_ring); + } } void xive2_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx, @@ -1052,6 +1358,34 @@ void xive2_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx, xive2_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff); } +/* + * Adjust the IPB to allow a CPU to process event queues of other + * priorities during one physical interrupt cycle. 
+ */ +void xive2_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint8_t ring = TM_QW1_OS; + uint8_t *regs = &tctx->regs[ring]; + uint8_t priority = value & 0xff; + + /* + * XXX: should this simply set a bit in IPB and wait for it to be picked + * up next cycle, or is it supposed to present it now? We implement the + * latter here. + */ + regs[TM_IPB] |= xive_priority_to_ipb(priority); + if (xive_ipb_to_pipr(regs[TM_IPB]) >= regs[TM_PIPR]) { + return; + } + if (xive_nsr_indicates_group_exception(ring, regs[TM_NSR])) { + xive2_redistribute(xrtr, tctx, ring); + } + + xive_tctx_pipr_present(tctx, ring, priority, 0); +} + static void xive2_tctx_set_target(XiveTCTX *tctx, uint8_t ring, uint8_t target) { uint8_t *regs = &tctx->regs[ring]; @@ -1259,9 +1593,7 @@ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority) { - /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ - uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring; - uint8_t *alt_regs = &tctx->regs[alt_ring]; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); /* * The xive2_presenter_tctx_match() above tells if there's a match @@ -1269,7 +1601,7 @@ bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority) * priority to know if the thread can take the interrupt now or if * it is precluded. */ - if (priority < alt_regs[TM_CPPR]) { + if (priority < sig_regs[TM_PIPR]) { return false; } return true; @@ -1322,12 +1654,14 @@ static bool xive2_router_end_es_notify(Xive2Router *xrtr, uint8_t end_blk, * message has the same parameters than in the function below. */ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, - uint32_t end_idx, uint32_t end_data) + uint32_t end_idx, uint32_t end_data, + bool redistribute) { Xive2End end; uint8_t priority; uint8_t format; - bool found, precluded; + XiveTCTXMatch match; + bool crowd, cam_ignore; uint8_t nvx_blk; uint32_t nvx_idx; @@ -1350,7 +1684,8 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, return; } - if (xive2_end_is_enqueue(&end)) { + if (!redistribute && xive2_end_is_enqueue(&end)) { + trace_xive_end_enqueue(end_blk, end_idx, end_data); xive2_end_enqueue(&end, end_data); /* Enqueuing event data modifies the EQ toggle and index */ xive2_router_write_end(xrtr, end_blk, end_idx, &end, 1); @@ -1396,16 +1731,28 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, */ nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end.w6); nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end.w6); - - found = xive_presenter_notify(xrtr->xfb, format, nvx_blk, nvx_idx, - xive2_end_is_crowd(&end), xive2_end_is_ignore(&end), - priority, - xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7), - &precluded); + crowd = xive2_end_is_crowd(&end); + cam_ignore = xive2_end_is_ignore(&end); /* TODO: Auto EOI. 
*/ + if (xive_presenter_match(xrtr->xfb, format, nvx_blk, nvx_idx, + crowd, cam_ignore, priority, + xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7), + &match)) { + XiveTCTX *tctx = match.tctx; + uint8_t ring = match.ring; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); + uint8_t nsr = sig_regs[TM_NSR]; + uint8_t group_level; + + if (priority < sig_regs[TM_PIPR] && + xive_nsr_indicates_group_exception(ring, nsr)) { + xive2_redistribute(xrtr, tctx, xive_nsr_exception_ring(ring, nsr)); + } - if (found) { + group_level = xive_get_group_level(crowd, cam_ignore, nvx_blk, nvx_idx); + trace_xive_presenter_notify(nvx_blk, nvx_idx, ring, group_level); + xive_tctx_pipr_present(tctx, ring, priority, group_level); return; } @@ -1423,7 +1770,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, return; } - if (!xive2_end_is_ignore(&end)) { + if (!cam_ignore) { uint8_t ipb; Xive2Nvp nvp; @@ -1452,9 +1799,6 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, } else { Xive2Nvgc nvgc; uint32_t backlog; - bool crowd; - - crowd = xive2_end_is_crowd(&end); /* * For groups and crowds, the per-priority backlog @@ -1486,9 +1830,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, if (backlog == 1) { XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xrtr->xfb); xfc->broadcast(xrtr->xfb, nvx_blk, nvx_idx, - xive2_end_is_crowd(&end), - xive2_end_is_ignore(&end), - priority); + crowd, cam_ignore, priority); if (!xive2_end_is_precluded_escalation(&end)) { /* @@ -1522,18 +1864,41 @@ do_escalation: } } - /* - * The END trigger becomes an Escalation trigger - */ - xive2_router_end_notify(xrtr, - xive_get_field32(END2_W4_END_BLOCK, end.w4), - xive_get_field32(END2_W4_ESC_END_INDEX, end.w4), - xive_get_field32(END2_W5_ESC_END_DATA, end.w5)); + if (xive2_end_is_escalate_end(&end)) { + /* + * Perform END Adaptive escalation processing + * The END trigger becomes an Escalation trigger + */ + uint8_t esc_blk = xive_get_field32(END2_W4_END_BLOCK, end.w4); + uint32_t esc_idx = xive_get_field32(END2_W4_ESC_END_INDEX, end.w4); + uint32_t esc_data = xive_get_field32(END2_W5_ESC_END_DATA, end.w5); + trace_xive_escalate_end(end_blk, end_idx, esc_blk, esc_idx, esc_data); + xive2_router_end_notify(xrtr, esc_blk, esc_idx, esc_data, false); + } /* end END adaptive escalation */ + + else { + uint32_t lisn; /* Logical Interrupt Source Number */ + + /* + * Perform ESB escalation processing + * E[N] == 1 --> N + * Req[Block] <- E[ESB_Block] + * Req[Index] <- E[ESB_Index] + * Req[Offset] <- 0x000 + * Execute <ESB Store> Req command + */ + lisn = XIVE_EAS(xive_get_field32(END2_W4_END_BLOCK, end.w4), + xive_get_field32(END2_W4_ESC_END_INDEX, end.w4)); + + trace_xive_escalate_esb(end_blk, end_idx, lisn); + xive2_notify(xrtr, lisn, true /* pq_checked */); + } + + return; } -void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) +void xive2_notify(Xive2Router *xrtr, uint32_t lisn, bool pq_checked) { - Xive2Router *xrtr = XIVE2_ROUTER(xn); uint8_t eas_blk = XIVE_EAS_BLOCK(lisn); uint32_t eas_idx = XIVE_EAS_INDEX(lisn); Xive2Eas eas; @@ -1576,13 +1941,31 @@ void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) return; } + /* TODO: add support for EAS resume */ + if (xive2_eas_is_resume(&eas)) { + qemu_log_mask(LOG_UNIMP, + "XIVE: EAS resume processing unimplemented - LISN %x\n", + lisn); + return; + } + /* * The event trigger becomes an END trigger */ xive2_router_end_notify(xrtr, - xive_get_field64(EAS2_END_BLOCK, eas.w), -
xive_get_field64(EAS2_END_INDEX, eas.w), - xive_get_field64(EAS2_END_DATA, eas.w)); + xive_get_field64(EAS2_END_BLOCK, eas.w), + xive_get_field64(EAS2_END_INDEX, eas.w), + xive_get_field64(EAS2_END_DATA, eas.w), + false); + return; +} + +void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) +{ + Xive2Router *xrtr = XIVE2_ROUTER(xn); + + xive2_notify(xrtr, lisn, pq_checked); + return; } static const Property xive2_router_properties[] = { diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 71afb45..304dffa 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -182,7 +182,6 @@ static uint64_t ich9_cc_read(void *opaque, hwaddr addr, } /* IRQ routing */ -/* */ static void ich9_lpc_rout(uint8_t pirq_rout, int *pic_irq, int *pic_dis) { *pic_irq = pirq_rout & ICH9_LPC_PIRQ_ROUT_MASK; diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c index 0324d6a..14d6c52 100644 --- a/hw/loongarch/boot.c +++ b/hw/loongarch/boot.c @@ -35,12 +35,6 @@ struct loongarch_linux_hdr { uint32_t pe_header_offset; } QEMU_PACKED; -struct memmap_entry *memmap_table; -unsigned memmap_entries; - -ram_addr_t initrd_offset; -uint64_t initrd_size; - static const unsigned int slave_boot_code[] = { /* Configure reset ebase. */ 0x0400302c, /* csrwr $t0, LOONGARCH_CSR_EENTRY */ @@ -94,12 +88,16 @@ static inline void *guidcpy(void *dst, const void *src) return memcpy(dst, src, sizeof(efi_guid_t)); } -static void init_efi_boot_memmap(struct efi_system_table *systab, +static void init_efi_boot_memmap(MachineState *ms, + struct efi_system_table *systab, void *p, void *start) { unsigned i; struct efi_boot_memmap *boot_memmap = p; efi_guid_t tbl_guid = LINUX_EFI_BOOT_MEMMAP_GUID; + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms); + struct memmap_entry *memmap_table; + unsigned int memmap_entries; /* efi_configuration_table 1 */ guidcpy(&systab->tables[0].guid, &tbl_guid); @@ -111,6 +109,8 @@ static void init_efi_boot_memmap(struct efi_system_table *systab, boot_memmap->map_size = 0; efi_memory_desc_t *map = p + sizeof(struct efi_boot_memmap); + memmap_table = lvms->memmap_table; + memmap_entries = lvms->memmap_entries; for (i = 0; i < memmap_entries; i++) { map = (void *)boot_memmap + sizeof(*map); map[i].type = memmap_table[i].type; @@ -121,7 +121,8 @@ static void init_efi_boot_memmap(struct efi_system_table *systab, } } -static void init_efi_initrd_table(struct efi_system_table *systab, +static void init_efi_initrd_table(struct loongarch_boot_info *info, + struct efi_system_table *systab, void *p, void *start) { efi_guid_t tbl_guid = LINUX_EFI_INITRD_MEDIA_GUID; @@ -132,8 +133,8 @@ static void init_efi_initrd_table(struct efi_system_table *systab, systab->tables[1].table = (struct efi_configuration_table *)(p - start); systab->nr_tables = 2; - initrd_table->base = initrd_offset; - initrd_table->size = initrd_size; + initrd_table->base = info->initrd_addr; + initrd_table->size = info->initrd_size; } static void init_efi_fdt_table(struct efi_system_table *systab) @@ -146,10 +147,12 @@ static void init_efi_fdt_table(struct efi_system_table *systab) systab->nr_tables = 3; } -static void init_systab(struct loongarch_boot_info *info, void *p, void *start) +static void init_systab(MachineState *ms, + struct loongarch_boot_info *info, void *p, void *start) { void *bp_tables_start; struct efi_system_table *systab = p; + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms); info->a2 = p - start; @@ -166,10 +169,10 @@ static void init_systab(struct loongarch_boot_info *info, void *p, void 
*start) systab->tables = p; bp_tables_start = p; - init_efi_boot_memmap(systab, p, start); + init_efi_boot_memmap(ms, systab, p, start); p += ROUND_UP(sizeof(struct efi_boot_memmap) + - sizeof(efi_memory_desc_t) * memmap_entries, 64 * KiB); - init_efi_initrd_table(systab, p, start); + sizeof(efi_memory_desc_t) * lvms->memmap_entries, 64 * KiB); + init_efi_initrd_table(info, systab, p, start); p += ROUND_UP(sizeof(struct efi_initrd), 64 * KiB); init_efi_fdt_table(systab); @@ -235,10 +238,49 @@ static int64_t load_loongarch_linux_image(const char *filename, return size; } +static ram_addr_t alloc_initrd_memory(struct loongarch_boot_info *info, + uint64_t advice_start, ssize_t rd_size) +{ + hwaddr base, ram_size, gap, low_end; + ram_addr_t initrd_end, initrd_start; + + base = VIRT_LOWMEM_BASE; + gap = VIRT_LOWMEM_SIZE; + initrd_start = advice_start; + initrd_end = initrd_start + rd_size; + + ram_size = info->ram_size; + low_end = base + MIN(ram_size, gap); + if (initrd_end <= low_end) { + return initrd_start; + } + + if (ram_size <= gap) { + error_report("The low memory is too small for initial ram disk '%s', " "you need to expand the RAM", + info->initrd_filename); + exit(1); + } + + /* + * Try to load the initrd in high memory + */ + ram_size -= gap; + initrd_start = VIRT_HIGHMEM_BASE; + if (rd_size <= ram_size) { + return initrd_start; + } + + error_report("The high memory is too small for initial ram disk '%s', " "you need to expand the RAM", + info->initrd_filename); + exit(1); +} + static int64_t load_kernel_info(struct loongarch_boot_info *info) { - uint64_t kernel_entry, kernel_low, kernel_high; - ssize_t kernel_size; + uint64_t kernel_entry, kernel_low, kernel_high, initrd_offset = 0; + ssize_t kernel_size, initrd_size; kernel_size = load_elf(info->kernel_filename, NULL, cpu_loongarch_virt_to_phys, NULL, @@ -263,15 +305,10 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info) initrd_size = get_image_size(info->initrd_filename); if (initrd_size > 0) { initrd_offset = ROUND_UP(kernel_high + 4 * kernel_size, 64 * KiB); - - if (initrd_offset + initrd_size > info->ram_size) { - error_report("memory too small for initial ram disk '%s'", - info->initrd_filename); - exit(1); - } - - initrd_size = load_image_targphys(info->initrd_filename, initrd_offset, - info->ram_size - initrd_offset); + initrd_offset = alloc_initrd_memory(info, initrd_offset, + initrd_size); + initrd_size = load_image_targphys(info->initrd_filename, + initrd_offset, initrd_size); } if (initrd_size == (target_ulong)-1) { @@ -279,8 +316,9 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info) info->initrd_filename); exit(1); } - } else { - initrd_size = 0; + + info->initrd_addr = initrd_offset; + info->initrd_size = initrd_size; } return kernel_entry; @@ -335,17 +373,19 @@ static void loongarch_firmware_boot(LoongArchVirtMachineState *lvms, fw_cfg_add_kernel_info(info, lvms->fw_cfg); } -static void init_boot_rom(struct loongarch_boot_info *info, void *p) +static void init_boot_rom(MachineState *ms, + struct loongarch_boot_info *info, void *p) { void *start = p; init_cmdline(info, p, start); p += COMMAND_LINE_SIZE; - init_systab(info, p, start); + init_systab(ms, info, p, start); } -static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) +static void loongarch_direct_kernel_boot(MachineState *ms, + struct loongarch_boot_info *info) { void *p, *bp; int64_t kernel_addr = VIRT_FLASH0_BASE; @@ -363,7 +403,7 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info)
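The alloc_initrd_memory() helper added above encodes a simple policy: keep the advised offset if the whole image fits below the low-memory gap, otherwise restart at the base of high memory, and fail if neither window is large enough. The same decision in isolation, with hypothetical constants standing in for the board's VIRT_LOWMEM_*/VIRT_HIGHMEM_* definitions:

#include <stdint.h>

/* Hypothetical layout values; the virt board header defines the real ones. */
#define LOWMEM_BASE  0x00000000ULL
#define LOWMEM_SIZE  0x10000000ULL   /* low-memory window before the MMIO gap */
#define HIGHMEM_BASE 0x80000000ULL

/* Returns a load address for the initrd, or 0 if it fits nowhere. */
static uint64_t place_initrd(uint64_t ram_size, uint64_t advice, uint64_t rd_size)
{
    uint64_t low_ram = ram_size < LOWMEM_SIZE ? ram_size : LOWMEM_SIZE;

    if (advice + rd_size <= LOWMEM_BASE + low_ram) {
        return advice;                /* fits below the gap */
    }
    if (ram_size > LOWMEM_SIZE && rd_size <= ram_size - LOWMEM_SIZE) {
        return HIGHMEM_BASE;          /* retry at the base of high memory */
    }
    return 0;                         /* caller reports an error and exits */
}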
/* Load cmdline and system tables at [0 - 1 MiB] */ p = g_malloc0(1 * MiB); bp = p; - init_boot_rom(info, p); + init_boot_rom(ms, info, p); rom_add_blob_fixed_as("boot_info", bp, 1 * MiB, 0, &address_space_memory); /* Load slave boot code at pflash0 . */ @@ -403,6 +443,6 @@ void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info) if (lvms->bios_loaded) { loongarch_firmware_boot(lvms, info); } else { - loongarch_direct_kernel_boot(info); + loongarch_direct_kernel_boot(ms, info); } } diff --git a/hw/loongarch/virt-acpi-build.c b/hw/loongarch/virt-acpi-build.c index 073b6de..8c2228a 100644 --- a/hw/loongarch/virt-acpi-build.c +++ b/hw/loongarch/virt-acpi-build.c @@ -557,7 +557,10 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); build_srat(tables_blob, tables->linker, machine); acpi_add_table(table_offsets, tables_blob); - spcr_setup(tables_blob, tables->linker, machine); + + if (machine->acpi_spcr_enabled) { + spcr_setup(tables_blob, tables->linker, machine); + } if (machine->numa_state->num_nodes) { if (machine->numa_state->have_numa_distance) { @@ -575,8 +577,8 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); { AcpiMcfgInfo mcfg = { - .base = cpu_to_le64(VIRT_PCI_CFG_BASE), - .size = cpu_to_le64(VIRT_PCI_CFG_SIZE), + .base = VIRT_PCI_CFG_BASE, + .size = VIRT_PCI_CFG_SIZE, }; build_mcfg(tables_blob, tables->linker, &mcfg, lvms->oem_id, lvms->oem_table_id); diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index 7ad7fb6..b15ada2 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -136,6 +136,10 @@ static void virt_build_smbios(LoongArchVirtMachineState *lvms) return; } + if (kvm_enabled()) { + product = "KVM Virtual Machine"; + } + smbios_set_defaults("QEMU", product, mc->name); smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64, @@ -168,8 +172,15 @@ static void virt_powerdown_req(Notifier *notifier, void *opaque) acpi_send_event(s->acpi_ged, ACPI_POWER_DOWN_STATUS); } -static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type) +static void memmap_add_entry(MachineState *ms, uint64_t address, + uint64_t length, uint32_t type) { + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms); + struct memmap_entry *memmap_table; + unsigned int memmap_entries; + + memmap_table = lvms->memmap_table; + memmap_entries = lvms->memmap_entries; /* Ensure there are no duplicate entries.
*/ for (unsigned i = 0; i < memmap_entries; i++) { assert(memmap_table[i].address != address); @@ -182,6 +193,8 @@ static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type) memmap_table[memmap_entries].type = cpu_to_le32(type); memmap_table[memmap_entries].reserved = 0; memmap_entries++; + lvms->memmap_table = memmap_table; + lvms->memmap_entries = memmap_entries; } static DeviceState *create_acpi_ged(DeviceState *pch_pic, @@ -401,12 +414,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) lvms->ipi = ipi; sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); - /* IPI iocsr memory region */ - memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX, - sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0)); - memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR, - sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1)); - /* Create EXTIOI device */ extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); lvms->extioi = extioi; @@ -414,12 +421,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) qdev_prop_set_bit(extioi, "has-virtualization-extension", true); } sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); - memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE, - sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); - if (virt_is_veiointc_enabled(lvms)) { - memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE, - sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1)); - } virt_cpu_irq_init(lvms); pch_pic = qdev_new(TYPE_LOONGARCH_PIC); @@ -427,19 +428,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) qdev_prop_set_uint32(pch_pic, "pch_pic_irq_num", num); d = SYS_BUS_DEVICE(pch_pic); sysbus_realize_and_unref(d, &error_fatal); - memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE, - sysbus_mmio_get_region(d, 0)); - memory_region_add_subregion(get_system_memory(), - VIRT_IOAPIC_REG_BASE + PCH_PIC_ROUTE_ENTRY_OFFSET, - sysbus_mmio_get_region(d, 1)); - memory_region_add_subregion(get_system_memory(), - VIRT_IOAPIC_REG_BASE + PCH_PIC_INT_STATUS_LO, - sysbus_mmio_get_region(d, 2)); - - /* Connect pch_pic irqs to extioi */ - for (i = 0; i < num; i++) { - qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i)); - } pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI); start = num; @@ -449,12 +437,40 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms) d = SYS_BUS_DEVICE(pch_msi); sysbus_realize_and_unref(d, &error_fatal); sysbus_mmio_map(d, 0, VIRT_PCH_MSI_ADDR_LOW); - for (i = 0; i < num; i++) { - /* Connect pch_msi irqs to extioi */ - qdev_connect_gpio_out(DEVICE(d), i, - qdev_get_gpio_in(extioi, i + start)); - } + if (kvm_irqchip_in_kernel()) { + kvm_loongarch_init_irq_routing(); + } else { + /* IPI iocsr memory region */ + memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX, + sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0)); + memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR, + sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1)); + + /* EXTIOI iocsr memory region */ + memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); + if (virt_is_veiointc_enabled(lvms)) { + memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1)); + } + + /* PCH_PIC memory region */ + memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(pch_pic), 0)); + + /* Connect pch_pic irqs to extioi */ + for (i = 0; i < 
VIRT_PCH_PIC_IRQ_NUM; i++) { + qdev_connect_gpio_out(DEVICE(pch_pic), i, + qdev_get_gpio_in(extioi, i)); + } + + for (i = VIRT_PCH_PIC_IRQ_NUM; i < EXTIOI_IRQS; i++) { + /* Connect pch_msi irqs to extioi */ + qdev_connect_gpio_out(DEVICE(pch_msi), i - VIRT_PCH_PIC_IRQ_NUM, + qdev_get_gpio_in(extioi, i)); + } + } virt_devices_init(pch_pic, lvms); } @@ -515,6 +531,10 @@ static MemTxResult virt_iocsr_misc_write(void *opaque, hwaddr addr, switch (addr) { case MISC_FUNC_REG: + if (kvm_irqchip_in_kernel()) { + return MEMTX_OK; + } + if (!virt_is_veiointc_enabled(lvms)) { return MEMTX_OK; } @@ -565,6 +585,10 @@ static MemTxResult virt_iocsr_misc_read(void *opaque, hwaddr addr, ret = 0x303030354133ULL; /* "3A5000" */ break; case MISC_FUNC_REG: + if (kvm_irqchip_in_kernel()) { + return MEMTX_OK; + } + if (!virt_is_veiointc_enabled(lvms)) { ret |= BIT_ULL(IOCSRM_EXTIOI_EN); break; @@ -625,13 +649,13 @@ static void fw_cfg_add_memory(MachineState *ms) } if (size >= gap) { - memmap_add_entry(base, gap, 1); + memmap_add_entry(ms, base, gap, 1); size -= gap; base = VIRT_HIGHMEM_BASE; } if (size) { - memmap_add_entry(base, size, 1); + memmap_add_entry(ms, base, size, 1); base += size; } @@ -646,7 +670,7 @@ static void fw_cfg_add_memory(MachineState *ms) * lowram: [base, +(gap - numa_info[0].node_mem)) * highram: [VIRT_HIGHMEM_BASE, +(ram_size - gap)) */ - memmap_add_entry(base, gap - numa_info[0].node_mem, 1); + memmap_add_entry(ms, base, gap - numa_info[0].node_mem, 1); size = ram_size - gap; base = VIRT_HIGHMEM_BASE; } else { @@ -654,7 +678,7 @@ static void fw_cfg_add_memory(MachineState *ms) } if (size) { - memmap_add_entry(base, size, 1); + memmap_add_entry(ms, base, size, 1); } } @@ -740,8 +764,8 @@ static void virt_init(MachineState *machine) rom_set_fw(lvms->fw_cfg); if (lvms->fw_cfg != NULL) { fw_cfg_add_file(lvms->fw_cfg, "etc/memmap", - memmap_table, - sizeof(struct memmap_entry) * (memmap_entries)); + lvms->memmap_table, + sizeof(struct memmap_entry) * lvms->memmap_entries); } /* Initialize the IO interrupt subsystem */ diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index bba923f..be609ff 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -8,6 +8,7 @@ * * SPDX-License-Identifier: GPL-v2-only */ #include "qemu/osdep.h" +#include <math.h> #include "qemu/units.h" @@ -225,10 +226,16 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) * future. */ for (i = 0; i < ct3d->dc.num_regions; i++) { + ct3d->dc.regions[i].nonvolatile = false; + ct3d->dc.regions[i].sharable = false; + ct3d->dc.regions[i].hw_managed_coherency = false; + ct3d->dc.regions[i].ic_specific_dc_management = false; + ct3d->dc.regions[i].rdonly = false; ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++, ct3d->dc.regions[i].len, - false, true, region_base); + ct3d->dc.regions[i].nonvolatile, + true, region_base); ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1; cur_ent += CT3_CDAT_NUM_ENTRIES; @@ -634,6 +641,8 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) uint64_t region_len; uint64_t decode_len; uint64_t blk_size = 2 * MiB; + /* Only 1 block size is supported for now.
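Since only one block size is advertised, the bitmask assignment a few lines below is less cryptic than it looks: assuming the usual encoding where bit n set means a block size of 2^n bytes is supported, a power-of-two size is its own single-bit mask, so blk_size can be stored directly. A self-contained check of that identity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t blk_size = 2 * 1024 * 1024;     /* 2 MiB */
    uint64_t bitmask = blk_size;             /* power of two: value == its own bit */

    assert(bitmask == (uint64_t)1 << 21);    /* bit 21 <=> 2^21 bytes == 2 MiB */
    assert((bitmask & (bitmask - 1)) == 0);  /* exactly one supported size */
    return 0;
}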
*/ + uint64_t supported_blk_size_bitmask = blk_size; CXLDCRegion *region; MemoryRegion *mr; uint64_t dc_size; @@ -679,9 +688,11 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) .block_size = blk_size, /* dsmad_handle set when creating CDAT table entries */ .flags = 0, + .supported_blk_size_bitmask = supported_blk_size_bitmask, }; ct3d->dc.total_capacity += region->len; region->blk_bitmap = bitmap_new(region->len / region->block_size); + qemu_mutex_init(&region->bitmap_lock); } QTAILQ_INIT(&ct3d->dc.extents); QTAILQ_INIT(&ct3d->dc.extents_pending); @@ -843,6 +854,19 @@ static DOEProtocol doe_cdat_prot[] = { { } }; +/* Initialize CXL device alerts with default threshold values. */ +static void init_alert_config(CXLType3Dev *ct3d) +{ + ct3d->alert_config = (CXLAlertConfig) { + .life_used_crit_alert_thresh = 75, + .life_used_warn_thresh = 40, + .over_temp_crit_alert_thresh = 35, + .under_temp_crit_alert_thresh = 10, + .over_temp_warn_thresh = 25, + .under_temp_warn_thresh = 20 + }; +} + static void ct3_realize(PCIDevice *pci_dev, Error **errp) { ERRP_GUARD(); @@ -910,6 +934,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) goto err_msix_uninit; } + init_alert_config(ct3d); pcie_cap_deverr_init(pci_dev); /* Leave a bit of room for expansion */ rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, errp); @@ -969,6 +994,7 @@ static void ct3_exit(PCIDevice *pci_dev) cxl_doe_cdat_release(cxl_cstate); msix_uninit_exclusive_bar(pci_dev); g_free(regs->special_ops); + cxl_destroy_cci(&ct3d->cci); if (ct3d->dc.host_dc) { cxl_destroy_dc_regions(ct3d); address_space_destroy(&ct3d->dc.host_dc_as); @@ -995,6 +1021,7 @@ void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, return; } + QEMU_LOCK_GUARD(&region->bitmap_lock); bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size, len / region->block_size); } @@ -1021,6 +1048,7 @@ bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, * if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is * backed with DC extents, return true; else return false. */ + QEMU_LOCK_GUARD(&region->bitmap_lock); return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits; } @@ -1042,6 +1070,7 @@ void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, nr = (dpa - region->base) / region->block_size; nbits = len / region->block_size; + QEMU_LOCK_GUARD(&region->bitmap_lock); bitmap_clear(region->blk_bitmap, nr, nbits); } @@ -1224,12 +1253,17 @@ static void ct3d_reset(DeviceState *dev) * Bring up an endpoint to target with MCTP over VDM. * This device is emulating an MLD with single LD for now.
*/ + if (ct3d->vdm_fm_owned_ld_mctp_cci.initialized) { + cxl_destroy_cci(&ct3d->vdm_fm_owned_ld_mctp_cci); + } cxl_initialize_t3_fm_owned_ld_mctpcci(&ct3d->vdm_fm_owned_ld_mctp_cci, DEVICE(ct3d), DEVICE(ct3d), 512); /* Max payload made up */ + if (ct3d->ld0_cci.initialized) { + cxl_destroy_cci(&ct3d->ld0_cci); + } cxl_initialize_t3_ld_cci(&ct3d->ld0_cci, DEVICE(ct3d), DEVICE(ct3d), 512); /* Max payload made up */ - } static const Property ct3_props[] = { @@ -1556,9 +1590,9 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type, pcie_aer_inject_error(PCI_DEVICE(obj), &err); } -static void cxl_assign_event_header(CXLEventRecordHdr *hdr, - const QemuUUID *uuid, uint32_t flags, - uint8_t length, uint64_t timestamp) +void cxl_assign_event_header(CXLEventRecordHdr *hdr, + const QemuUUID *uuid, uint32_t flags, + uint8_t length, uint64_t timestamp) { st24_le_p(&hdr->flags, flags); hdr->length = length; @@ -1846,28 +1880,13 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, } } -/* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */ -static const QemuUUID dynamic_capacity_uuid = { - .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f, - 0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a), -}; - -typedef enum CXLDCEventType { - DC_EVENT_ADD_CAPACITY = 0x0, - DC_EVENT_RELEASE_CAPACITY = 0x1, - DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2, - DC_EVENT_REGION_CONFIG_UPDATED = 0x3, - DC_EVENT_ADD_CAPACITY_RSP = 0x4, - DC_EVENT_CAPACITY_RELEASED = 0x5, -} CXLDCEventType; - /* * Check whether the range [dpa, dpa + len - 1] has overlaps with extents in * the list. * Return value: return true if has overlaps; otherwise, return false */ -static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list, - uint64_t dpa, uint64_t len) +bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list, + uint64_t dpa, uint64_t len) { CXLDCExtent *ent; Range range1, range2; @@ -1912,8 +1931,8 @@ bool cxl_extents_contains_dpa_range(CXLDCExtentList *list, return false; } -static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list, - uint64_t dpa, uint64_t len) +bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list, + uint64_t dpa, uint64_t len) { CXLDCExtentGroup *group; @@ -1938,15 +1957,11 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, CxlDynamicCapacityExtentList *records, Error **errp) { Object *obj; - CXLEventDynamicCapacity dCap = {}; - CXLEventRecordHdr *hdr = &dCap.hdr; CXLType3Dev *dcd; - uint8_t flags = 1 << CXL_EVENT_TYPE_INFO; uint32_t num_extents = 0; CxlDynamicCapacityExtentList *list; CXLDCExtentGroup *group = NULL; g_autofree CXLDCExtentRaw *extents = NULL; - uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP; uint64_t dpa, offset, len, block_size; g_autofree unsigned long *blk_bitmap = NULL; int i; @@ -2056,40 +2071,10 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, } if (group) { cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group); + dcd->dc.total_extent_count += num_extents; } - /* - * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record - * - * All Dynamic Capacity event records shall set the Event Record Severity - * field in the Common Event Record Format to Informational Event. All - * Dynamic Capacity related events shall be logged in the Dynamic Capacity - * Event Log. 
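The event-record loop removed just below now lives in cxl_create_dc_event_records_for_extents(); the detail worth keeping in mind is its chaining rule, visible in the deleted code: one record per extent, with the "More" flag (bit 0) set on every record except the last. A sketch of that rule with hypothetical stand-in types, not the device's actual structures:

#include <stdint.h>

struct dc_event {                 /* hypothetical record shape */
    uint8_t flags;
    int extent_index;
};

static void emit_record(const struct dc_event *ev)
{
    (void)ev;                     /* queueing and IRQ assertion happen elsewhere */
}

static void create_records(int num_extents)
{
    for (int i = 0; i < num_extents; i++) {
        struct dc_event ev = { .flags = 0, .extent_index = i };

        if (i < num_extents - 1) {
            ev.flags |= 1u << 0;  /* "More": another record follows */
        }
        emit_record(&ev);
    }
}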
- */ - cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap), - cxl_device_get_timestamp(&dcd->cxl_dstate)); - - dCap.type = type; - /* FIXME: for now, validity flag is cleared */ - dCap.validity_flags = 0; - stw_le_p(&dCap.host_id, hid); - /* only valid for DC_REGION_CONFIG_UPDATED event */ - dCap.updated_region_id = 0; - for (i = 0; i < num_extents; i++) { - memcpy(&dCap.dynamic_capacity_extent, &extents[i], - sizeof(CXLDCExtentRaw)); - - dCap.flags = 0; - if (i < num_extents - 1) { - /* Set "More" flag */ - dCap.flags |= BIT(0); - } - - if (cxl_event_insert(&dcd->cxl_dstate, enc_log, - (CXLEventRecordRaw *)&dCap)) { - cxl_event_irq_assert(dcd); - } - } + cxl_create_dc_event_records_for_extents(dcd, type, extents, num_extents); } void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id, diff --git a/hw/meson.build b/hw/meson.build index b91f761..791ce21 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -39,6 +39,7 @@ subdir('uefi') subdir('ufs') subdir('usb') subdir('vfio') +subdir('vfio-user') subdir('virtio') subdir('vmapple') subdir('watchdog') diff --git a/hw/microblaze/Kconfig b/hw/microblaze/Kconfig index b0214b2..72d8072 100644 --- a/hw/microblaze/Kconfig +++ b/hw/microblaze/Kconfig @@ -1,7 +1,7 @@ config PETALOGIX_S3ADSP1800 bool default y - depends on MICROBLAZE + depends on MICROBLAZE && FDT select PFLASH_CFI01 select XILINX select XILINX_AXI @@ -11,7 +11,7 @@ config PETALOGIX_S3ADSP1800 config PETALOGIX_ML605 bool default y - depends on MICROBLAZE + depends on MICROBLAZE && FDT select PFLASH_CFI01 select SERIAL_MM select SSI_M25P80 diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c index bea6b68..6e923c4 100644 --- a/hw/microblaze/petalogix_ml605_mmu.c +++ b/hw/microblaze/petalogix_ml605_mmu.c @@ -80,8 +80,6 @@ petalogix_ml605_init(MachineState *machine) MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1); MemoryRegion *phys_ram = g_new(MemoryRegion, 1); qemu_irq irq[32]; - EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG - : ENDIAN_MODE_LITTLE; /* init CPUs */ cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU)); @@ -113,7 +111,7 @@ petalogix_ml605_init(MachineState *machine) dev = qdev_new("xlnx.xps-intc"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint32(dev, "kind-of-intr", 1 << TIMER_IRQ); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR); @@ -129,7 +127,7 @@ petalogix_ml605_init(MachineState *machine) /* 2 timers at irq 2 @ 100 Mhz. 
*/ dev = qdev_new("xlnx.xps-timer"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint32(dev, "one-timer-only", 0); qdev_prop_set_uint32(dev, "clock-frequency", 100 * 1000000); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -177,7 +175,7 @@ petalogix_ml605_init(MachineState *machine) SSIBus *spi; dev = qdev_new("xlnx.xps-spi"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES); busdev = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(busdev, &error_fatal); @@ -218,12 +216,7 @@ petalogix_ml605_init(MachineState *machine) static void petalogix_ml605_machine_init(MachineClass *mc) { - if (TARGET_BIG_ENDIAN) { - mc->desc = "PetaLogix linux refdesign for xilinx ml605 (big endian)"; - mc->deprecation_reason = "big endian support is not tested"; - } else { - mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)"; - } + mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)"; mc->init = petalogix_ml605_init; } diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c index 032f6f7..e8d0ddf 100644 --- a/hw/microblaze/petalogix_s3adsp1800_mmu.c +++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c @@ -58,9 +58,20 @@ #define TYPE_PETALOGIX_S3ADSP1800_MACHINE \ MACHINE_TYPE_NAME("petalogix-s3adsp1800") +struct S3Adsp1800MachineState { + MachineState parent_class; + + EndianMode endianness; +}; + +OBJECT_DECLARE_TYPE(S3Adsp1800MachineState, MachineClass, + PETALOGIX_S3ADSP1800_MACHINE) + + static void petalogix_s3adsp1800_init(MachineState *machine) { + S3Adsp1800MachineState *psms = PETALOGIX_S3ADSP1800_MACHINE(machine); ram_addr_t ram_size = machine->ram_size; DeviceState *dev; MicroBlazeCPU *cpu; @@ -71,13 +82,12 @@ petalogix_s3adsp1800_init(MachineState *machine) MemoryRegion *phys_ram = g_new(MemoryRegion, 1); qemu_irq irq[32]; MemoryRegion *sysmem = get_system_memory(); - EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG - : ENDIAN_MODE_LITTLE; + EndianMode endianness = psms->endianness; cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU)); object_property_set_str(OBJECT(cpu), "version", "7.10.d", &error_abort); object_property_set_bool(OBJECT(cpu), "little-endian", - !TARGET_BIG_ENDIAN, &error_abort); + endianness == ENDIAN_MODE_LITTLE, &error_abort); qdev_realize(DEVICE(cpu), NULL, &error_abort); /* Attach emulated BRAM through the LMB. 
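The endianness QOM property registered a little further down replaces the old compile-time TARGET_BIG_ENDIAN switch for this board, so a single binary can boot either layout. A hypothetical invocation selecting the non-default byte order (binary name, kernel image and default value depend on the build):

    qemu-system-microblaze -machine petalogix-s3adsp1800,endianness=big -kernel vmlinux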
*/ @@ -135,20 +145,41 @@ petalogix_s3adsp1800_init(MachineState *machine) create_unimplemented_device("xps_gpio", GPIO_BASEADDR, 0x10000); - microblaze_load_kernel(cpu, !TARGET_BIG_ENDIAN, ddr_base, ram_size, - machine->initrd_filename, + microblaze_load_kernel(cpu, endianness == ENDIAN_MODE_LITTLE, ddr_base, + ram_size, machine->initrd_filename, BINARY_DEVICE_TREE_FILE, NULL); } +static int machine_get_endianness(Object *obj, Error **errp G_GNUC_UNUSED) +{ + S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj); + return ms->endianness; +} + +static void machine_set_endianness(Object *obj, int endianness, Error **errp) +{ + S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj); + ms->endianness = endianness; +} + static void petalogix_s3adsp1800_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); + ObjectProperty *prop; mc->desc = "PetaLogix linux refdesign for xilinx Spartan 3ADSP1800"; mc->init = petalogix_s3adsp1800_init; mc->is_default = true; + + prop = object_class_property_add_enum(oc, "endianness", "EndianMode", + &EndianMode_lookup, + machine_get_endianness, + machine_set_endianness); + object_property_set_default_str(prop, TARGET_BIG_ENDIAN ? "big" : "little"); + object_class_property_set_description(oc, "endianness", + "Defines whether the machine runs in big or little endian mode"); } static const TypeInfo petalogix_s3adsp1800_machine_types[] = { @@ -156,6 +187,7 @@ static const TypeInfo petalogix_s3adsp1800_machine_types[] = { .name = TYPE_PETALOGIX_S3ADSP1800_MACHINE, .parent = TYPE_MACHINE, .class_init = petalogix_s3adsp1800_machine_class_init, + .instance_size = sizeof(S3Adsp1800MachineState), }, }; diff --git a/hw/microblaze/xlnx-zynqmp-pmu.c b/hw/microblaze/xlnx-zynqmp-pmu.c index ed40b5f..e909802 100644 --- a/hw/microblaze/xlnx-zynqmp-pmu.c +++ b/hw/microblaze/xlnx-zynqmp-pmu.c @@ -181,12 +181,7 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine) static void xlnx_zynqmp_pmu_machine_init(MachineClass *mc) { - if (TARGET_BIG_ENDIAN) { - mc->desc = "Xilinx ZynqMP PMU machine (big endian)"; - mc->deprecation_reason = "big endian support is not tested"; - } else { - mc->desc = "Xilinx ZynqMP PMU machine (little endian)"; - } + mc->desc = "Xilinx ZynqMP PMU machine (little endian)"; mc->init = xlnx_zynqmp_pmu_init; } diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig index b09c89a..f84fffc 100644 --- a/hw/mips/Kconfig +++ b/hw/mips/Kconfig @@ -76,7 +76,7 @@ config LOONGSON3V config MIPS_CPS bool - select MIPS_ITU + select MIPS_ITU if TCG config MIPS_BOSTON bool diff --git a/hw/mips/cps.c b/hw/mips/cps.c index 2a3ba3f..e47695e 100644 --- a/hw/mips/cps.c +++ b/hw/mips/cps.c @@ -24,7 +24,7 @@ #include "hw/mips/mips.h" #include "hw/qdev-clock.h" #include "hw/qdev-properties.h" -#include "system/kvm.h" +#include "system/tcg.h" #include "system/reset.h" qemu_irq get_cps_irq(MIPSCPSState *s, int pin_number) @@ -59,7 +59,7 @@ static bool cpu_mips_itu_supported(CPUMIPSState *env) { bool is_mt = (env->CP0_Config5 & (1 << CP0C5_VP)) || ase_mt_available(env); - return is_mt && !kvm_enabled(); + return is_mt && tcg_enabled(); } static void mips_cps_realize(DeviceState *dev, Error **errp) diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig index ec0fa5a..4e35657 100644 --- a/hw/misc/Kconfig +++ b/hw/misc/Kconfig @@ -47,6 +47,18 @@ config A9SCU config ARM11SCU bool +config MAX78000_AES + bool + +config MAX78000_GCR + bool + +config MAX78000_ICC + bool + +config MAX78000_TRNG + bool + config MOS6522 bool @@ -107,6 +119,7 @@ config 
STM32L4X5_RCC config MIPS_ITU bool + depends on TCG config MPS2_FPGAIO bool diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c index f4bff32..726368f 100644 --- a/hw/misc/aspeed_hace.c +++ b/hw/misc/aspeed_hace.c @@ -10,14 +10,17 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qemu/log.h" #include "qemu/error-report.h" +#include "qemu/iov.h" #include "hw/misc/aspeed_hace.h" #include "qapi/error.h" #include "migration/vmstate.h" #include "crypto/hash.h" #include "hw/qdev-properties.h" #include "hw/irq.h" +#include "trace.h" #define R_CRYPT_CMD (0x10 / 4) @@ -27,9 +30,12 @@ #define TAG_IRQ BIT(15) #define R_HASH_SRC (0x20 / 4) -#define R_HASH_DEST (0x24 / 4) +#define R_HASH_DIGEST (0x24 / 4) #define R_HASH_KEY_BUFF (0x28 / 4) #define R_HASH_SRC_LEN (0x2c / 4) +#define R_HASH_SRC_HI (0x90 / 4) +#define R_HASH_DIGEST_HI (0x94 / 4) +#define R_HASH_KEY_BUFF_HI (0x98 / 4) #define R_HASH_CMD (0x30 / 4) /* Hash algorithm selection */ @@ -84,6 +90,42 @@ static const struct { QCRYPTO_HASH_ALGO_SHA256 }, }; +static void hace_hexdump(const char *desc, const char *buf, size_t size) +{ + g_autoptr(GString) str = g_string_sized_new(64); + size_t len; + size_t i; + + for (i = 0; i < size; i += len) { + len = MIN(16, size - i); + g_string_truncate(str, 0); + qemu_hexdump_line(str, buf + i, len, 1, 4); + trace_aspeed_hace_hexdump(desc, i, str->str); + } +} + +static void hace_iov_hexdump(const char *desc, const struct iovec *iov, + const unsigned int iov_cnt) +{ + size_t size = 0; + char *buf; + int i; + + for (i = 0; i < iov_cnt; i++) { + size += iov[i].iov_len; + } + + buf = g_malloc(size); + + if (!buf) { + return; + } + + iov_to_buf(iov, iov_cnt, 0, buf, size); + hace_hexdump(desc, buf, size); + g_free(buf); +} + static int hash_algo_lookup(uint32_t reg) { int i; @@ -142,171 +184,269 @@ static bool has_padding(AspeedHACEState *s, struct iovec *iov, return false; } -static int reconstruct_iov(AspeedHACEState *s, struct iovec *iov, int id, - uint32_t *pad_offset) +static uint64_t hash_get_source_addr(AspeedHACEState *s) { - int i, iov_count; - if (*pad_offset != 0) { - s->iov_cache[s->iov_count].iov_base = iov[id].iov_base; - s->iov_cache[s->iov_count].iov_len = *pad_offset; - ++s->iov_count; - } - for (i = 0; i < s->iov_count; i++) { - iov[i].iov_base = s->iov_cache[i].iov_base; - iov[i].iov_len = s->iov_cache[i].iov_len; + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); + uint64_t src_addr = 0; + + src_addr = deposit64(src_addr, 0, 32, s->regs[R_HASH_SRC]); + if (ahc->has_dma64) { + src_addr = deposit64(src_addr, 32, 32, s->regs[R_HASH_SRC_HI]); } - iov_count = s->iov_count; - s->iov_count = 0; - s->total_req_len = 0; - return iov_count; + + return src_addr; } -static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode, - bool acc_mode) +static int hash_prepare_direct_iov(AspeedHACEState *s, struct iovec *iov, + bool acc_mode, bool *acc_final_request) { - struct iovec iov[ASPEED_HACE_MAX_SG]; uint32_t total_msg_len; uint32_t pad_offset; - g_autofree uint8_t *digest_buf = NULL; - size_t digest_len = 0; - bool sg_acc_mode_final_request = false; - int i; + uint64_t src; void *haddr; - Error *local_err = NULL; + hwaddr plen; + int iov_idx; + + plen = s->regs[R_HASH_SRC_LEN]; + src = hash_get_source_addr(s); + trace_aspeed_hace_hash_addr("src", src); + haddr = address_space_map(&s->dram_as, src, &plen, false, + MEMTXATTRS_UNSPECIFIED); + if (haddr == NULL) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Unable to map address, addr=0x%" HWADDR_PRIx + " ,plen=0x%" HWADDR_PRIx 
"\n", + __func__, src, plen); + return -1; + } - if (acc_mode && s->hash_ctx == NULL) { - s->hash_ctx = qcrypto_hash_new(algo, &local_err); - if (s->hash_ctx == NULL) { - qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash failed : %s", - error_get_pretty(local_err)); - error_free(local_err); - return; + iov[0].iov_base = haddr; + iov_idx = 1; + + if (acc_mode) { + s->total_req_len += plen; + + if (has_padding(s, &iov[0], plen, &total_msg_len, + &pad_offset)) { + /* Padding being present indicates the final request */ + *acc_final_request = true; + iov[0].iov_len = pad_offset; + } else { + iov[0].iov_len = plen; } + } else { + iov[0].iov_len = plen; } - if (sg_mode) { - uint32_t len = 0; - - for (i = 0; !(len & SG_LIST_LEN_LAST); i++) { - uint32_t addr, src; - hwaddr plen; + return iov_idx; +} - if (i == ASPEED_HACE_MAX_SG) { - qemu_log_mask(LOG_GUEST_ERROR, - "aspeed_hace: guest failed to set end of sg list marker\n"); - break; - } +static int hash_prepare_sg_iov(AspeedHACEState *s, struct iovec *iov, + bool acc_mode, bool *acc_final_request) +{ + uint32_t total_msg_len; + uint32_t pad_offset; + uint32_t len = 0; + uint32_t sg_addr; + uint64_t src; + int iov_idx; + hwaddr plen; + void *haddr; - src = s->regs[R_HASH_SRC] + (i * SG_LIST_ENTRY_SIZE); + src = hash_get_source_addr(s); + for (iov_idx = 0; !(len & SG_LIST_LEN_LAST); iov_idx++) { + if (iov_idx == ASPEED_HACE_MAX_SG) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Failed to set end of sg list marker\n", + __func__); + return -1; + } - len = address_space_ldl_le(&s->dram_as, src, + len = address_space_ldl_le(&s->dram_as, src, + MEMTXATTRS_UNSPECIFIED, NULL); + sg_addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE, MEMTXATTRS_UNSPECIFIED, NULL); + sg_addr &= SG_LIST_ADDR_MASK; + trace_aspeed_hace_hash_sg(iov_idx, src, sg_addr, len); + /* + * To maintain compatibility with older SoCs such as the AST2600, + * the AST2700 HW automatically set bit 34 of the 64-bit sg_addr. + * As a result, the firmware only needs to provide a 32-bit sg_addr + * containing bits [31:0]. This is sufficient for the AST2700, as + * it uses a DRAM offset rather than a DRAM address. 
+ */ + plen = len & SG_LIST_LEN_MASK; + haddr = address_space_map(&s->dram_as, sg_addr, &plen, false, + MEMTXATTRS_UNSPECIFIED); - addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE, - MEMTXATTRS_UNSPECIFIED, NULL); - addr &= SG_LIST_ADDR_MASK; + if (haddr == NULL) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Unable to map address, sg_addr=0x%x, " + "plen=0x%" HWADDR_PRIx "\n", + __func__, sg_addr, plen); + return -1; + } - plen = len & SG_LIST_LEN_MASK; - haddr = address_space_map(&s->dram_as, addr, &plen, false, - MEMTXATTRS_UNSPECIFIED); - if (haddr == NULL) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: qcrypto failed\n", __func__); - return; - } - iov[i].iov_base = haddr; - if (acc_mode) { - s->total_req_len += plen; - - if (has_padding(s, &iov[i], plen, &total_msg_len, - &pad_offset)) { - /* Padding being present indicates the final request */ - sg_acc_mode_final_request = true; - iov[i].iov_len = pad_offset; - } else { - iov[i].iov_len = plen; - } + src += SG_LIST_ENTRY_SIZE; + + iov[iov_idx].iov_base = haddr; + if (acc_mode) { + s->total_req_len += plen; + + if (has_padding(s, &iov[iov_idx], plen, &total_msg_len, + &pad_offset)) { + /* Padding being present indicates the final request */ + *acc_final_request = true; + iov[iov_idx].iov_len = pad_offset; } else { - iov[i].iov_len = plen; + iov[iov_idx].iov_len = plen; } + } else { + iov[iov_idx].iov_len = plen; } - } else { - hwaddr len = s->regs[R_HASH_SRC_LEN]; + } - haddr = address_space_map(&s->dram_as, s->regs[R_HASH_SRC], - &len, false, MEMTXATTRS_UNSPECIFIED); - if (haddr == NULL) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n", __func__); + return iov_idx; +} + +static uint64_t hash_get_digest_addr(AspeedHACEState *s) +{ + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); + uint64_t digest_addr = 0; + + digest_addr = deposit64(digest_addr, 0, 32, s->regs[R_HASH_DIGEST]); + if (ahc->has_dma64) { + digest_addr = deposit64(digest_addr, 32, 32, s->regs[R_HASH_DIGEST_HI]); + } + + return digest_addr; +} + +static void hash_write_digest_and_unmap_iov(AspeedHACEState *s, + struct iovec *iov, + int iov_idx, + uint8_t *digest_buf, + size_t digest_len) +{ + uint64_t digest_addr = 0; + + digest_addr = hash_get_digest_addr(s); + trace_aspeed_hace_hash_addr("digest", digest_addr); + if (address_space_write(&s->dram_as, digest_addr, + MEMTXATTRS_UNSPECIFIED, + digest_buf, digest_len)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Failed to write digest to 0x%" HWADDR_PRIx "\n", + __func__, digest_addr); + } + + if (trace_event_get_state_backends(TRACE_ASPEED_HACE_HEXDUMP)) { + hace_hexdump("digest", (char *)digest_buf, digest_len); + } + + for (; iov_idx > 0; iov_idx--) { + address_space_unmap(&s->dram_as, iov[iov_idx - 1].iov_base, + iov[iov_idx - 1].iov_len, false, + iov[iov_idx - 1].iov_len); + } +} + +static void hash_execute_non_acc_mode(AspeedHACEState *s, int algo, + struct iovec *iov, int iov_idx) +{ + g_autofree uint8_t *digest_buf = NULL; + Error *local_err = NULL; + size_t digest_len = 0; + + if (qcrypto_hash_bytesv(algo, iov, iov_idx, &digest_buf, + &digest_len, &local_err) < 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: qcrypto hash bytesv failed : %s", + __func__, error_get_pretty(local_err)); + error_free(local_err); + return; + } + + hash_write_digest_and_unmap_iov(s, iov, iov_idx, digest_buf, digest_len); +} + +static void hash_execute_acc_mode(AspeedHACEState *s, int algo, + struct iovec *iov, int iov_idx, + bool final_request) +{ + g_autofree uint8_t *digest_buf = NULL; + Error *local_err = NULL; + size_t 
digest_len = 0; + + trace_aspeed_hace_hash_execute_acc_mode(final_request); + + if (s->hash_ctx == NULL) { + s->hash_ctx = qcrypto_hash_new(algo, &local_err); + if (s->hash_ctx == NULL) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto hash new failed : %s", + __func__, error_get_pretty(local_err)); + error_free(local_err); return; } - iov[0].iov_base = haddr; - iov[0].iov_len = len; - i = 1; - - if (s->iov_count) { - /* - * In aspeed sdk kernel driver, sg_mode is disabled in hash_final(). - * Thus if we received a request with sg_mode disabled, it is - * required to check whether cache is empty. If no, we should - * combine cached iov and the current iov. - */ - s->total_req_len += len; - if (has_padding(s, iov, len, &total_msg_len, &pad_offset)) { - i = reconstruct_iov(s, iov, 0, &pad_offset); - } - } } - if (acc_mode) { - if (qcrypto_hash_updatev(s->hash_ctx, iov, i, &local_err) < 0) { - qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash update failed : %s", - error_get_pretty(local_err)); + if (qcrypto_hash_updatev(s->hash_ctx, iov, iov_idx, &local_err) < 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto hash updatev failed : %s", + __func__, error_get_pretty(local_err)); + error_free(local_err); + return; + } + + if (final_request) { + if (qcrypto_hash_finalize_bytes(s->hash_ctx, &digest_buf, + &digest_len, &local_err)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: qcrypto hash finalize bytes failed : %s", + __func__, error_get_pretty(local_err)); error_free(local_err); - return; + local_err = NULL; } - if (sg_acc_mode_final_request) { - if (qcrypto_hash_finalize_bytes(s->hash_ctx, &digest_buf, - &digest_len, &local_err)) { - qemu_log_mask(LOG_GUEST_ERROR, - "qcrypto hash finalize failed : %s", - error_get_pretty(local_err)); - error_free(local_err); - local_err = NULL; - } + qcrypto_hash_free(s->hash_ctx); + + s->hash_ctx = NULL; + s->total_req_len = 0; + } - qcrypto_hash_free(s->hash_ctx); + hash_write_digest_and_unmap_iov(s, iov, iov_idx, digest_buf, digest_len); +} - s->hash_ctx = NULL; - s->iov_count = 0; - s->total_req_len = 0; - } - } else if (qcrypto_hash_bytesv(algo, iov, i, &digest_buf, - &digest_len, &local_err) < 0) { - qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash bytesv failed : %s", - error_get_pretty(local_err)); - error_free(local_err); - return; +static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode, + bool acc_mode) +{ + QEMU_UNINITIALIZED struct iovec iov[ASPEED_HACE_MAX_SG]; + bool acc_final_request = false; + int iov_idx = -1; + + /* Prepares the iov for hashing operations based on the selected mode */ + if (sg_mode) { + iov_idx = hash_prepare_sg_iov(s, iov, acc_mode, &acc_final_request); + } else { + iov_idx = hash_prepare_direct_iov(s, iov, acc_mode, + &acc_final_request); } - if (address_space_write(&s->dram_as, s->regs[R_HASH_DEST], - MEMTXATTRS_UNSPECIFIED, - digest_buf, digest_len)) { + if (iov_idx <= 0) { qemu_log_mask(LOG_GUEST_ERROR, - "aspeed_hace: address space write failed\n"); + "%s: Failed to prepare iov\n", __func__); + return; } - for (; i > 0; i--) { - address_space_unmap(&s->dram_as, iov[i - 1].iov_base, - iov[i - 1].iov_len, false, - iov[i - 1].iov_len); + if (trace_event_get_state_backends(TRACE_ASPEED_HACE_HEXDUMP)) { + hace_iov_hexdump("plaintext", iov, iov_idx); } - /* - * Set status bits to indicate completion. Testing shows hardware sets - * these irrespective of HASH_IRQ_EN. 
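Both the source and digest pointers are assembled the same way on DMA64-capable parts: the base register supplies bits [31:0] and the *_HI register bits [63:32]. A sketch of the composition, with a plain shift/or standing in for QEMU's deposit64():

#include <stdint.h>

/* Compose a 64-bit DMA address from the lo/hi register pair. */
static uint64_t compose_addr(uint32_t lo, uint32_t hi, int has_dma64)
{
    uint64_t addr = lo;

    if (has_dma64) {
        addr |= (uint64_t)hi << 32;   /* equivalent of deposit64(addr, 32, 32, hi) */
    }
    return addr;
}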
- */ - s->regs[R_STATUS] |= HASH_IRQ; + /* Executes the hash operation */ + if (acc_mode) { + hash_execute_acc_mode(s, algo, iov, iov_idx, acc_final_request); + } else { + hash_execute_non_acc_mode(s, algo, iov, iov_idx); + } } static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size) @@ -315,12 +455,7 @@ static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size) addr >>= 2; - if (addr >= ASPEED_HACE_NR_REGS) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n", - __func__, addr << 2); - return 0; - } + trace_aspeed_hace_read(addr << 2, s->regs[addr]); return s->regs[addr]; } @@ -333,12 +468,7 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, addr >>= 2; - if (addr >= ASPEED_HACE_NR_REGS) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n", - __func__, addr << 2); - return; - } + trace_aspeed_hace_write(addr << 2, data); switch (addr) { case R_STATUS: @@ -362,7 +492,7 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, case R_HASH_SRC: data &= ahc->src_mask; break; - case R_HASH_DEST: + case R_HASH_DIGEST: data &= ahc->dest_mask; break; case R_HASH_KEY_BUFF: @@ -390,10 +520,16 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid hash algorithm selection 0x%"PRIx64"\n", __func__, data & ahc->hash_mask); - break; + } else { + do_hash_operation(s, algo, data & HASH_SG_EN, + ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM)); } - do_hash_operation(s, algo, data & HASH_SG_EN, - ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM)); + + /* + * Set status bits to indicate completion. Testing shows hardware sets + * these irrespective of HASH_IRQ_EN. 
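Since completion is latched in R_STATUS whether or not HASH_IRQ_EN is set, a guest driver can run the engine fully polled. A minimal driver-side sketch; the HASH_IRQ bit position, the register offsets, and the write-1-to-clear acknowledge are assumptions chosen only to illustrate the flow, not the authoritative HACE register map:

    #include <stdint.h>

    #define HASH_IRQ  (1u << 9)                 /* status bit position assumed */
    enum { HACE_STS = 0x00, HACE_CMD = 0x10 };  /* illustrative offsets */

    static void hace_hash_polled(volatile uint32_t *hace, uint32_t cmd)
    {
        hace[HACE_CMD / 4] = cmd;               /* HASH_IRQ_EN left clear */
        while (!(hace[HACE_STS / 4] & HASH_IRQ)) {
            /* busy-wait: status latches even with the interrupt masked */
        }
        hace[HACE_STS / 4] = HASH_IRQ;          /* acknowledge (assumed W1C) */
    }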
+ */ + s->regs[R_STATUS] |= HASH_IRQ; if (data & HASH_IRQ_EN) { qemu_irq_raise(s->irq); @@ -410,6 +546,15 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, } } break; + case R_HASH_SRC_HI: + data &= ahc->src_hi_mask; + break; + case R_HASH_DIGEST_HI: + data &= ahc->dest_hi_mask; + break; + case R_HASH_KEY_BUFF_HI: + data &= ahc->key_hi_mask; + break; default: break; } @@ -430,14 +575,14 @@ static const MemoryRegionOps aspeed_hace_ops = { static void aspeed_hace_reset(DeviceState *dev) { struct AspeedHACEState *s = ASPEED_HACE(dev); + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); if (s->hash_ctx != NULL) { qcrypto_hash_free(s->hash_ctx); s->hash_ctx = NULL; } - memset(s->regs, 0, sizeof(s->regs)); - s->iov_count = 0; + memset(s->regs, 0, ahc->nr_regs << 2); s->total_req_len = 0; } @@ -445,11 +590,13 @@ static void aspeed_hace_realize(DeviceState *dev, Error **errp) { AspeedHACEState *s = ASPEED_HACE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); sysbus_init_irq(sbd, &s->irq); + s->regs = g_new(uint32_t, ahc->nr_regs); memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_hace_ops, s, - TYPE_ASPEED_HACE, 0x1000); + TYPE_ASPEED_HACE, ahc->nr_regs << 2); if (!s->dram_mr) { error_setg(errp, TYPE_ASPEED_HACE ": 'dram' link not set"); @@ -469,21 +616,28 @@ static const Property aspeed_hace_properties[] = { static const VMStateDescription vmstate_aspeed_hace = { .name = TYPE_ASPEED_HACE, - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (const VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, AspeedHACEState, ASPEED_HACE_NR_REGS), VMSTATE_UINT32(total_req_len, AspeedHACEState), - VMSTATE_UINT32(iov_count, AspeedHACEState), VMSTATE_END_OF_LIST(), } }; +static void aspeed_hace_unrealize(DeviceState *dev) +{ + AspeedHACEState *s = ASPEED_HACE(dev); + + g_free(s->regs); + s->regs = NULL; +} + static void aspeed_hace_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = aspeed_hace_realize; + dc->unrealize = aspeed_hace_unrealize; device_class_set_legacy_reset(dc, aspeed_hace_reset); device_class_set_props(dc, aspeed_hace_properties); dc->vmsd = &vmstate_aspeed_hace; @@ -504,6 +658,7 @@ static void aspeed_ast2400_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST2400 Hash and Crypto Engine"; + ahc->nr_regs = 0x64 >> 2; ahc->src_mask = 0x0FFFFFFF; ahc->dest_mask = 0x0FFFFFF8; ahc->key_mask = 0x0FFFFFC0; @@ -523,6 +678,7 @@ static void aspeed_ast2500_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST2500 Hash and Crypto Engine"; + ahc->nr_regs = 0x64 >> 2; ahc->src_mask = 0x3fffffff; ahc->dest_mask = 0x3ffffff8; ahc->key_mask = 0x3FFFFFC0; @@ -542,6 +698,7 @@ static void aspeed_ast2600_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST2600 Hash and Crypto Engine"; + ahc->nr_regs = 0x64 >> 2; ahc->src_mask = 0x7FFFFFFF; ahc->dest_mask = 0x7FFFFFF8; ahc->key_mask = 0x7FFFFFF8; @@ -561,6 +718,7 @@ static void aspeed_ast1030_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST1030 Hash and Crypto Engine"; + ahc->nr_regs = 0x64 >> 2; ahc->src_mask = 0x7FFFFFFF; ahc->dest_mask = 0x7FFFFFF8; ahc->key_mask = 0x7FFFFFF8; @@ -580,17 +738,36 @@ static void aspeed_ast2700_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST2700 Hash and Crypto Engine"; + ahc->nr_regs = 0x9C >> 2; ahc->src_mask = 0x7FFFFFFF; ahc->dest_mask = 0x7FFFFFF8; ahc->key_mask = 
0x7FFFFFF8;
     ahc->hash_mask = 0x00147FFF;
 
     /*
+     * The AST2700 supports a maximum DRAM size of 8 GB, with a DRAM
+     * addressable range from 0x0_0000_0000 to 0x1_FFFF_FFFF. Since this range
+     * fits within 34 bits, only bits [33:0] are needed to store the DRAM
+     * offset. To optimize address storage, the high physical address bits
+     * [1:0] of the source, digest and key buffer addresses are stored as
+     * dram_offset bits [33:32].
+     *
+     * This approach eliminates the need to reduce the high part of the DRAM
+     * physical address for DMA operations. Previously, this was calculated as
+     * (high physical address bits [7:0] - 4), since the DRAM start address is
+     * 0x4_00000000, making the high part address [7:0] - 4.
+     */
+    ahc->src_hi_mask = 0x00000003;
+    ahc->dest_hi_mask = 0x00000003;
+    ahc->key_hi_mask = 0x00000003;
+
+    /*
      * Currently, it does not support the CRYPT command. Instead, it only
      * sends an interrupt to notify the firmware that the crypt command
      * has completed. It is a temporary workaround.
      */
     ahc->raise_crypt_interrupt_workaround = true;
+    ahc->has_dma64 = true;
 }
 
 static const TypeInfo aspeed_ast2700_hace_info = {
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 4930e00..a0ab5ee 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -91,6 +91,7 @@
 #define BMC_DEV_ID                TO_REG(0x1A4)
 
 #define AST2600_PROT_KEY          TO_REG(0x00)
+#define AST2600_PROT_KEY2         TO_REG(0x10)
 #define AST2600_SILICON_REV       TO_REG(0x04)
 #define AST2600_SILICON_REV2      TO_REG(0x14)
 #define AST2600_SYS_RST_CTRL      TO_REG(0x40)
@@ -176,6 +177,7 @@
 #define AST2700_SCUIO_UARTCLK_GEN TO_REG(0x330)
 #define AST2700_SCUIO_HUARTCLK_GEN TO_REG(0x334)
 #define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388)
+#define AST2700_SCUIO_FREQ_CNT_CTL TO_REG(0x3A0)
 
 #define SCU_IO_REGION_SIZE 0x1000
 
@@ -722,6 +724,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
     int reg = TO_REG(offset);
     /* Truncate here so bitwise operations below behave as expected */
     uint32_t data = data64;
+    bool prot_data_state = data == ASPEED_SCU_PROT_KEY;
+    bool unlocked = s->regs[AST2600_PROT_KEY] && s->regs[AST2600_PROT_KEY2];
 
     if (reg >= ASPEED_AST2600_SCU_NR_REGS) {
         qemu_log_mask(LOG_GUEST_ERROR,
@@ -730,15 +734,24 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
         return;
     }
 
-    if (reg > PROT_KEY && !s->regs[PROT_KEY]) {
+    if ((reg != AST2600_PROT_KEY && reg != AST2600_PROT_KEY2) && !unlocked) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__);
+        return;
     }
 
     trace_aspeed_scu_write(offset, size, data);
 
     switch (reg) {
     case AST2600_PROT_KEY:
-        s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0;
+        /*
+         * Writing the key value to SCU000 modifies both protection
+         * registers at once, whereas writing to SCU010 modifies only
+         * that protection register individually.
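The double-register protection reduces to a small state machine. A self-contained sketch of the logic the diff implements, assuming the conventional ASPEED unlock password 0x1688A8A8 for ASPEED_SCU_PROT_KEY (the diff itself only compares against the symbol):

    #include <stdbool.h>
    #include <stdint.h>

    #define SCU_PROT_KEY 0x1688A8A8u          /* assumed unlock password */

    typedef struct { bool key1, key2; } ScuLock;

    /* SCU000 drives both protection registers; SCU010 only itself. */
    static void scu_key_write(ScuLock *s, uint32_t reg, uint32_t val)
    {
        bool match = (val == SCU_PROT_KEY);

        if (reg == 0x00) {
            s->key1 = match;
            s->key2 = match;
        } else if (reg == 0x10) {
            s->key2 = match;
        }
    }

    /* Any other register write is dropped until both keys are set,
     * matching the added early return in aspeed_ast2600_scu_write(). */
    static bool scu_unlocked(const ScuLock *s)
    {
        return s->key1 && s->key2;
    }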
+ */ + s->regs[AST2600_PROT_KEY] = prot_data_state; + s->regs[AST2600_PROT_KEY2] = prot_data_state; + return; + case AST2600_PROT_KEY2: + s->regs[AST2600_PROT_KEY2] = prot_data_state; return; case AST2600_HW_STRAP1: case AST2600_HW_STRAP2: @@ -1022,6 +1035,10 @@ static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset, s->regs[reg - 1] ^= data; updated = true; break; + case AST2700_SCUIO_FREQ_CNT_CTL: + s->regs[reg] = deposit32(s->regs[reg], 6, 1, !!(data & BIT(1))); + updated = true; + break; default: qemu_log_mask(LOG_GUEST_ERROR, "%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n", @@ -1066,6 +1083,7 @@ static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = { [AST2700_SCUIO_UARTCLK_GEN] = 0x00014506, [AST2700_SCUIO_HUARTCLK_GEN] = 0x000145c0, [AST2700_SCUIO_CLK_DUTY_MEAS_RST] = 0x0c9100d2, + [AST2700_SCUIO_FREQ_CNT_CTL] = 0x00000080, }; static void aspeed_2700_scuio_class_init(ObjectClass *klass, const void *data) diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c index f04d993..dff7cc3 100644 --- a/hw/misc/aspeed_sdmc.c +++ b/hw/misc/aspeed_sdmc.c @@ -570,6 +570,9 @@ static void aspeed_2700_sdmc_reset(DeviceState *dev) /* Set ram size bit and defaults values */ s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0); + /* Skipping dram init */ + s->regs[R_MAIN_CONTROL] = BIT(16); + if (s->unlocked) { s->regs[R_2700_PROT] = PROT_UNLOCKED; } diff --git a/hw/misc/ivshmem-flat.c b/hw/misc/ivshmem-flat.c index be28c24..fe4be6b 100644 --- a/hw/misc/ivshmem-flat.c +++ b/hw/misc/ivshmem-flat.c @@ -362,7 +362,7 @@ static bool ivshmem_flat_connect_server(DeviceState *dev, Error **errp) * * ivshmem_flat_recv_msg() calls return 'msg' and 'fd'. * - * See ./docs/specs/ivshmem-spec.txt for details on the protocol. + * See docs/specs/ivshmem-spec.rst for details on the protocol. 
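For readers following the handshake, a sketch of one receive step under the framing the spec describes (each message is an 8-byte little-endian integer, with at most one fd attached via SCM_RIGHTS); the helper name and the bool-plus-out-params shape are illustrative, not the QEMU API:

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    static bool ivshmem_recv_one(int sock, int64_t *msg, int *fd)
    {
        struct iovec iov = { .iov_base = msg, .iov_len = sizeof(*msg) };
        union {
            struct cmsghdr align;
            char buf[CMSG_SPACE(sizeof(int))];
        } u;
        struct msghdr mh = {
            .msg_iov = &iov,
            .msg_iovlen = 1,
            .msg_control = u.buf,
            .msg_controllen = sizeof(u.buf),
        };
        struct cmsghdr *c;

        *fd = -1;                            /* "no fd attached" sentinel */
        if (recvmsg(sock, &mh, 0) != sizeof(*msg)) {
            return false;                    /* short read or socket error */
        }
        for (c = CMSG_FIRSTHDR(&mh); c != NULL; c = CMSG_NXTHDR(&mh, c)) {
            if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS) {
                memcpy(fd, CMSG_DATA(c), sizeof(*fd));
            }
        }
        return true;                         /* byte-swap *msg on BE hosts */
    }

The ivshmem-pci hunk below hardens exactly the case where *fd stays -1: a shared-memory message without an fd is now rejected, and close() is never called on the sentinel.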
*/
 
     /* Step 0 */
diff --git a/hw/misc/ivshmem-pci.c b/hw/misc/ivshmem-pci.c
index 5a10bca..d47ae73 100644
--- a/hw/misc/ivshmem-pci.c
+++ b/hw/misc/ivshmem-pci.c
@@ -479,6 +479,11 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
     struct stat buf;
     size_t size;
 
+    if (fd < 0) {
+        error_setg(errp, "server didn't provide fd with shared memory message");
+        return;
+    }
+
     if (s->ivshmem_bar2) {
         error_setg(errp, "server sent unexpected shared memory message");
         close(fd);
@@ -553,7 +558,9 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
 
     if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
         error_setg(errp, "server sent invalid message %" PRId64, msg);
-        close(fd);
+        if (fd >= 0) {
+            close(fd);
+        }
         return;
     }
 
diff --git a/hw/misc/max78000_aes.c b/hw/misc/max78000_aes.c
new file mode 100644
index 0000000..d883ddd
--- /dev/null
+++ b/hw/misc/max78000_aes.c
@@ -0,0 +1,229 @@
+/*
+ * MAX78000 AES
+ *
+ * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "hw/irq.h"
+#include "migration/vmstate.h"
+#include "hw/misc/max78000_aes.h"
+#include "crypto/aes.h"
+
+static void max78000_aes_set_status(Max78000AesState *s)
+{
+    s->status = 0;
+    if (s->result_index >= 16) {
+        s->status |= OUTPUT_FULL;
+    }
+    if (s->result_index == 0) {
+        s->status |= OUTPUT_EMPTY;
+    }
+    if (s->data_index >= 16) {
+        s->status |= INPUT_FULL;
+    }
+    if (s->data_index == 0) {
+        s->status |= INPUT_EMPTY;
+    }
+}
+
+static uint64_t max78000_aes_read(void *opaque, hwaddr addr,
+                                  unsigned int size)
+{
+    Max78000AesState *s = opaque;
+    switch (addr) {
+    case CTRL:
+        return s->ctrl;
+
+    case STATUS:
+        return s->status;
+
+    case INTFL:
+        return s->intfl;
+
+    case INTEN:
+        return s->inten;
+
+    case FIFO:
+        if (s->result_index >= 4) {
+            s->intfl &= ~DONE;
+            s->result_index -= 4;
+            max78000_aes_set_status(s);
+            return ldl_be_p(&s->result[s->result_index]);
+        } else {
+            return 0;
+        }
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"
+                      HWADDR_PRIx "\n", __func__, addr);
+        break;
+
+    }
+    return 0;
+}
+
+static void max78000_aes_do_crypto(Max78000AesState *s)
+{
+    int keylen = 256;
+    uint8_t *keydata = s->key;
+    if ((s->ctrl & KEY_SIZE) == 0) {
+        keylen = 128;
+        keydata += 16;
+    } else if ((s->ctrl & KEY_SIZE) == 1 << 6) {
+        keylen = 192;
+        keydata += 8;
+    }
+
+    /*
+     * The MAX78000 AES engine stores an internal key, which it uses only
+     * for decryption. This results in the slightly odd-looking pairs of
+     * set_encrypt and set_decrypt calls below; s->internal_key is
+     * being stored for later use in both cases.
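A compile-time sketch of what that caching buys, using the same AES_* helpers this file uses (the all-zero 256-bit key and the function name are illustrative only):

    #include <stdint.h>
    #include "crypto/aes.h"              /* AES_KEY and the AES_* helpers */

    static const uint8_t key256[32];     /* all-zero key, illustration only */

    static void demo_internal_key(void)
    {
        AES_KEY enc, internal;
        uint8_t pt[16] = { 0 }, ct[16], out[16];

        /* A key load fills both schedules, as the device model does:
         * the encrypt schedule serves the current operation, while the
         * decrypt schedule is the copy kept in s->internal_key. */
        AES_set_encrypt_key(key256, 256, &enc);
        AES_set_decrypt_key(key256, 256, &internal);

        AES_encrypt(pt, ct, &enc);        /* encrypt path */
        AES_decrypt(ct, out, &internal);  /* later "decrypt with internal
                                             key" path, no key reload */
        (void)out;
    }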
+ */ + AES_KEY key; + if ((s->ctrl & TYPE) == 0) { + AES_set_encrypt_key(keydata, keylen, &key); + AES_set_decrypt_key(keydata, keylen, &s->internal_key); + AES_encrypt(s->data, s->result, &key); + s->result_index = 16; + } else if ((s->ctrl & TYPE) == 1 << 8) { + AES_set_decrypt_key(keydata, keylen, &key); + AES_set_decrypt_key(keydata, keylen, &s->internal_key); + AES_decrypt(s->data, s->result, &key); + s->result_index = 16; + } else{ + AES_decrypt(s->data, s->result, &s->internal_key); + s->result_index = 16; + } + s->intfl |= DONE; +} + +static void max78000_aes_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000AesState *s = opaque; + uint32_t val = val64; + switch (addr) { + case CTRL: + if (val & OUTPUT_FLUSH) { + s->result_index = 0; + val &= ~OUTPUT_FLUSH; + } + if (val & INPUT_FLUSH) { + s->data_index = 0; + val &= ~INPUT_FLUSH; + } + if (val & START) { + max78000_aes_do_crypto(s); + } + + /* Hardware appears to stay enabled even if 0 written */ + s->ctrl = val | (s->ctrl & AES_EN); + break; + + case FIFO: + assert(s->data_index <= 12); + stl_be_p(&s->data[12 - s->data_index], val); + s->data_index += 4; + if (s->data_index >= 16) { + s->data_index = 0; + max78000_aes_do_crypto(s); + } + break; + + case KEY_BASE ... KEY_END - 4: + stl_be_p(&s->key[(KEY_END - KEY_BASE - 4) - (addr - KEY_BASE)], val); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + + } + max78000_aes_set_status(s); +} + +static void max78000_aes_reset_hold(Object *obj, ResetType type) +{ + Max78000AesState *s = MAX78000_AES(obj); + s->ctrl = 0; + s->status = 0; + s->intfl = 0; + s->inten = 0; + + s->data_index = 0; + s->result_index = 0; + + memset(s->data, 0, sizeof(s->data)); + memset(s->key, 0, sizeof(s->key)); + memset(s->result, 0, sizeof(s->result)); + memset(&s->internal_key, 0, sizeof(s->internal_key)); +} + +static const MemoryRegionOps max78000_aes_ops = { + .read = max78000_aes_read, + .write = max78000_aes_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const VMStateDescription vmstate_max78000_aes = { + .name = TYPE_MAX78000_AES, + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(ctrl, Max78000AesState), + VMSTATE_UINT32(status, Max78000AesState), + VMSTATE_UINT32(intfl, Max78000AesState), + VMSTATE_UINT32(inten, Max78000AesState), + VMSTATE_UINT8_ARRAY(data, Max78000AesState, 16), + VMSTATE_UINT8_ARRAY(key, Max78000AesState, 32), + VMSTATE_UINT8_ARRAY(result, Max78000AesState, 16), + VMSTATE_UINT32_ARRAY(internal_key.rd_key, Max78000AesState, 60), + VMSTATE_INT32(internal_key.rounds, Max78000AesState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_aes_init(Object *obj) +{ + Max78000AesState *s = MAX78000_AES(obj); + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &max78000_aes_ops, s, + TYPE_MAX78000_AES, 0xc00); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + +} + +static void max78000_aes_class_init(ObjectClass *klass, const void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + rc->phases.hold = max78000_aes_reset_hold; + dc->vmsd = &vmstate_max78000_aes; + +} + +static const TypeInfo max78000_aes_info = { + .name = TYPE_MAX78000_AES, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000AesState), + .instance_init = max78000_aes_init, + .class_init = 
max78000_aes_class_init, +}; + +static void max78000_aes_register_types(void) +{ + type_register_static(&max78000_aes_info); +} + +type_init(max78000_aes_register_types) diff --git a/hw/misc/max78000_gcr.c b/hw/misc/max78000_gcr.c new file mode 100644 index 0000000..fbbc92c --- /dev/null +++ b/hw/misc/max78000_gcr.c @@ -0,0 +1,351 @@ +/* + * MAX78000 Global Control Registers + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/irq.h" +#include "system/runstate.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" +#include "hw/char/max78000_uart.h" +#include "hw/misc/max78000_trng.h" +#include "hw/misc/max78000_aes.h" +#include "hw/misc/max78000_gcr.h" + + +static void max78000_gcr_reset_hold(Object *obj, ResetType type) +{ + DeviceState *dev = DEVICE(obj); + Max78000GcrState *s = MAX78000_GCR(dev); + s->sysctrl = 0x21002; + s->rst0 = 0; + /* All clocks are always ready */ + s->clkctrl = 0x3e140008; + s->pm = 0x3f000; + s->pclkdiv = 0; + s->pclkdis0 = 0xffffffff; + s->memctrl = 0x5; + s->memz = 0; + s->sysst = 0; + s->rst1 = 0; + s->pckdis1 = 0xffffffff; + s->eventen = 0; + s->revision = 0xa1; + s->sysie = 0; + s->eccerr = 0; + s->ecced = 0; + s->eccie = 0; + s->eccaddr = 0; +} + +static uint64_t max78000_gcr_read(void *opaque, hwaddr addr, + unsigned int size) +{ + Max78000GcrState *s = opaque; + + switch (addr) { + case SYSCTRL: + return s->sysctrl; + + case RST0: + return s->rst0; + + case CLKCTRL: + return s->clkctrl; + + case PM: + return s->pm; + + case PCLKDIV: + return s->pclkdiv; + + case PCLKDIS0: + return s->pclkdis0; + + case MEMCTRL: + return s->memctrl; + + case MEMZ: + return s->memz; + + case SYSST: + return s->sysst; + + case RST1: + return s->rst1; + + case PCKDIS1: + return s->pckdis1; + + case EVENTEN: + return s->eventen; + + case REVISION: + return s->revision; + + case SYSIE: + return s->sysie; + + case ECCERR: + return s->eccerr; + + case ECCED: + return s->ecced; + + case ECCIE: + return s->eccie; + + case ECCADDR: + return s->eccaddr; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + return 0; + + } +} + +static void max78000_gcr_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000GcrState *s = opaque; + uint32_t val = val64; + uint8_t zero[0xc000] = {0}; + switch (addr) { + case SYSCTRL: + /* Checksum calculations always pass immediately */ + s->sysctrl = (val & 0x30000) | 0x1002; + break; + + case RST0: + if (val & SYSTEM_RESET) { + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + if (val & PERIPHERAL_RESET) { + /* + * Peripheral reset resets all peripherals. The CPU + * retains its state. The GPIO, watchdog timers, AoD, + * RAM retention, and general control registers (GCR), + * including the clock configuration, are unaffected. 
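Stated from the guest's point of view, with hypothetical read32/write32 MMIO helpers and symbolic offsets standing in for the real register map:

    #include <assert.h>
    #include <stdint.h>

    /* read32/write32, CLKCTRL, RST0 and PERIPHERAL_RESET are stand-ins
     * for a real MMIO layer; this only illustrates the contract above. */
    static void gcr_reset_demo(uintptr_t gcr_base)
    {
        uint32_t clk = read32(gcr_base + CLKCTRL);   /* GCR clock config... */

        write32(gcr_base + RST0, PERIPHERAL_RESET);  /* UARTs, TRNG, etc. reset */
        assert(read32(gcr_base + CLKCTRL) == clk);   /* ...survives untouched */
    }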
+ */ + val = UART2_RESET | UART1_RESET | UART0_RESET | + ADC_RESET | CNN_RESET | TRNG_RESET | + RTC_RESET | I2C0_RESET | SPI1_RESET | + TMR3_RESET | TMR2_RESET | TMR1_RESET | + TMR0_RESET | WDT0_RESET | DMA_RESET; + } + if (val & SOFT_RESET) { + /* Soft reset also resets GPIO */ + val = UART2_RESET | UART1_RESET | UART0_RESET | + ADC_RESET | CNN_RESET | TRNG_RESET | + RTC_RESET | I2C0_RESET | SPI1_RESET | + TMR3_RESET | TMR2_RESET | TMR1_RESET | + TMR0_RESET | GPIO1_RESET | GPIO0_RESET | + DMA_RESET; + } + if (val & UART2_RESET) { + device_cold_reset(s->uart2); + } + if (val & UART1_RESET) { + device_cold_reset(s->uart1); + } + if (val & UART0_RESET) { + device_cold_reset(s->uart0); + } + if (val & TRNG_RESET) { + device_cold_reset(s->trng); + } + if (val & AES_RESET) { + device_cold_reset(s->aes); + } + /* TODO: As other devices are implemented, add them here */ + break; + + case CLKCTRL: + s->clkctrl = val | SYSCLK_RDY; + break; + + case PM: + s->pm = val; + break; + + case PCLKDIV: + s->pclkdiv = val; + break; + + case PCLKDIS0: + s->pclkdis0 = val; + break; + + case MEMCTRL: + s->memctrl = val; + break; + + case MEMZ: + if (val & ram0) { + address_space_write(&s->sram_as, SYSRAM0_START, + MEMTXATTRS_UNSPECIFIED, zero, 0x8000); + } + if (val & ram1) { + address_space_write(&s->sram_as, SYSRAM1_START, + MEMTXATTRS_UNSPECIFIED, zero, 0x8000); + } + if (val & ram2) { + address_space_write(&s->sram_as, SYSRAM2_START, + MEMTXATTRS_UNSPECIFIED, zero, 0xC000); + } + if (val & ram3) { + address_space_write(&s->sram_as, SYSRAM3_START, + MEMTXATTRS_UNSPECIFIED, zero, 0x4000); + } + break; + + case SYSST: + s->sysst = val; + break; + + case RST1: + /* TODO: As other devices are implemented, add them here */ + s->rst1 = val; + break; + + case PCKDIS1: + s->pckdis1 = val; + break; + + case EVENTEN: + s->eventen = val; + break; + + case REVISION: + s->revision = val; + break; + + case SYSIE: + s->sysie = val; + break; + + case ECCERR: + s->eccerr = val; + break; + + case ECCED: + s->ecced = val; + break; + + case ECCIE: + s->eccie = val; + break; + + case ECCADDR: + s->eccaddr = val; + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + break; + + } +} + +static const Property max78000_gcr_properties[] = { + DEFINE_PROP_LINK("sram", Max78000GcrState, sram, + TYPE_MEMORY_REGION, MemoryRegion*), + DEFINE_PROP_LINK("uart0", Max78000GcrState, uart0, + TYPE_MAX78000_UART, DeviceState*), + DEFINE_PROP_LINK("uart1", Max78000GcrState, uart1, + TYPE_MAX78000_UART, DeviceState*), + DEFINE_PROP_LINK("uart2", Max78000GcrState, uart2, + TYPE_MAX78000_UART, DeviceState*), + DEFINE_PROP_LINK("trng", Max78000GcrState, trng, + TYPE_MAX78000_TRNG, DeviceState*), + DEFINE_PROP_LINK("aes", Max78000GcrState, aes, + TYPE_MAX78000_AES, DeviceState*), +}; + +static const MemoryRegionOps max78000_gcr_ops = { + .read = max78000_gcr_read, + .write = max78000_gcr_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const VMStateDescription vmstate_max78000_gcr = { + .name = TYPE_MAX78000_GCR, + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(sysctrl, Max78000GcrState), + VMSTATE_UINT32(rst0, Max78000GcrState), + VMSTATE_UINT32(clkctrl, Max78000GcrState), + VMSTATE_UINT32(pm, Max78000GcrState), + VMSTATE_UINT32(pclkdiv, Max78000GcrState), + VMSTATE_UINT32(pclkdis0, Max78000GcrState), + VMSTATE_UINT32(memctrl, Max78000GcrState), + VMSTATE_UINT32(memz, 
Max78000GcrState), + VMSTATE_UINT32(sysst, Max78000GcrState), + VMSTATE_UINT32(rst1, Max78000GcrState), + VMSTATE_UINT32(pckdis1, Max78000GcrState), + VMSTATE_UINT32(eventen, Max78000GcrState), + VMSTATE_UINT32(revision, Max78000GcrState), + VMSTATE_UINT32(sysie, Max78000GcrState), + VMSTATE_UINT32(eccerr, Max78000GcrState), + VMSTATE_UINT32(ecced, Max78000GcrState), + VMSTATE_UINT32(eccie, Max78000GcrState), + VMSTATE_UINT32(eccaddr, Max78000GcrState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_gcr_init(Object *obj) +{ + Max78000GcrState *s = MAX78000_GCR(obj); + + memory_region_init_io(&s->mmio, obj, &max78000_gcr_ops, s, + TYPE_MAX78000_GCR, 0x400); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + +} + +static void max78000_gcr_realize(DeviceState *dev, Error **errp) +{ + Max78000GcrState *s = MAX78000_GCR(dev); + + address_space_init(&s->sram_as, s->sram, "sram"); +} + +static void max78000_gcr_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + device_class_set_props(dc, max78000_gcr_properties); + + dc->realize = max78000_gcr_realize; + dc->vmsd = &vmstate_max78000_gcr; + rc->phases.hold = max78000_gcr_reset_hold; +} + +static const TypeInfo max78000_gcr_info = { + .name = TYPE_MAX78000_GCR, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000GcrState), + .instance_init = max78000_gcr_init, + .class_init = max78000_gcr_class_init, +}; + +static void max78000_gcr_register_types(void) +{ + type_register_static(&max78000_gcr_info); +} + +type_init(max78000_gcr_register_types) diff --git a/hw/misc/max78000_icc.c b/hw/misc/max78000_icc.c new file mode 100644 index 0000000..6f7d2b2 --- /dev/null +++ b/hw/misc/max78000_icc.c @@ -0,0 +1,120 @@ +/* + * MAX78000 Instruction Cache + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "hw/misc/max78000_icc.h" + + +static uint64_t max78000_icc_read(void *opaque, hwaddr addr, + unsigned int size) +{ + Max78000IccState *s = opaque; + switch (addr) { + case ICC_INFO: + return s->info; + + case ICC_SZ: + return s->sz; + + case ICC_CTRL: + return s->ctrl; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + return 0; + + } +} + +static void max78000_icc_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000IccState *s = opaque; + + switch (addr) { + case ICC_CTRL: + s->ctrl = 0x10000 | (val64 & 1); + break; + + case ICC_INVALIDATE: + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + break; + } +} + +static const MemoryRegionOps max78000_icc_ops = { + .read = max78000_icc_read, + .write = max78000_icc_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const VMStateDescription max78000_icc_vmstate = { + .name = TYPE_MAX78000_ICC, + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(info, Max78000IccState), + VMSTATE_UINT32(sz, Max78000IccState), + VMSTATE_UINT32(ctrl, Max78000IccState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_icc_reset_hold(Object *obj, ResetType type) +{ + Max78000IccState *s = MAX78000_ICC(obj); + s->info = 0; + s->sz = 0x10000010; + s->ctrl = 
0x10000; +} + +static void max78000_icc_init(Object *obj) +{ + Max78000IccState *s = MAX78000_ICC(obj); + + memory_region_init_io(&s->mmio, obj, &max78000_icc_ops, s, + TYPE_MAX78000_ICC, 0x800); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void max78000_icc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + rc->phases.hold = max78000_icc_reset_hold; + dc->vmsd = &max78000_icc_vmstate; +} + +static const TypeInfo max78000_icc_info = { + .name = TYPE_MAX78000_ICC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000IccState), + .instance_init = max78000_icc_init, + .class_init = max78000_icc_class_init, +}; + +static void max78000_icc_register_types(void) +{ + type_register_static(&max78000_icc_info); +} + +type_init(max78000_icc_register_types) diff --git a/hw/misc/max78000_trng.c b/hw/misc/max78000_trng.c new file mode 100644 index 0000000..ecdaef5 --- /dev/null +++ b/hw/misc/max78000_trng.c @@ -0,0 +1,139 @@ +/* + * MAX78000 True Random Number Generator + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "hw/misc/max78000_trng.h" +#include "qemu/guest-random.h" + +static uint64_t max78000_trng_read(void *opaque, hwaddr addr, + unsigned int size) +{ + uint32_t data; + + Max78000TrngState *s = opaque; + switch (addr) { + case CTRL: + return s->ctrl; + + case STATUS: + return 1; + + case DATA: + /* + * When interrupts are enabled, reading random data should cause a + * new interrupt to be generated; since there's always a random number + * available, we could qemu_set_irq(s->irq, s->ctrl & RND_IE). Because + * of how trng_write is set up, this is always a noop, so don't + */ + qemu_guest_getrandom_nofail(&data, sizeof(data)); + return data; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + } + return 0; +} + +static void max78000_trng_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000TrngState *s = opaque; + uint32_t val = val64; + switch (addr) { + case CTRL: + /* TODO: implement AES keygen */ + s->ctrl = val; + + /* + * This device models random number generation as taking 0 time. + * A new random number is always available, so the condition for the + * RND interrupt is always fulfilled; we can just set irq to 1. 
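From the driver side, the zero-latency model removes the usual readiness loop entirely. A sketch using the same hypothetical read32/write32 MMIO helpers as earlier examples, with the CTRL/DATA offsets and RND_IE as named in this file:

    #include <stdint.h>

    /* read32/write32 are hypothetical MMIO helpers, illustration only. */
    static uint64_t trng_read64_demo(uintptr_t trng_base)
    {
        uint32_t lo, hi;

        write32(trng_base + CTRL, RND_IE);   /* IRQ line rises immediately */
        /* STATUS always reads "ready" and DATA never blocks, so
         * back-to-back reads are safe. */
        lo = read32(trng_base + DATA);
        hi = read32(trng_base + DATA);
        return ((uint64_t)hi << 32) | lo;
    }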
+ */ + if (val & RND_IE) { + qemu_set_irq(s->irq, 1); + } else{ + qemu_set_irq(s->irq, 0); + } + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + } +} + +static void max78000_trng_reset_hold(Object *obj, ResetType type) +{ + Max78000TrngState *s = MAX78000_TRNG(obj); + s->ctrl = 0; + s->status = 0; + s->data = 0; +} + +static const MemoryRegionOps max78000_trng_ops = { + .read = max78000_trng_read, + .write = max78000_trng_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const VMStateDescription max78000_trng_vmstate = { + .name = TYPE_MAX78000_TRNG, + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(ctrl, Max78000TrngState), + VMSTATE_UINT32(status, Max78000TrngState), + VMSTATE_UINT32(data, Max78000TrngState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_trng_init(Object *obj) +{ + Max78000TrngState *s = MAX78000_TRNG(obj); + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &max78000_trng_ops, s, + TYPE_MAX78000_TRNG, 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + +} + +static void max78000_trng_class_init(ObjectClass *klass, const void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + rc->phases.hold = max78000_trng_reset_hold; + dc->vmsd = &max78000_trng_vmstate; + +} + +static const TypeInfo max78000_trng_info = { + .name = TYPE_MAX78000_TRNG, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000TrngState), + .instance_init = max78000_trng_init, + .class_init = max78000_trng_class_init, +}; + +static void max78000_trng_register_types(void) +{ + type_register_static(&max78000_trng_info); +} + +type_init(max78000_trng_register_types) diff --git a/hw/misc/mchp_pfsoc_sysreg.c b/hw/misc/mchp_pfsoc_sysreg.c index bfa78d3..f47c835 100644 --- a/hw/misc/mchp_pfsoc_sysreg.c +++ b/hw/misc/mchp_pfsoc_sysreg.c @@ -27,7 +27,9 @@ #include "hw/irq.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_sysreg.h" +#include "system/runstate.h" +#define MSS_RESET_CR 0x18 #define ENVM_CR 0xb8 #define MESSAGE_INT 0x118c @@ -56,6 +58,11 @@ static void mchp_pfsoc_sysreg_write(void *opaque, hwaddr offset, { MchpPfSoCSysregState *s = opaque; switch (offset) { + case MSS_RESET_CR: + if (value == 0xdead) { + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + break; case MESSAGE_INT: qemu_irq_lower(s->irq); break; diff --git a/hw/misc/meson.build b/hw/misc/meson.build index 6d47de4..b1d8d8e 100644 --- a/hw/misc/meson.build +++ b/hw/misc/meson.build @@ -70,6 +70,10 @@ system_ss.add(when: 'CONFIG_IMX', if_true: files( 'imx_ccm.c', 'imx_rngc.c', )) +system_ss.add(when: 'CONFIG_MAX78000_AES', if_true: files('max78000_aes.c')) +system_ss.add(when: 'CONFIG_MAX78000_GCR', if_true: files('max78000_gcr.c')) +system_ss.add(when: 'CONFIG_MAX78000_ICC', if_true: files('max78000_icc.c')) +system_ss.add(when: 'CONFIG_MAX78000_TRNG', if_true: files('max78000_trng.c')) system_ss.add(when: 'CONFIG_NPCM7XX', if_true: files( 'npcm_clk.c', 'npcm_gcr.c', diff --git a/hw/misc/omap_clk.c b/hw/misc/omap_clk.c index 0157c9b..da95c4a 100644 --- a/hw/misc/omap_clk.c +++ b/hw/misc/omap_clk.c @@ -30,170 +30,170 @@ struct clk { struct clk *parent; struct clk *child1; struct clk *sibling; -#define ALWAYS_ENABLED (1 << 0) -#define CLOCK_IN_OMAP310 (1 << 10) -#define CLOCK_IN_OMAP730 (1 << 11) -#define 
CLOCK_IN_OMAP1510 (1 << 12) -#define CLOCK_IN_OMAP16XX (1 << 13) +#define ALWAYS_ENABLED (1 << 0) +#define CLOCK_IN_OMAP310 (1 << 10) +#define CLOCK_IN_OMAP730 (1 << 11) +#define CLOCK_IN_OMAP1510 (1 << 12) +#define CLOCK_IN_OMAP16XX (1 << 13) uint32_t flags; int id; - int running; /* Is currently ticking */ - int enabled; /* Is enabled, regardless of its input clk */ - unsigned long rate; /* Current rate (if .running) */ - unsigned int divisor; /* Rate relative to input (if .enabled) */ - unsigned int multiplier; /* Rate relative to input (if .enabled) */ - qemu_irq users[16]; /* Who to notify on change */ - int usecount; /* Automatically idle when unused */ + int running; /* Is currently ticking */ + int enabled; /* Is enabled, regardless of its input clk */ + unsigned long rate; /* Current rate (if .running) */ + unsigned int divisor; /* Rate relative to input (if .enabled) */ + unsigned int multiplier; /* Rate relative to input (if .enabled) */ + qemu_irq users[16]; /* Who to notify on change */ + int usecount; /* Automatically idle when unused */ }; static struct clk xtal_osc12m = { - .name = "xtal_osc_12m", - .rate = 12000000, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "xtal_osc_12m", + .rate = 12000000, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk xtal_osc32k = { - .name = "xtal_osc_32k", - .rate = 32768, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "xtal_osc_32k", + .rate = 32768, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk ck_ref = { - .name = "ck_ref", - .alias = "clkin", - .parent = &xtal_osc12m, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .name = "ck_ref", + .alias = "clkin", + .parent = &xtal_osc12m, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; /* If a dpll is disabled it becomes a bypass, child clocks don't stop */ static struct clk dpll1 = { - .name = "dpll1", - .parent = &ck_ref, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .name = "dpll1", + .parent = &ck_ref, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk dpll2 = { - .name = "dpll2", - .parent = &ck_ref, - .flags = CLOCK_IN_OMAP310 | ALWAYS_ENABLED, + .name = "dpll2", + .parent = &ck_ref, + .flags = CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk dpll3 = { - .name = "dpll3", - .parent = &ck_ref, - .flags = CLOCK_IN_OMAP310 | ALWAYS_ENABLED, + .name = "dpll3", + .parent = &ck_ref, + .flags = CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk dpll4 = { - .name = "dpll4", - .parent = &ck_ref, - .multiplier = 4, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "dpll4", + .parent = &ck_ref, + .multiplier = 4, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk apll = { - .name = "apll", - .parent = &ck_ref, - .multiplier = 48, - .divisor = 12, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "apll", + .parent = &ck_ref, + .multiplier = 48, + .divisor = 12, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk ck_48m = { - .name = "ck_48m", - .parent = &dpll4, /* either dpll4 or apll */ - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "ck_48m", + .parent = &dpll4, /* either dpll4 or apll */ + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | 
CLOCK_IN_OMAP310, }; static struct clk ck_dpll1out = { - .name = "ck_dpll1out", - .parent = &dpll1, - .flags = CLOCK_IN_OMAP16XX, + .name = "ck_dpll1out", + .parent = &dpll1, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk sossi_ck = { - .name = "ck_sossi", - .parent = &ck_dpll1out, - .flags = CLOCK_IN_OMAP16XX, + .name = "ck_sossi", + .parent = &ck_dpll1out, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk clkm1 = { - .name = "clkm1", - .alias = "ck_gen1", - .parent = &dpll1, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .name = "clkm1", + .alias = "ck_gen1", + .parent = &dpll1, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk clkm2 = { - .name = "clkm2", - .alias = "ck_gen2", - .parent = &dpll1, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .name = "clkm2", + .alias = "ck_gen2", + .parent = &dpll1, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk clkm3 = { - .name = "clkm3", - .alias = "ck_gen3", - .parent = &dpll1, /* either dpll1 or ck_ref */ - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .name = "clkm3", + .alias = "ck_gen3", + .parent = &dpll1, /* either dpll1 or ck_ref */ + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk arm_ck = { - .name = "arm_ck", - .alias = "mpu_ck", - .parent = &clkm1, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .name = "arm_ck", + .alias = "mpu_ck", + .parent = &clkm1, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk armper_ck = { - .name = "armper_ck", - .alias = "mpuper_ck", - .parent = &clkm1, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "armper_ck", + .alias = "mpuper_ck", + .parent = &clkm1, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk arm_gpio_ck = { - .name = "arm_gpio_ck", - .alias = "mpu_gpio_ck", - .parent = &clkm1, - .divisor = 1, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, + .name = "arm_gpio_ck", + .alias = "mpu_gpio_ck", + .parent = &clkm1, + .divisor = 1, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, }; static struct clk armxor_ck = { - .name = "armxor_ck", - .alias = "mpuxor_ck", - .parent = &ck_ref, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "armxor_ck", + .alias = "mpuxor_ck", + .parent = &ck_ref, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk armtim_ck = { - .name = "armtim_ck", - .alias = "mputim_ck", - .parent = &ck_ref, /* either CLKIN or DPLL1 */ - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "armtim_ck", + .alias = "mputim_ck", + .parent = &ck_ref, /* either CLKIN or DPLL1 */ + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk armwdt_ck = { - .name = "armwdt_ck", - .alias = "mpuwd_ck", - .parent = &clkm1, - .divisor = 14, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .name = "armwdt_ck", + .alias = "mpuwd_ck", + .parent = &clkm1, + .divisor = 14, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk arminth_ck16xx = { - .name = "arminth_ck", - .parent = &arm_ck, - .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, + .name = "arminth_ck", + .parent = &arm_ck, + .flags = CLOCK_IN_OMAP16XX | 
ALWAYS_ENABLED, /* Note: On 16xx the frequency can be divided by 2 by programming * ARM_CKCTL:ARM_INTHCK_SEL(14) to 1 * @@ -202,48 +202,48 @@ static struct clk arminth_ck16xx = { }; static struct clk dsp_ck = { - .name = "dsp_ck", - .parent = &clkm2, - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, + .name = "dsp_ck", + .parent = &clkm2, + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, }; static struct clk dspmmu_ck = { - .name = "dspmmu_ck", - .parent = &clkm2, - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | + .name = "dspmmu_ck", + .parent = &clkm2, + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, }; static struct clk dspper_ck = { - .name = "dspper_ck", - .parent = &clkm2, - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, + .name = "dspper_ck", + .parent = &clkm2, + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, }; static struct clk dspxor_ck = { - .name = "dspxor_ck", - .parent = &ck_ref, - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, + .name = "dspxor_ck", + .parent = &ck_ref, + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, }; static struct clk dsptim_ck = { - .name = "dsptim_ck", - .parent = &ck_ref, - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, + .name = "dsptim_ck", + .parent = &ck_ref, + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, }; static struct clk tc_ck = { - .name = "tc_ck", - .parent = &clkm3, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | + .name = "tc_ck", + .parent = &clkm3, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP730 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk arminth_ck15xx = { - .name = "arminth_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, + .name = "arminth_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, /* Note: On 1510 the frequency follows TC_CK * * 16xx version is in MPU clocks. 
@@ -252,259 +252,259 @@ static struct clk arminth_ck15xx = { static struct clk tipb_ck = { /* No-idle controlled by "tc_ck" */ - .name = "tipb_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, + .name = "tipb_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk l3_ocpi_ck = { /* No-idle controlled by "tc_ck" */ - .name = "l3_ocpi_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP16XX, + .name = "l3_ocpi_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk tc1_ck = { - .name = "tc1_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP16XX, + .name = "tc1_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk tc2_ck = { - .name = "tc2_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP16XX, + .name = "tc2_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk dma_ck = { /* No-idle controlled by "tc_ck" */ - .name = "dma_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .name = "dma_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk dma_lcdfree_ck = { - .name = "dma_lcdfree_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, + .name = "dma_lcdfree_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, }; static struct clk api_ck = { - .name = "api_ck", - .alias = "mpui_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .name = "api_ck", + .alias = "mpui_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk lb_ck = { - .name = "lb_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, + .name = "lb_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, }; static struct clk lbfree_ck = { - .name = "lbfree_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, + .name = "lbfree_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, }; static struct clk hsab_ck = { - .name = "hsab_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, + .name = "hsab_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, }; static struct clk rhea1_ck = { - .name = "rhea1_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, + .name = "rhea1_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, }; static struct clk rhea2_ck = { - .name = "rhea2_ck", - .parent = &tc_ck, - .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, + .name = "rhea2_ck", + .parent = &tc_ck, + .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, }; static struct clk lcd_ck_16xx = { - .name = "lcd_ck", - .parent = &clkm3, - .flags = CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP730, + .name = "lcd_ck", + .parent = &clkm3, + .flags = CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP730, }; static struct clk lcd_ck_1510 = { - .name = "lcd_ck", - .parent = &clkm3, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, + .name = "lcd_ck", + .parent = &clkm3, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, }; static struct clk uart1_1510 = { - .name = "uart1_ck", + .name = "uart1_ck", /* Direct from ULPD, no real parent */ - .parent = &armper_ck, /* either armper_ck or dpll4 */ - .rate = 12000000, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, + .parent = &armper_ck, /* either armper_ck or dpll4 */ + .rate = 
12000000, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk uart1_16xx = { - .name = "uart1_ck", + .name = "uart1_ck", /* Direct from ULPD, no real parent */ - .parent = &armper_ck, - .rate = 48000000, - .flags = CLOCK_IN_OMAP16XX, + .parent = &armper_ck, + .rate = 48000000, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk uart2_ck = { - .name = "uart2_ck", + .name = "uart2_ck", /* Direct from ULPD, no real parent */ - .parent = &armper_ck, /* either armper_ck or dpll4 */ - .rate = 12000000, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | + .parent = &armper_ck, /* either armper_ck or dpll4 */ + .rate = 12000000, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk uart3_1510 = { - .name = "uart3_ck", + .name = "uart3_ck", /* Direct from ULPD, no real parent */ - .parent = &armper_ck, /* either armper_ck or dpll4 */ - .rate = 12000000, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, + .parent = &armper_ck, /* either armper_ck or dpll4 */ + .rate = 12000000, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310 | ALWAYS_ENABLED, }; static struct clk uart3_16xx = { - .name = "uart3_ck", + .name = "uart3_ck", /* Direct from ULPD, no real parent */ - .parent = &armper_ck, - .rate = 48000000, - .flags = CLOCK_IN_OMAP16XX, + .parent = &armper_ck, + .rate = 48000000, + .flags = CLOCK_IN_OMAP16XX, }; -static struct clk usb_clk0 = { /* 6 MHz output on W4_USB_CLK0 */ - .name = "usb_clk0", - .alias = "usb.clko", +static struct clk usb_clk0 = { /* 6 MHz output on W4_USB_CLK0 */ + .name = "usb_clk0", + .alias = "usb.clko", /* Direct from ULPD, no parent */ - .rate = 6000000, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .rate = 6000000, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk usb_hhc_ck1510 = { - .name = "usb_hhc_ck", + .name = "usb_hhc_ck", /* Direct from ULPD, no parent */ - .rate = 48000000, /* Actually 2 clocks, 12MHz and 48MHz */ - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, + .rate = 48000000, /* Actually 2 clocks, 12MHz and 48MHz */ + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP310, }; static struct clk usb_hhc_ck16xx = { - .name = "usb_hhc_ck", + .name = "usb_hhc_ck", /* Direct from ULPD, no parent */ - .rate = 48000000, + .rate = 48000000, /* OTG_SYSCON_2.OTG_PADEN == 0 (not 1510-compatible) */ - .flags = CLOCK_IN_OMAP16XX, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk usb_w2fc_mclk = { - .name = "usb_w2fc_mclk", - .alias = "usb_w2fc_ck", - .parent = &ck_48m, - .rate = 48000000, - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, + .name = "usb_w2fc_mclk", + .alias = "usb_w2fc_ck", + .parent = &ck_48m, + .rate = 48000000, + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, }; static struct clk mclk_1510 = { - .name = "mclk", + .name = "mclk", /* Direct from ULPD, no parent. May be enabled by ext hardware. */ - .rate = 12000000, - .flags = CLOCK_IN_OMAP1510, + .rate = 12000000, + .flags = CLOCK_IN_OMAP1510, }; static struct clk bclk_310 = { - .name = "bt_mclk_out", /* Alias midi_mclk_out? */ - .parent = &armper_ck, - .flags = CLOCK_IN_OMAP310, + .name = "bt_mclk_out", /* Alias midi_mclk_out? 
*/ + .parent = &armper_ck, + .flags = CLOCK_IN_OMAP310, }; static struct clk mclk_310 = { - .name = "com_mclk_out", - .parent = &armper_ck, - .flags = CLOCK_IN_OMAP310, + .name = "com_mclk_out", + .parent = &armper_ck, + .flags = CLOCK_IN_OMAP310, }; static struct clk mclk_16xx = { - .name = "mclk", + .name = "mclk", /* Direct from ULPD, no parent. May be enabled by ext hardware. */ - .flags = CLOCK_IN_OMAP16XX, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk bclk_1510 = { - .name = "bclk", + .name = "bclk", /* Direct from ULPD, no parent. May be enabled by ext hardware. */ - .rate = 12000000, - .flags = CLOCK_IN_OMAP1510, + .rate = 12000000, + .flags = CLOCK_IN_OMAP1510, }; static struct clk bclk_16xx = { - .name = "bclk", + .name = "bclk", /* Direct from ULPD, no parent. May be enabled by ext hardware. */ - .flags = CLOCK_IN_OMAP16XX, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk mmc1_ck = { - .name = "mmc_ck", - .id = 1, + .name = "mmc_ck", + .id = 1, /* Functional clock is direct from ULPD, interface clock is ARMPER */ - .parent = &armper_ck, /* either armper_ck or dpll4 */ - .rate = 48000000, - .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, + .parent = &armper_ck, /* either armper_ck or dpll4 */ + .rate = 48000000, + .flags = CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | CLOCK_IN_OMAP310, }; static struct clk mmc2_ck = { - .name = "mmc_ck", - .id = 2, + .name = "mmc_ck", + .id = 2, /* Functional clock is direct from ULPD, interface clock is ARMPER */ - .parent = &armper_ck, - .rate = 48000000, - .flags = CLOCK_IN_OMAP16XX, + .parent = &armper_ck, + .rate = 48000000, + .flags = CLOCK_IN_OMAP16XX, }; static struct clk cam_mclk = { - .name = "cam.mclk", - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, - .rate = 12000000, + .name = "cam.mclk", + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, + .rate = 12000000, }; static struct clk cam_exclk = { - .name = "cam.exclk", - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, + .name = "cam.exclk", + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, /* Either 12M from cam.mclk or 48M from dpll4 */ - .parent = &cam_mclk, + .parent = &cam_mclk, }; static struct clk cam_lclk = { - .name = "cam.lclk", - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, + .name = "cam.lclk", + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX, }; static struct clk i2c_fck = { - .name = "i2c_fck", - .id = 1, - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | + .name = "i2c_fck", + .id = 1, + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, - .parent = &armxor_ck, + .parent = &armxor_ck, }; static struct clk i2c_ick = { - .name = "i2c_ick", - .id = 1, - .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, - .parent = &armper_ck, + .name = "i2c_ick", + .id = 1, + .flags = CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, + .parent = &armper_ck, }; static struct clk clk32k = { - .name = "clk32-kHz", - .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | + .name = "clk32-kHz", + .flags = CLOCK_IN_OMAP310 | CLOCK_IN_OMAP1510 | CLOCK_IN_OMAP16XX | ALWAYS_ENABLED, - .parent = &xtal_osc32k, + .parent = &xtal_osc32k, }; static struct clk *onchip_clks[] = { diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c index 3f6a8bb..ba71c50 100644 --- a/hw/misc/pci-testdev.c +++ b/hw/misc/pci-testdev.c @@ -90,6 +90,7 @@ struct PCITestDevState { int current; uint64_t membar_size; + bool membar_backed; 
MemoryRegion membar; }; @@ -258,8 +259,14 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->portio); if (d->membar_size) { - memory_region_init(&d->membar, OBJECT(d), "pci-testdev-membar", - d->membar_size); + if (d->membar_backed) + memory_region_init_ram(&d->membar, OBJECT(d), + "pci-testdev-membar-backed", + d->membar_size, NULL); + else + memory_region_init(&d->membar, OBJECT(d), + "pci-testdev-membar", + d->membar_size); pci_register_bar(pci_dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_PREFETCH | @@ -321,6 +328,7 @@ static void qdev_pci_testdev_reset(DeviceState *dev) static const Property pci_testdev_properties[] = { DEFINE_PROP_SIZE("membar", PCITestDevState, membar_size, 0), + DEFINE_PROP_BOOL("membar-backed", PCITestDevState, membar_backed, false), }; static void pci_testdev_class_init(ObjectClass *klass, const void *data) diff --git a/hw/misc/stm32_rcc.c b/hw/misc/stm32_rcc.c index 94e8dae..5815b3e 100644 --- a/hw/misc/stm32_rcc.c +++ b/hw/misc/stm32_rcc.c @@ -60,7 +60,7 @@ static void stm32_rcc_write(void *opaque, hwaddr addr, uint32_t value = val64; uint32_t prev_value, new_value, irq_offset; - trace_stm32_rcc_write(value, addr); + trace_stm32_rcc_write(addr, value); if (addr > STM32_RCC_DCKCFGR2) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"HWADDR_PRIx"\n", diff --git a/hw/misc/trace-events b/hw/misc/trace-events index 4383808..e3f64c0 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -302,6 +302,14 @@ aspeed_peci_read(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" aspeed_peci_write(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64 aspeed_peci_raise_interrupt(uint32_t ctrl, uint32_t status) "ctrl 0x%" PRIx32 " status 0x%" PRIx32 +# aspeed_hace.c +aspeed_hace_read(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64 +aspeed_hace_write(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64 +aspeed_hace_hash_sg(int index, uint64_t list_addr, uint64_t buf_addr, uint32_t len) "%d: list_addr 0x%" PRIx64 " buf_addr 0x%" PRIx64 " len 0x%" PRIx32 +aspeed_hace_hash_addr(const char *s, uint64_t addr) "%s: 0x%" PRIx64 +aspeed_hace_hash_execute_acc_mode(bool final_request) "final request: %d" +aspeed_hace_hexdump(const char *desc, uint32_t offset, char *s) "%s: 0x%08x: %s" + # bcm2835_property.c bcm2835_mbox_property(uint32_t tag, uint32_t bufsize, size_t resplen) "mbox property tag:0x%08x in_sz:%u out_sz:%zu" diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 50025d5..4444666 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -1756,6 +1756,7 @@ static void gem_realize(DeviceState *dev, Error **errp) sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]); } + gem_init_register_masks(s); qemu_macaddr_default_if_unset(&s->conf.macaddr); s->nic = qemu_new_nic(&net_gem_info, &s->conf, @@ -1776,7 +1777,6 @@ static void gem_init(Object *obj) DB_PRINT("\n"); - gem_init_register_masks(s); memory_region_init_io(&s->iomem, OBJECT(s), &gem_ops, s, "enet", sizeof(s->regs)); diff --git a/hw/net/can/ctucan_core.c b/hw/net/can/ctucan_core.c index 17131a4..6bd99c4 100644 --- a/hw/net/can/ctucan_core.c +++ b/hw/net/can/ctucan_core.c @@ -28,7 +28,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" -#include "qemu/bswap.h" #include "qemu/bitops.h" #include "hw/irq.h" #include "migration/vmstate.h" diff --git a/hw/net/e1000.c b/hw/net/e1000.c index cba4999..a80a7b0 100644 --- a/hw/net/e1000.c +++ 
b/hw/net/e1000.c @@ -127,10 +127,8 @@ struct E1000State_st { QEMUTimer *flush_queue_timer; /* Compatibility flags for migration to/from qemu 1.3.0 and older */ -#define E1000_FLAG_MAC_BIT 2 #define E1000_FLAG_TSO_BIT 3 #define E1000_FLAG_VET_BIT 4 -#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT) #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT) #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT) @@ -1212,52 +1210,51 @@ enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) }; enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 }; -#define markflag(x) ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED) /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p] * f - flag bits (up to 6 possible flags) * n - flag needed - * p - partially implenented */ + * p - partially implemented */ static const uint8_t mac_reg_access[0x8000] = { - [IPAV] = markflag(MAC), [WUC] = markflag(MAC), - [IP6AT] = markflag(MAC), [IP4AT] = markflag(MAC), - [FFVT] = markflag(MAC), [WUPM] = markflag(MAC), - [ECOL] = markflag(MAC), [MCC] = markflag(MAC), - [DC] = markflag(MAC), [TNCRS] = markflag(MAC), - [RLEC] = markflag(MAC), [XONRXC] = markflag(MAC), - [XOFFTXC] = markflag(MAC), [RFC] = markflag(MAC), - [TSCTFC] = markflag(MAC), [MGTPRC] = markflag(MAC), - [WUS] = markflag(MAC), [AIT] = markflag(MAC), - [FFLT] = markflag(MAC), [FFMT] = markflag(MAC), - [SCC] = markflag(MAC), [FCRUC] = markflag(MAC), - [LATECOL] = markflag(MAC), [COLC] = markflag(MAC), - [SEQEC] = markflag(MAC), [CEXTERR] = markflag(MAC), - [XONTXC] = markflag(MAC), [XOFFRXC] = markflag(MAC), - [RJC] = markflag(MAC), [RNBC] = markflag(MAC), - [MGTPDC] = markflag(MAC), [MGTPTC] = markflag(MAC), - [RUC] = markflag(MAC), [ROC] = markflag(MAC), - [GORCL] = markflag(MAC), [GORCH] = markflag(MAC), - [GOTCL] = markflag(MAC), [GOTCH] = markflag(MAC), - [BPRC] = markflag(MAC), [MPRC] = markflag(MAC), - [TSCTC] = markflag(MAC), [PRC64] = markflag(MAC), - [PRC127] = markflag(MAC), [PRC255] = markflag(MAC), - [PRC511] = markflag(MAC), [PRC1023] = markflag(MAC), - [PRC1522] = markflag(MAC), [PTC64] = markflag(MAC), - [PTC127] = markflag(MAC), [PTC255] = markflag(MAC), - [PTC511] = markflag(MAC), [PTC1023] = markflag(MAC), - [PTC1522] = markflag(MAC), [MPTC] = markflag(MAC), - [BPTC] = markflag(MAC), - - [TDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [PBM] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [IPAV] = MAC_ACCESS_FLAG_NEEDED, [WUC] = MAC_ACCESS_FLAG_NEEDED, + [IP6AT] = MAC_ACCESS_FLAG_NEEDED, [IP4AT] = MAC_ACCESS_FLAG_NEEDED, + [FFVT] = MAC_ACCESS_FLAG_NEEDED, [WUPM] = MAC_ACCESS_FLAG_NEEDED, + [ECOL] = MAC_ACCESS_FLAG_NEEDED, [MCC] = MAC_ACCESS_FLAG_NEEDED, + [DC] = MAC_ACCESS_FLAG_NEEDED, [TNCRS] = MAC_ACCESS_FLAG_NEEDED, + [RLEC] = MAC_ACCESS_FLAG_NEEDED, [XONRXC] = MAC_ACCESS_FLAG_NEEDED, + [XOFFTXC] = MAC_ACCESS_FLAG_NEEDED, [RFC] = MAC_ACCESS_FLAG_NEEDED, + [TSCTFC] = MAC_ACCESS_FLAG_NEEDED, [MGTPRC] = MAC_ACCESS_FLAG_NEEDED, + [WUS] = MAC_ACCESS_FLAG_NEEDED, [AIT] = MAC_ACCESS_FLAG_NEEDED, + [FFLT] = MAC_ACCESS_FLAG_NEEDED, [FFMT] = MAC_ACCESS_FLAG_NEEDED, + [SCC] = MAC_ACCESS_FLAG_NEEDED, [FCRUC] = MAC_ACCESS_FLAG_NEEDED, + 
[LATECOL] = MAC_ACCESS_FLAG_NEEDED, [COLC] = MAC_ACCESS_FLAG_NEEDED, + [SEQEC] = MAC_ACCESS_FLAG_NEEDED, [CEXTERR] = MAC_ACCESS_FLAG_NEEDED, + [XONTXC] = MAC_ACCESS_FLAG_NEEDED, [XOFFRXC] = MAC_ACCESS_FLAG_NEEDED, + [RJC] = MAC_ACCESS_FLAG_NEEDED, [RNBC] = MAC_ACCESS_FLAG_NEEDED, + [MGTPDC] = MAC_ACCESS_FLAG_NEEDED, [MGTPTC] = MAC_ACCESS_FLAG_NEEDED, + [RUC] = MAC_ACCESS_FLAG_NEEDED, [ROC] = MAC_ACCESS_FLAG_NEEDED, + [GORCL] = MAC_ACCESS_FLAG_NEEDED, [GORCH] = MAC_ACCESS_FLAG_NEEDED, + [GOTCL] = MAC_ACCESS_FLAG_NEEDED, [GOTCH] = MAC_ACCESS_FLAG_NEEDED, + [BPRC] = MAC_ACCESS_FLAG_NEEDED, [MPRC] = MAC_ACCESS_FLAG_NEEDED, + [TSCTC] = MAC_ACCESS_FLAG_NEEDED, [PRC64] = MAC_ACCESS_FLAG_NEEDED, + [PRC127] = MAC_ACCESS_FLAG_NEEDED, [PRC255] = MAC_ACCESS_FLAG_NEEDED, + [PRC511] = MAC_ACCESS_FLAG_NEEDED, [PRC1023] = MAC_ACCESS_FLAG_NEEDED, + [PRC1522] = MAC_ACCESS_FLAG_NEEDED, [PTC64] = MAC_ACCESS_FLAG_NEEDED, + [PTC127] = MAC_ACCESS_FLAG_NEEDED, [PTC255] = MAC_ACCESS_FLAG_NEEDED, + [PTC511] = MAC_ACCESS_FLAG_NEEDED, [PTC1023] = MAC_ACCESS_FLAG_NEEDED, + [PTC1522] = MAC_ACCESS_FLAG_NEEDED, [MPTC] = MAC_ACCESS_FLAG_NEEDED, + [BPTC] = MAC_ACCESS_FLAG_NEEDED, + + [TDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [PBM] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, }; static void @@ -1419,13 +1416,6 @@ static int e1000_tx_tso_post_load(void *opaque, int version_id) return 0; } -static bool e1000_full_mac_needed(void *opaque) -{ - E1000State *s = opaque; - - return chkflag(MAC); -} - static bool e1000_tso_state_needed(void *opaque) { E1000State *s = opaque; @@ -1451,7 +1441,6 @@ static const VMStateDescription vmstate_e1000_full_mac_state = { .name = "e1000/full_mac_state", .version_id = 1, .minimum_version_id = 1, - .needed = e1000_full_mac_needed, .fields = (const VMStateField[]) { VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000), VMSTATE_END_OF_LIST() @@ -1679,8 +1668,6 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) static const Property e1000_properties[] = { DEFINE_NIC_PROPERTIES(E1000State, conf), - DEFINE_PROP_BIT("extra_mac_registers", E1000State, - compat_flags, E1000_FLAG_MAC_BIT, true), DEFINE_PROP_BIT("migrate_tso_props", E1000State, compat_flags, E1000_FLAG_TSO_BIT, true), DEFINE_PROP_BIT("init-vet", E1000State, diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index cd896fc..e9a74de 100644 --- a/hw/net/e1000x_regs.h +++ b/hw/net/e1000x_regs.h @@ -900,7 +900,7 @@ struct e1000_context_desc { uint16_t tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; - uint32_t cmd_and_length; /* */ + uint32_t cmd_and_length; union { uint32_t data; struct { diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c index d14cb2a..846f6cb 100644 --- a/hw/net/fsl_etsec/etsec.c +++ b/hw/net/fsl_etsec/etsec.c @@ -389,6 +389,7 @@ static void etsec_realize(DeviceState *dev, Error **errp) { eTSEC *etsec = ETSEC_COMMON(dev); + qemu_macaddr_default_if_unset(&etsec->conf.macaddr); etsec->nic = qemu_new_nic(&net_etsec_info, &etsec->conf, object_get_typename(OBJECT(dev)), 
dev->id, &dev->mem_reentrancy_guard, etsec); diff --git a/hw/net/i82596.c b/hw/net/i82596.c index 64ed3c8..c1ff3e6 100644 --- a/hw/net/i82596.c +++ b/hw/net/i82596.c @@ -5,7 +5,7 @@ * This work is licensed under the GNU GPL license version 2 or later. * * This software was written to be compatible with the specification: - * https://www.intel.com/assets/pdf/general/82596ca.pdf + * https://parisc.docs.kernel.org/en/latest/_downloads/96672be0650d9fc046bbcea40b92482f/82596CA.pdf */ #include "qemu/osdep.h" @@ -177,6 +177,26 @@ static void set_individual_address(I82596State *s, uint32_t addr) trace_i82596_new_mac(nc->info_str); } +static void i82596_configure(I82596State *s, uint32_t addr) +{ + uint8_t byte_cnt; + byte_cnt = get_byte(addr + 8) & 0x0f; + + byte_cnt = MAX(byte_cnt, 4); + byte_cnt = MIN(byte_cnt, sizeof(s->config)); + /* copy byte_cnt max. */ + address_space_read(&address_space_memory, addr + 8, + MEMTXATTRS_UNSPECIFIED, s->config, byte_cnt); + /* config byte according to page 35ff */ + s->config[2] &= 0x82; /* mask valid bits */ + s->config[2] |= 0x40; + s->config[7] &= 0xf7; /* clear zero bit */ + assert(I596_NOCRC_INS == 0); /* do CRC insertion */ + s->config[10] = MAX(s->config[10], 5); /* min frame length */ + s->config[12] &= 0x40; /* only full duplex field valid */ + s->config[13] |= 0x3f; /* set ones in byte 13 */ +} + static void set_multicast_list(I82596State *s, uint32_t addr) { uint16_t mc_count, i; @@ -234,7 +254,6 @@ static void command_loop(I82596State *s) { uint16_t cmd; uint16_t status; - uint8_t byte_cnt; DBG(printf("STARTING COMMAND LOOP cmd_p=%08x\n", s->cmd_p)); @@ -254,20 +273,7 @@ static void command_loop(I82596State *s) set_individual_address(s, s->cmd_p); break; case CmdConfigure: - byte_cnt = get_byte(s->cmd_p + 8) & 0x0f; - byte_cnt = MAX(byte_cnt, 4); - byte_cnt = MIN(byte_cnt, sizeof(s->config)); - /* copy byte_cnt max. 
*/ - address_space_read(&address_space_memory, s->cmd_p + 8, - MEMTXATTRS_UNSPECIFIED, s->config, byte_cnt); - /* config byte according to page 35ff */ - s->config[2] &= 0x82; /* mask valid bits */ - s->config[2] |= 0x40; - s->config[7] &= 0xf7; /* clear zero bit */ - assert(I596_NOCRC_INS == 0); /* do CRC insertion */ - s->config[10] = MAX(s->config[10], 5); /* min frame length */ - s->config[12] &= 0x40; /* only full duplex field valid */ - s->config[13] |= 0x3f; /* set ones in byte 13 */ + i82596_configure(s, s->cmd_p); break; case CmdTDR: /* get signal LINK */ diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c index 6dda1e5..3017e12 100644 --- a/hw/net/lan9118.c +++ b/hw/net/lan9118.c @@ -21,6 +21,7 @@ #include "hw/ptimer.h" #include "hw/qdev-properties.h" #include "qapi/error.h" +#include "qemu/bswap.h" #include "qemu/log.h" #include "qemu/module.h" #include <zlib.h> /* for crc32 */ diff --git a/hw/net/npcm_gmac.c b/hw/net/npcm_gmac.c index a434112..5e32cd3 100644 --- a/hw/net/npcm_gmac.c +++ b/hw/net/npcm_gmac.c @@ -516,8 +516,6 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) uint32_t desc_addr; struct NPCMGMACTxDesc tx_desc; uint32_t tx_buf_addr, tx_buf_len; - uint16_t length = 0; - uint8_t *buf = tx_send_buffer; uint32_t prev_buf_size = 0; int csum = 0; @@ -568,22 +566,20 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) tx_buf_addr = tx_desc.tdes2; gmac->regs[R_NPCM_DMA_CUR_TX_BUF_ADDR] = tx_buf_addr; tx_buf_len = TX_DESC_TDES1_BFFR1_SZ_MASK(tx_desc.tdes1); - buf = &tx_send_buffer[prev_buf_size]; - if ((prev_buf_size + tx_buf_len) > sizeof(buf)) { + if ((prev_buf_size + tx_buf_len) > tx_buffer_size) { tx_buffer_size = prev_buf_size + tx_buf_len; tx_send_buffer = g_realloc(tx_send_buffer, tx_buffer_size); - buf = &tx_send_buffer[prev_buf_size]; } /* step 5 */ - if (dma_memory_read(&address_space_memory, tx_buf_addr, buf, + if (dma_memory_read(&address_space_memory, tx_buf_addr, + tx_send_buffer + prev_buf_size, tx_buf_len, MEMTXATTRS_UNSPECIFIED)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to read packet @ 0x%x\n", __func__, tx_buf_addr); return; } - length += tx_buf_len; prev_buf_size += tx_buf_len; /* If not chained we'll have a second buffer. */ @@ -591,30 +587,32 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) tx_buf_addr = tx_desc.tdes3; gmac->regs[R_NPCM_DMA_CUR_TX_BUF_ADDR] = tx_buf_addr; tx_buf_len = TX_DESC_TDES1_BFFR2_SZ_MASK(tx_desc.tdes1); - buf = &tx_send_buffer[prev_buf_size]; - if ((prev_buf_size + tx_buf_len) > sizeof(buf)) { + if ((prev_buf_size + tx_buf_len) > tx_buffer_size) { tx_buffer_size = prev_buf_size + tx_buf_len; tx_send_buffer = g_realloc(tx_send_buffer, tx_buffer_size); - buf = &tx_send_buffer[prev_buf_size]; } - if (dma_memory_read(&address_space_memory, tx_buf_addr, buf, + if (dma_memory_read(&address_space_memory, tx_buf_addr, + tx_send_buffer + prev_buf_size, tx_buf_len, MEMTXATTRS_UNSPECIFIED)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to read packet @ 0x%x\n", __func__, tx_buf_addr); return; } - length += tx_buf_len; prev_buf_size += tx_buf_len; } if (tx_desc.tdes1 & TX_DESC_TDES1_LAST_SEG_MASK) { + /* + * This will truncate the packet at 64K. + * TODO: find out if this is the correct behaviour. 
+ */ + uint16_t length = prev_buf_size; net_checksum_calculate(tx_send_buffer, length, csum); qemu_send_packet(qemu_get_queue(gmac->nic), tx_send_buffer, length); trace_npcm_gmac_packet_sent(DEVICE(gmac)->canonical_path, length); - buf = tx_send_buffer; - length = 0; + prev_buf_size = 0; } /* step 6 */ diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index 6e0962f..ae06c1c 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -36,15 +36,7 @@ static inline G_GNUC_PRINTF(1, 2) int DPRINTF(const char *fmt, ...) } #endif -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - -#define __be16 uint16_t -#define __be32 uint32_t -#define __be64 uint64_t - -static inline bool ipv4_addr_is_multicast(__be32 addr) +static inline bool ipv4_addr_is_multicast(uint32_t addr) { return (addr & htonl(0xf0000000)) == htonl(0xe0000000); } @@ -52,8 +44,8 @@ static inline bool ipv4_addr_is_multicast(__be32 addr) typedef struct ipv6_addr { union { uint8_t addr8[16]; - __be16 addr16[8]; - __be32 addr32[4]; + uint16_t addr16[8]; + uint32_t addr32[4]; }; } Ipv6Addr; diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index 1786323..7ec6bfb 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -9,10 +9,6 @@ #ifndef ROCKER_HW_H #define ROCKER_HW_H -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - /* * Return codes */ @@ -124,12 +120,12 @@ enum { */ typedef struct rocker_desc { - __le64 buf_addr; + uint64_t buf_addr; uint64_t cookie; - __le16 buf_size; - __le16 tlv_size; - __le16 rsvd[5]; /* pad to 32 bytes */ - __le16 comp_err; + uint16_t buf_size; + uint16_t tlv_size; + uint16_t rsvd[5]; /* pad to 32 bytes */ + uint16_t comp_err; } __attribute__((packed, aligned(8))) RockerDesc; /* @@ -137,9 +133,9 @@ typedef struct rocker_desc { */ typedef struct rocker_tlv { - __le32 type; - __le16 len; - __le16 rsvd; + uint32_t type; + uint16_t len; + uint16_t rsvd; } __attribute__((packed, aligned(8))) RockerTlv; /* cmd msg */ diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index 3378f63..4aed178 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -52,10 +52,10 @@ typedef struct of_dpa_flow_key { uint32_t tunnel_id; /* overlay tunnel id */ uint32_t tbl_id; /* table id */ struct { - __be16 vlan_id; /* 0 if no VLAN */ + uint16_t vlan_id; /* 0 if no VLAN */ MACAddr src; /* ethernet source address */ MACAddr dst; /* ethernet destination address */ - __be16 type; /* ethernet frame type */ + uint16_t type; /* ethernet frame type */ } eth; struct { uint8_t proto; /* IP protocol or ARP opcode */ @@ -66,14 +66,14 @@ typedef struct of_dpa_flow_key { union { struct { struct { - __be32 src; /* IP source address */ - __be32 dst; /* IP destination address */ + uint32_t src; /* IP source address */ + uint32_t dst; /* IP destination address */ } addr; union { struct { - __be16 src; /* TCP/UDP/SCTP source port */ - __be16 dst; /* TCP/UDP/SCTP destination port */ - __be16 flags; /* TCP flags */ + uint16_t src; /* TCP/UDP/SCTP source port */ + uint16_t dst; /* TCP/UDP/SCTP destination port */ + uint16_t flags; /* TCP flags */ } tp; struct { MACAddr sha; /* ARP source hardware address */ @@ -86,11 +86,11 @@ typedef struct of_dpa_flow_key { Ipv6Addr src; /* IPv6 source address */ Ipv6Addr dst; /* IPv6 destination address */ } addr; - __be32 label; /* IPv6 flow label */ + uint32_t label; /* IPv6 flow label */ struct { - __be16 src; /* TCP/UDP/SCTP source port */ - __be16 dst; /* 
TCP/UDP/SCTP destination port */ - __be16 flags; /* TCP flags */ + uint16_t src; /* TCP/UDP/SCTP source port */ + uint16_t dst; /* TCP/UDP/SCTP destination port */ + uint16_t flags; /* TCP flags */ } tp; struct { Ipv6Addr target; /* ND target address */ @@ -112,13 +112,13 @@ typedef struct of_dpa_flow_action { struct { uint32_t group_id; uint32_t tun_log_lport; - __be16 vlan_id; + uint16_t vlan_id; } write; struct { - __be16 new_vlan_id; + uint16_t new_vlan_id; uint32_t out_pport; uint8_t copy_to_cpu; - __be16 vlan_id; + uint16_t vlan_id; } apply; } OfDpaFlowAction; @@ -143,7 +143,7 @@ typedef struct of_dpa_flow { typedef struct of_dpa_flow_pkt_fields { uint32_t tunnel_id; struct eth_header *ethhdr; - __be16 *h_proto; + uint16_t *h_proto; struct vlan_header *vlanhdr; struct ip_header *ipv4hdr; struct ip6_header *ipv6hdr; @@ -180,7 +180,7 @@ typedef struct of_dpa_group { uint32_t group_id; MACAddr src_mac; MACAddr dst_mac; - __be16 vlan_id; + uint16_t vlan_id; } l2_rewrite; struct { uint16_t group_count; @@ -190,13 +190,13 @@ typedef struct of_dpa_group { uint32_t group_id; MACAddr src_mac; MACAddr dst_mac; - __be16 vlan_id; + uint16_t vlan_id; uint8_t ttl_check; } l3_unicast; }; } OfDpaGroup; -static int of_dpa_mask2prefix(__be32 mask) +static int of_dpa_mask2prefix(uint32_t mask) { int i; int count = 32; @@ -451,7 +451,7 @@ static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc, fc->iovcnt = iovcnt + 2; } -static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id) +static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, uint16_t vlan_id) { OfDpaFlowPktFields *fields = &fc->fields; uint16_t h_proto = fields->ethhdr->h_proto; @@ -486,7 +486,7 @@ static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc) static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc, uint8_t *src_mac, uint8_t *dst_mac, - __be16 vlan_id) + uint16_t vlan_id) { OfDpaFlowPktFields *fields = &fc->fields; diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 15b8f75..324fb93 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -57,6 +57,7 @@ #include "system/dma.h" #include "qemu/module.h" #include "qemu/timer.h" +#include "qemu/bswap.h" #include "net/net.h" #include "net/eth.h" #include "system/system.h" @@ -1816,7 +1817,7 @@ static int rtl8139_transmit_one(RTL8139State *s, int descriptor) PCIDevice *d = PCI_DEVICE(s); int txsize = s->TxStatus[descriptor] & 0x1fff; - uint8_t txbuffer[0x2000]; + QEMU_UNINITIALIZED uint8_t txbuffer[0x2000]; DPRINTF("+++ transmit reading %d bytes from host memory at 0x%08x\n", txsize, s->TxAddr[descriptor]); diff --git a/hw/net/tulip.c b/hw/net/tulip.c index 63fe513..319af90 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -629,7 +629,7 @@ static void tulip_setup_filter_addr(TULIPState *s, uint8_t *buf, int n) static void tulip_setup_frame(TULIPState *s, struct tulip_descriptor *desc) { - uint8_t buf[4096]; + QEMU_UNINITIALIZED uint8_t buf[4096]; int len = (desc->control >> TDES1_BUF1_SIZE_SHIFT) & TDES1_BUF1_SIZE_MASK; int i; diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c index 72df6d7..7d49f82 100644 --- a/hw/net/vhost_net-stub.c +++ b/hw/net/vhost_net-stub.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "net/net.h" #include "net/tap.h" -#include "net/vhost-user.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" @@ -101,7 +100,7 @@ VHostNetState *get_vhost_net(NetClientState *nc) return 0; } -int vhost_set_vring_enable(NetClientState *nc, int enable) +int vhost_net_set_vring_enable(NetClientState *nc, int enable) { 
return 0; } diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 891f235..540492b 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -16,7 +16,6 @@ #include "qemu/osdep.h" #include "net/net.h" #include "net/tap.h" -#include "net/vhost-user.h" #include "net/vhost-vdpa.h" #include "standard-headers/linux/vhost_types.h" @@ -36,94 +35,9 @@ #include "hw/virtio/virtio-bus.h" #include "linux-headers/linux/vhost.h" - -/* Features supported by host kernel. */ -static const int kernel_feature_bits[] = { - VIRTIO_F_NOTIFY_ON_EMPTY, - VIRTIO_RING_F_INDIRECT_DESC, - VIRTIO_RING_F_EVENT_IDX, - VIRTIO_NET_F_MRG_RXBUF, - VIRTIO_F_VERSION_1, - VIRTIO_NET_F_MTU, - VIRTIO_F_IOMMU_PLATFORM, - VIRTIO_F_RING_PACKED, - VIRTIO_F_RING_RESET, - VIRTIO_F_IN_ORDER, - VIRTIO_F_NOTIFICATION_DATA, - VIRTIO_NET_F_RSC_EXT, - VIRTIO_NET_F_HASH_REPORT, - VHOST_INVALID_FEATURE_BIT -}; - -/* Features supported by others. */ -static const int user_feature_bits[] = { - VIRTIO_F_NOTIFY_ON_EMPTY, - VIRTIO_F_NOTIFICATION_DATA, - VIRTIO_RING_F_INDIRECT_DESC, - VIRTIO_RING_F_EVENT_IDX, - - VIRTIO_F_ANY_LAYOUT, - VIRTIO_F_VERSION_1, - VIRTIO_NET_F_CSUM, - VIRTIO_NET_F_GUEST_CSUM, - VIRTIO_NET_F_GSO, - VIRTIO_NET_F_GUEST_TSO4, - VIRTIO_NET_F_GUEST_TSO6, - VIRTIO_NET_F_GUEST_ECN, - VIRTIO_NET_F_GUEST_UFO, - VIRTIO_NET_F_HOST_TSO4, - VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, - VIRTIO_NET_F_HOST_UFO, - VIRTIO_NET_F_MRG_RXBUF, - VIRTIO_NET_F_MTU, - VIRTIO_F_IOMMU_PLATFORM, - VIRTIO_F_RING_PACKED, - VIRTIO_F_RING_RESET, - VIRTIO_F_IN_ORDER, - VIRTIO_NET_F_RSS, - VIRTIO_NET_F_RSC_EXT, - VIRTIO_NET_F_HASH_REPORT, - VIRTIO_NET_F_GUEST_USO4, - VIRTIO_NET_F_GUEST_USO6, - VIRTIO_NET_F_HOST_USO, - - /* This bit implies RARP isn't sent by QEMU out of band */ - VIRTIO_NET_F_GUEST_ANNOUNCE, - - VIRTIO_NET_F_MQ, - - VHOST_INVALID_FEATURE_BIT -}; - -static const int *vhost_net_get_feature_bits(struct vhost_net *net) -{ - const int *feature_bits = 0; - - switch (net->nc->info->type) { - case NET_CLIENT_DRIVER_TAP: - feature_bits = kernel_feature_bits; - break; - case NET_CLIENT_DRIVER_VHOST_USER: - feature_bits = user_feature_bits; - break; -#ifdef CONFIG_VHOST_NET_VDPA - case NET_CLIENT_DRIVER_VHOST_VDPA: - feature_bits = vdpa_feature_bits; - break; -#endif - default: - error_report("Feature bits not defined for this type: %d", - net->nc->info->type); - break; - } - - return feature_bits; -} - uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) { - return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), + return vhost_get_features(&net->dev, net->feature_bits, features); } int vhost_net_get_config(struct vhost_net *net, uint8_t *config, @@ -140,7 +54,7 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, void vhost_net_ack_features(struct vhost_net *net, uint64_t features) { net->dev.acked_features = net->dev.backend_features; - vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); + vhost_ack_features(&net->dev, net->feature_bits, features); } uint64_t vhost_net_get_max_queues(VHostNetState *net) @@ -155,11 +69,11 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net) void vhost_net_save_acked_features(NetClientState *nc) { -#ifdef CONFIG_VHOST_NET_USER - if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - vhost_user_save_acked_features(nc); + struct vhost_net *net = get_vhost_net(nc); + + if (net && net->save_acked_features) { + net->save_acked_features(nc); } -#endif } static void vhost_net_disable_notifiers_nvhosts(VirtIODevice *dev, @@ 
-329,6 +243,10 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) } net->nc = options->net_backend; net->dev.nvqs = options->nvqs; + net->feature_bits = options->feature_bits; + net->save_acked_features = options->save_acked_features; + net->max_tx_queue_size = options->max_tx_queue_size; + net->is_vhost_user = options->is_vhost_user; net->dev.max_queues = 1; net->dev.vqs = net->vqs; @@ -372,9 +290,8 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) } /* Set sane init value. Override when guest acks. */ -#ifdef CONFIG_VHOST_NET_USER - if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - features = vhost_user_get_acked_features(net->nc); + if (options->get_acked_features) { + features = options->get_acked_features(net->nc); if (~net->dev.features & features) { fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 " for backend\n", @@ -382,7 +299,6 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) goto fail; } } -#endif vhost_net_ack_features(net, features); @@ -525,7 +441,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, * because vhost user doesn't interrupt masking/unmasking * properly. */ - if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { + if (net->is_vhost_user) { dev->use_guest_notifier_mask = false; } } @@ -551,7 +467,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, if (peer->vring_enable) { /* restore vring enable state */ - r = vhost_set_vring_enable(peer, peer->vring_enable); + r = vhost_net_set_vring_enable(peer, peer->vring_enable); if (r < 0) { goto err_guest_notifiers; @@ -649,44 +565,21 @@ void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) { vhost_config_mask(&net->dev, dev, mask); } + VHostNetState *get_vhost_net(NetClientState *nc) { - VHostNetState *vhost_net = 0; - if (!nc) { return 0; } - switch (nc->info->type) { - case NET_CLIENT_DRIVER_TAP: - vhost_net = tap_get_vhost_net(nc); - /* - * tap_get_vhost_net() can return NULL if a tap net-device backend is - * created with 'vhost=off' option, 'vhostforce=off' or no vhost or - * vhostforce or vhostfd options at all. Please see net_init_tap_one(). - * Hence, we omit the assertion here. - */ - break; -#ifdef CONFIG_VHOST_NET_USER - case NET_CLIENT_DRIVER_VHOST_USER: - vhost_net = vhost_user_get_vhost_net(nc); - assert(vhost_net); - break; -#endif -#ifdef CONFIG_VHOST_NET_VDPA - case NET_CLIENT_DRIVER_VHOST_VDPA: - vhost_net = vhost_vdpa_get_vhost_net(nc); - assert(vhost_net); - break; -#endif - default: - break; + if (nc->info->get_vhost_net) { + return nc->info->get_vhost_net(nc); } - return vhost_net; + return NULL; } -int vhost_set_vring_enable(NetClientState *nc, int enable) +int vhost_net_set_vring_enable(NetClientState *nc, int enable) { VHostNetState *net = get_vhost_net(nc); const VhostOps *vhost_ops = net->dev.vhost_ops; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 2de037c..6b5b5da 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -158,7 +158,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? 
VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); virtio_stl_p(vdev, &netcfg.supported_hash_types, - VIRTIO_NET_RSS_SUPPORTED_HASHES); + n->rss_data.supported_hash_types); memcpy(config, &netcfg, n->config_size); /* @@ -382,7 +382,7 @@ static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) } } -static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) +static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) { VirtIONet *n = VIRTIO_NET(vdev); VirtIONetQueue *q; @@ -437,6 +437,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) } } } + return 0; } static void virtio_net_set_link_status(NetClientState *nc) @@ -669,34 +670,36 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, static int virtio_net_max_tx_queue_size(VirtIONet *n) { NetClientState *peer = n->nic_conf.peers.ncs[0]; + struct vhost_net *net; - /* - * Backends other than vhost-user or vhost-vdpa don't support max queue - * size. - */ if (!peer) { - return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; + goto default_value; } - switch(peer->info->type) { - case NET_CLIENT_DRIVER_VHOST_USER: - case NET_CLIENT_DRIVER_VHOST_VDPA: - return VIRTQUEUE_MAX_SIZE; - default: - return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; - }; + net = get_vhost_net(peer); + + if (!net || !net->max_tx_queue_size) { + goto default_value; + } + + return net->max_tx_queue_size; + +default_value: + return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; } static int peer_attach(VirtIONet *n, int index) { NetClientState *nc = qemu_get_subqueue(n->nic, index); + struct vhost_net *net; if (!nc->peer) { return 0; } - if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - vhost_set_vring_enable(nc->peer, 1); + net = get_vhost_net(nc->peer); + if (net && net->is_vhost_user) { + vhost_net_set_vring_enable(nc->peer, 1); } if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { @@ -713,13 +716,15 @@ static int peer_attach(VirtIONet *n, int index) static int peer_detach(VirtIONet *n, int index) { NetClientState *nc = qemu_get_subqueue(n->nic, index); + struct vhost_net *net; if (!nc->peer) { return 0; } - if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - vhost_set_vring_enable(nc->peer, 0); + net = get_vhost_net(nc->peer); + if (net && net->is_vhost_user) { + vhost_net_set_vring_enable(nc->peer, 0); } if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { @@ -751,79 +756,6 @@ static void virtio_net_set_queue_pairs(VirtIONet *n) static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); -static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, - Error **errp) -{ - VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc = qemu_get_queue(n->nic); - - /* Firstly sync all virtio-net possible supported features */ - features |= n->host_features; - - virtio_add_feature(&features, VIRTIO_NET_F_MAC); - - if (!peer_has_vnet_hdr(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); - - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); - - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); - virtio_clear_feature(&features, 
VIRTIO_NET_F_GUEST_USO6); - - virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); - } - - if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); - } - - if (!peer_has_uso(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); - } - - if (!get_vhost_net(nc->peer)) { - return features; - } - - if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { - virtio_clear_feature(&features, VIRTIO_NET_F_RSS); - } - features = vhost_net_get_features(get_vhost_net(nc->peer), features); - vdev->backend_features = features; - - if (n->mtu_bypass_backend && - (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { - features |= (1ULL << VIRTIO_NET_F_MTU); - } - - /* - * Since GUEST_ANNOUNCE is emulated the feature bit could be set without - * enabled. This happens in the vDPA case. - * - * Make sure the feature set is not incoherent, as the driver could refuse - * to start. - * - * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, - * helping guest to notify the new location with vDPA devices that does not - * support it. - */ - if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); - } - - return features; -} - static uint64_t virtio_net_bad_features(VirtIODevice *vdev) { uint64_t features = 0; @@ -997,8 +929,9 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) vhost_net_save_acked_features(nc->peer); } - if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { - memset(n->vlans, 0xff, MAX_VLAN >> 3); + if (virtio_has_feature(vdev->guest_features ^ features, VIRTIO_NET_F_CTRL_VLAN)) { + bool vlan = virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN); + memset(n->vlans, vlan ? 
0 : 0xff, MAX_VLAN >> 3); } if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { @@ -1250,7 +1183,7 @@ static void rss_data_to_rss_config(struct VirtioNetRssData *data, { config->redirect = data->redirect; config->populate_hash = data->populate_hash; - config->hash_types = data->hash_types; + config->hash_types = data->runtime_hash_types; config->indirections_len = data->indirections_len; config->default_queue = data->default_queue; } @@ -1285,6 +1218,10 @@ static void virtio_net_detach_ebpf_rss(VirtIONet *n) static void virtio_net_commit_rss_config(VirtIONet *n) { + if (n->rss_data.peer_hash_available) { + return; + } + if (n->rss_data.enabled) { n->rss_data.enabled_software_rss = n->rss_data.populate_hash; if (n->rss_data.populate_hash) { @@ -1299,7 +1236,7 @@ static void virtio_net_commit_rss_config(VirtIONet *n) } trace_virtio_net_rss_enable(n, - n->rss_data.hash_types, + n->rss_data.runtime_hash_types, n->rss_data.indirections_len, sizeof(n->rss_data.key)); } else { @@ -1410,7 +1347,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, err_value = (uint32_t)s; goto error; } - n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); + n->rss_data.runtime_hash_types = virtio_ldl_p(vdev, &cfg.hash_types); n->rss_data.indirections_len = virtio_lduw_p(vdev, &cfg.indirection_table_mask); if (!do_rss) { @@ -1473,12 +1410,12 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, err_value = temp.b; goto error; } - if (!temp.b && n->rss_data.hash_types) { + if (!temp.b && n->rss_data.runtime_hash_types) { err_msg = "No key provided"; err_value = 0; goto error; } - if (!temp.b && !n->rss_data.hash_types) { + if (!temp.b && !n->rss_data.runtime_hash_types) { virtio_net_disable_rss(n); return queue_pairs; } @@ -1880,7 +1817,7 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len); net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto, - n->rss_data.hash_types); + n->rss_data.runtime_hash_types); if (net_hash_type > NetPktRssIpV6UdpEx) { if (n->rss_data.populate_hash) { hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE; @@ -1910,9 +1847,9 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, VirtIONet *n = qemu_get_nic_opaque(nc); VirtIONetQueue *q; VirtIODevice *vdev = VIRTIO_DEVICE(n); - VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; - size_t lens[VIRTQUEUE_MAX_SIZE]; - struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; struct virtio_net_hdr_v1_hash extra_hdr; unsigned mhdr_cnt = 0; size_t offset, i, guest_offset, j; @@ -3021,11 +2958,10 @@ static void virtio_net_del_queue(VirtIONet *n, int index) virtio_del_queue(vdev, index * 2 + 1); } -static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) +static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues) { VirtIODevice *vdev = VIRTIO_DEVICE(n); int old_num_queues = virtio_get_num_queues(vdev); - int new_num_queues = new_max_queue_pairs * 2 + 1; int i; assert(old_num_queues >= 3); @@ -3061,20 +2997,115 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) int max = multiqueue ? 
n->max_queue_pairs : 1; n->multiqueue = multiqueue; - virtio_net_change_num_queue_pairs(n, max); + virtio_net_change_num_queues(n, max * 2 + 1); virtio_net_set_queue_pairs(n); } -static int virtio_net_pre_load_queues(VirtIODevice *vdev) +static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n) { - virtio_net_set_multiqueue(VIRTIO_NET(vdev), - virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_RSS) || - virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MQ)); + virtio_net_change_num_queues(VIRTIO_NET(vdev), n); return 0; } +static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_queue(n->nic); + uint32_t supported_hash_types = n->rss_data.supported_hash_types; + uint32_t peer_hash_types = n->rss_data.peer_hash_types; + bool use_own_hash = + (supported_hash_types & VIRTIO_NET_RSS_SUPPORTED_HASHES) == + supported_hash_types; + bool use_peer_hash = + n->rss_data.peer_hash_available && + (supported_hash_types & peer_hash_types) == supported_hash_types; + + /* Firstly sync all virtio-net possible supported features */ + features |= n->host_features; + + virtio_add_feature(&features, VIRTIO_NET_F_MAC); + + if (!peer_has_vnet_hdr(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); + + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + } + + if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); + } + + if (!peer_has_uso(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); + } + + if (!get_vhost_net(nc->peer)) { + if (!use_own_hash) { + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + } else if (virtio_has_feature(features, VIRTIO_NET_F_RSS)) { + virtio_net_load_ebpf(n, errp); + } + + return features; + } + + if (!use_peer_hash) { + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + + if (!use_own_hash || !virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + if (!virtio_net_load_ebpf(n, errp)) { + return features; + } + + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + } + } + + features = vhost_net_get_features(get_vhost_net(nc->peer), features); + vdev->backend_features = features; + + if (n->mtu_bypass_backend && + (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { + features |= (1ULL << VIRTIO_NET_F_MTU); + } + + /* + * Since GUEST_ANNOUNCE is emulated the feature bit could be set without + * enabled. This happens in the vDPA case. + * + * Make sure the feature set is not incoherent, as the driver could refuse + * to start. 
+ * + * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, + * helping guest to notify the new location with vDPA devices that does not + * support it. + */ + if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); + } + + return features; +} + static int virtio_net_post_load_device(void *opaque, int version_id) { VirtIONet *n = opaque; @@ -3313,6 +3344,17 @@ static const VMStateDescription vmstate_virtio_net_has_vnet = { }, }; +static int virtio_net_rss_post_load(void *opaque, int version_id) +{ + VirtIONet *n = VIRTIO_NET(opaque); + + if (version_id == 1) { + n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES; + } + + return 0; +} + static bool virtio_net_rss_needed(void *opaque) { return VIRTIO_NET(opaque)->rss_data.enabled; @@ -3320,14 +3362,16 @@ static bool virtio_net_rss_needed(void *opaque) static const VMStateDescription vmstate_virtio_net_rss = { .name = "virtio-net-device/rss", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, + .post_load = virtio_net_rss_post_load, .needed = virtio_net_rss_needed, .fields = (const VMStateField[]) { VMSTATE_BOOL(rss_data.enabled, VirtIONet), VMSTATE_BOOL(rss_data.redirect, VirtIONet), VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), - VMSTATE_UINT32(rss_data.hash_types, VirtIONet), + VMSTATE_UINT32(rss_data.runtime_hash_types, VirtIONet), + VMSTATE_UINT32_V(rss_data.supported_hash_types, VirtIONet, 2), VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), VMSTATE_UINT16(rss_data.default_queue, VirtIONet), VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, @@ -3899,6 +3943,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); n->vlans = g_malloc0(MAX_VLAN >> 3); + memset(n->vlans, 0xff, MAX_VLAN >> 3); nc = qemu_get_queue(n->nic); nc->rxfilter_notify_enabled = 1; @@ -3914,8 +3959,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) net_rx_pkt_init(&n->rx_pkt); - if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { - virtio_net_load_ebpf(n, errp); + if (qemu_get_vnet_hash_supported_types(qemu_get_queue(n->nic)->peer, + &n->rss_data.peer_hash_types)) { + n->rss_data.peer_hash_available = true; + n->rss_data.supported_hash_types = + n->rss_data.specified_hash_types.on_bits | + (n->rss_data.specified_hash_types.auto_bits & + n->rss_data.peer_hash_types); + } else { + n->rss_data.supported_hash_types = + n->rss_data.specified_hash_types.on_bits | + n->rss_data.specified_hash_types.auto_bits; } } @@ -3989,7 +4043,6 @@ static void virtio_net_reset(VirtIODevice *vdev) memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); - memset(n->vlans, 0, MAX_VLAN >> 3); /* Flush any async TX */ for (i = 0; i < n->max_queue_pairs; i++) { @@ -4132,6 +4185,42 @@ static const Property virtio_net_properties[] = { VIRTIO_NET_F_GUEST_USO6, true), DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features, VIRTIO_NET_F_HOST_USO, true), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv4 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp4", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv4 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp4", VirtIONet, + rss_data.specified_hash_types, + 
VIRTIO_NET_HASH_REPORT_UDPv4 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv6_EX - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv6_EX - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1, + ON_OFF_AUTO_AUTO), }; static void virtio_net_class_init(ObjectClass *klass, const void *data) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 83d942a..af73aa8 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -22,7 +22,6 @@ #include "net/tap.h" #include "net/checksum.h" #include "system/system.h" -#include "qemu/bswap.h" #include "qemu/log.h" #include "qemu/module.h" #include "hw/pci/msix.h" @@ -41,19 +40,9 @@ #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1 #define VMXNET3_MSIX_BAR_SIZE 0x2000 -/* Compatibility flags for migration */ -#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0 -#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \ - (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT) -#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1 -#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \ - (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT) - #define VMXNET3_EXP_EP_OFFSET (0x48) -#define VMXNET3_MSI_OFFSET(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84) -#define VMXNET3_MSIX_OFFSET(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c) +#define VMXNET3_MSI_OFFSET (0x84) +#define VMXNET3_MSIX_OFFSET (0x9c) #define VMXNET3_DSN_OFFSET (0x100) #define VMXNET3_BAR0_IDX (0) @@ -61,8 +50,7 @@ #define VMXNET3_MSIX_BAR_IDX (2) #define VMXNET3_OFF_MSIX_TABLE (0x000) -#define VMXNET3_OFF_MSIX_PBA(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000) +#define VMXNET3_OFF_MSIX_PBA (0x1000) /* Link speed in Mbps should be shifted by 16 */ #define VMXNET3_LINK_SPEED (1000 << 16) @@ -2122,8 +2110,8 @@ vmxnet3_init_msix(VMXNET3State *s) &s->msix_bar, VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE, &s->msix_bar, - VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s), - VMXNET3_MSIX_OFFSET(s), NULL); + VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA, + VMXNET3_MSIX_OFFSET, NULL); if (0 > res) { VMW_WRPRN("Failed to initialize MSI-X, error %d", res); @@ -2221,7 +2209,7 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) /* Interrupt pin A */ pci_dev->config[PCI_INTERRUPT_PIN] = 0x01; - ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS, + ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS, VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL); /* Any error other than -ENOTSUP(board's MSI support is broken) * is a programming error. 
Fall back to INTx silently on -ENOTSUP */ @@ -2249,6 +2237,7 @@ static void vmxnet3_instance_init(Object *obj) device_add_bootindex_property(obj, &s->conf.bootindex, "bootindex", "/ethernet-phy@0", DEVICE(obj)); + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; } static void vmxnet3_pci_uninit(PCIDevice *pci_dev) @@ -2472,30 +2461,12 @@ static const VMStateDescription vmstate_vmxnet3 = { static const Property vmxnet3_properties[] = { DEFINE_NIC_PROPERTIES(VMXNET3State, conf), - DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags, - VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags, - VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false), }; -static void vmxnet3_realize(DeviceState *qdev, Error **errp) -{ - VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev); - PCIDevice *pci_dev = PCI_DEVICE(qdev); - VMXNET3State *s = VMXNET3(qdev); - - if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) { - pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; - } - - vc->parent_dc_realize(qdev, errp); -} - static void vmxnet3_class_init(ObjectClass *class, const void *data) { DeviceClass *dc = DEVICE_CLASS(class); PCIDeviceClass *c = PCI_DEVICE_CLASS(class); - VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class); c->realize = vmxnet3_pci_realize; c->exit = vmxnet3_pci_uninit; @@ -2506,8 +2477,6 @@ static void vmxnet3_class_init(ObjectClass *class, const void *data) c->class_id = PCI_CLASS_NETWORK_ETHERNET; c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE; c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3; - device_class_set_parent_realize(dc, vmxnet3_realize, - &vc->parent_dc_realize); dc->desc = "VMWare Paravirtualized Ethernet v3"; device_class_set_legacy_reset(dc, vmxnet3_qdev_reset); dc->vmsd = &vmstate_vmxnet3; diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h index f9283f9..dbc69d5 100644 --- a/hw/net/vmxnet3.h +++ b/hw/net/vmxnet3.h @@ -63,8 +63,8 @@ * details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * along with this program; if not, see + * <https://www.gnu.org/licenses/>. * * The full GNU General Public License is included in this distribution in * the file called "COPYING". diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c index 9c87c4e..d45f872 100644 --- a/hw/net/xgmac.c +++ b/hw/net/xgmac.c @@ -207,7 +207,7 @@ static void xgmac_enet_send(XgmacState *s) struct desc bd; int frame_size; int len; - uint8_t frame[8192]; + QEMU_UNINITIALIZED uint8_t frame[8192]; uint8_t *ptr; ptr = frame; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index fd93550..e764ec7 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -22,7 +22,7 @@ * * Usage * ----- - * See docs/system/nvme.rst for extensive documentation. + * See docs/system/devices/nvme.rst for extensive documentation. 
* * Add options: * -drive file=<file>,if=none,id=<drive_id> @@ -1057,7 +1057,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, */ #define SEG_CHUNK_SIZE 256 - NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; + QEMU_UNINITIALIZED NvmeSglDescriptor segment[SEG_CHUNK_SIZE]; + NvmeSglDescriptor *sgld, *last_sgld; uint64_t nsgld; uint32_t seg_len; uint16_t status; @@ -5128,7 +5129,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { - uint32_t nslist[1024]; + uint32_t nslist[1024] = {}; uint32_t trans_len; int i = 0; uint32_t nsid; @@ -5138,7 +5139,6 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, return NVME_INVALID_FIELD | NVME_DNR; } - memset(nslist, 0x0, sizeof(nslist)); trans_len = MIN(sizeof(nslist) - off, buf_len); while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 38271d7..777e1c6 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -226,7 +226,6 @@ static void nvme_subsys_class_init(ObjectClass *oc, const void *data) dc->realize = nvme_subsys_realize; dc->desc = "Virtual NVMe subsystem"; - dc->hotpluggable = false; device_class_set_props(dc, nvme_subsystem_props); } diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 237b9f7..aa24050 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -817,62 +817,6 @@ void fw_cfg_modify_i64(FWCfgState *s, uint16_t key, uint64_t value) g_free(old); } -void fw_cfg_set_order_override(FWCfgState *s, int order) -{ - assert(s->fw_cfg_order_override == 0); - s->fw_cfg_order_override = order; -} - -void fw_cfg_reset_order_override(FWCfgState *s) -{ - assert(s->fw_cfg_order_override != 0); - s->fw_cfg_order_override = 0; -} - -/* - * This is the legacy order list. For legacy systems, files are in - * the fw_cfg in the order defined below, by the "order" value. Note - * that some entries (VGA ROMs, NIC option ROMS, etc.) go into a - * specific area, but there may be more than one and they occur in the - * order that the user specifies them on the command line. Those are - * handled in a special manner, using the order override above. - * - * For non-legacy, the files are sorted by filename to avoid this kind - * of complexity in the future. - * - * This is only for x86, other arches don't implement versioning so - * they won't set legacy mode. - */ -static struct { - const char *name; - int order; -} fw_cfg_order[] = { - { "etc/boot-menu-wait", 10 }, - { "bootsplash.jpg", 11 }, - { "bootsplash.bmp", 12 }, - { "etc/boot-fail-wait", 15 }, - { "etc/smbios/smbios-tables", 20 }, - { "etc/smbios/smbios-anchor", 30 }, - { "etc/e820", 40 }, - { "etc/reserved-memory-end", 50 }, - { "genroms/kvmvapic.bin", 55 }, - { "genroms/linuxboot.bin", 60 }, - { }, /* VGA ROMs from pc_vga_init come here, 70. */ - { }, /* NIC option ROMs from pc_nic_init come here, 80. */ - { "etc/system-states", 90 }, - { }, /* User ROMs come here, 100. */ - { }, /* Device FW comes here, 110. 
*/ - { "etc/extra-pci-roots", 120 }, - { "etc/acpi/tables", 130 }, - { "etc/table-loader", 140 }, - { "etc/tpm/log", 150 }, - { "etc/acpi/rsdp", 160 }, - { "bootorder", 170 }, - { "etc/msr_feature_control", 180 }, - -#define FW_CFG_ORDER_OVERRIDE_LAST 200 -}; - /* * Any sub-page size update to these table MRs will be lost during migration, * as we use aligned size in ram_load_precopy() -> qemu_ram_resize() path. @@ -890,29 +834,6 @@ static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len) } } -static int get_fw_cfg_order(FWCfgState *s, const char *name) -{ - int i; - - if (s->fw_cfg_order_override > 0) { - return s->fw_cfg_order_override; - } - - for (i = 0; i < ARRAY_SIZE(fw_cfg_order); i++) { - if (fw_cfg_order[i].name == NULL) { - continue; - } - - if (strcmp(name, fw_cfg_order[i].name) == 0) { - return fw_cfg_order[i].order; - } - } - - /* Stick unknown stuff at the end. */ - warn_report("Unknown firmware file in legacy mode: %s", name); - return FW_CFG_ORDER_OVERRIDE_LAST; -} - void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, FWCfgCallback select_cb, FWCfgWriteCallback write_cb, @@ -921,7 +842,6 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, { int i, index, count; size_t dsize; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); int order = 0; if (!s->files) { @@ -933,22 +853,11 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, count = be32_to_cpu(s->files->count); assert(count < fw_cfg_file_slots(s)); - /* Find the insertion point. */ - if (mc->legacy_fw_cfg_order) { - /* - * Sort by order. For files with the same order, we keep them - * in the sequence in which they were added. - */ - order = get_fw_cfg_order(s, filename); - for (index = count; - index > 0 && order < s->entry_order[index - 1]; - index--); - } else { - /* Sort by file name. */ - for (index = count; - index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0; - index--); - } + /* Find the insertion point, sorting by file name. 
*/ + for (index = count; + index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0; + index--) + ; /* * Move all the entries from the index point and after down one @@ -1058,7 +967,6 @@ bool fw_cfg_add_file_from_generator(FWCfgState *s, static void fw_cfg_machine_reset(void *opaque) { - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); FWCfgState *s = opaque; void *ptr; size_t len; @@ -1068,11 +976,9 @@ static void fw_cfg_machine_reset(void *opaque) ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len); g_free(ptr); - if (!mc->legacy_fw_cfg_order) { - buf = get_boot_devices_lchs_list(&len); - ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len); - g_free(ptr); - } + buf = get_boot_devices_lchs_list(&len); + ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len); + g_free(ptr); } static void fw_cfg_machine_ready(struct Notifier *n, void *data) diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig index 35c0415..9824fa1 100644 --- a/hw/pci-host/Kconfig +++ b/hw/pci-host/Kconfig @@ -54,6 +54,7 @@ config PCI_EXPRESS_Q35 config PCI_EXPRESS_GENERIC_BRIDGE bool select PCI_EXPRESS + imply ACPI_PCI config PCI_EXPRESS_XILINX bool diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index e8b4c64..952a0ac 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -1,5 +1,6 @@ #include "qemu/osdep.h" #include "hw/acpi/aml-build.h" +#include "hw/acpi/pci.h" #include "hw/pci-host/gpex.h" #include "hw/arm/virt.h" #include "hw/pci/pci_bus.h" @@ -50,61 +51,10 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq, } } -static void acpi_dsdt_add_pci_osc(Aml *dev) +static Aml *build_pci_host_bridge_dsm_method(void) { - Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf; - - /* Declare an _OSC (OS Control Handoff) method */ - aml_append(dev, aml_name_decl("SUPP", aml_int(0))); - aml_append(dev, aml_name_decl("CTRL", aml_int(0))); - method = aml_method("_OSC", 4, AML_NOTSERIALIZED); - aml_append(method, - aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); - - /* PCI Firmware Specification 3.0 - * 4.5.1. _OSC Interface for PCI Host Bridge Devices - * The _OSC interface for a PCI/PCI-X/PCI Express hierarchy is - * identified by the Universal Unique IDentifier (UUID) - * 33DB4D5B-1FF7-401C-9657-7441C03DD766 - */ - UUID = aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766"); - ifctx = aml_if(aml_equal(aml_arg(0), UUID)); - aml_append(ifctx, - aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); - aml_append(ifctx, - aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); - aml_append(ifctx, aml_store(aml_name("CDW2"), aml_name("SUPP"))); - aml_append(ifctx, aml_store(aml_name("CDW3"), aml_name("CTRL"))); - - /* - * Allow OS control for all 5 features: - * PCIeHotplug SHPCHotplug PME AER PCIeCapability. 
- */ - aml_append(ifctx, aml_and(aml_name("CTRL"), aml_int(0x1F), - aml_name("CTRL"))); - - ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1)))); - aml_append(ifctx1, aml_or(aml_name("CDW1"), aml_int(0x08), - aml_name("CDW1"))); - aml_append(ifctx, ifctx1); - - ifctx1 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), aml_name("CTRL")))); - aml_append(ifctx1, aml_or(aml_name("CDW1"), aml_int(0x10), - aml_name("CDW1"))); - aml_append(ifctx, ifctx1); - - aml_append(ifctx, aml_store(aml_name("CTRL"), aml_name("CDW3"))); - aml_append(ifctx, aml_return(aml_arg(3))); - aml_append(method, ifctx); - - elsectx = aml_else(); - aml_append(elsectx, aml_or(aml_name("CDW1"), aml_int(4), - aml_name("CDW1"))); - aml_append(elsectx, aml_return(aml_arg(3))); - aml_append(method, elsectx); - aml_append(dev, method); - - method = aml_method("_DSM", 4, AML_NOTSERIALIZED); + Aml *method = aml_method("_DSM", 4, AML_NOTSERIALIZED); + Aml *UUID, *ifctx, *ifctx1, *buf; /* PCI Firmware Specification 3.0 * 4.6.1. _DSM for PCI Express Slot Information @@ -123,7 +73,16 @@ static void acpi_dsdt_add_pci_osc(Aml *dev) byte_list[0] = 0; buf = aml_buffer(1, byte_list); aml_append(method, aml_return(buf)); - aml_append(dev, method); + return method; +} + +static void acpi_dsdt_add_host_bridge_methods(Aml *dev, + bool enable_native_pcie_hotplug) +{ + /* Declare an _OSC (OS Control Handoff) method */ + aml_append(dev, + build_pci_host_bridge_osc_method(enable_native_pcie_hotplug)); + aml_append(dev, build_pci_host_bridge_dsm_method()); } void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) @@ -182,7 +141,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) /* * Resources defined for PXBs are composed of the following parts: - * 1. The resources the pci-brige/pcie-root-port need. + * 1. The resources the pci-bridge/pcie-root-port need. * 2. The resources the devices behind pxb need. 
*/ crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), &crs_range_set, @@ -192,7 +151,8 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) if (is_cxl) { build_cxl_osc_method(dev); } else { - acpi_dsdt_add_pci_osc(dev); + /* pxb bridges do not have ACPI PCI Hot-plug enabled */ + acpi_dsdt_add_host_bridge_methods(dev, true); } aml_append(scope, dev); @@ -267,7 +227,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) } aml_append(dev, aml_name_decl("_CRS", rbuf)); - acpi_dsdt_add_pci_osc(dev); + acpi_dsdt_add_host_bridge_methods(dev, cfg->pci_native_hotplug); Aml *dev_res0 = aml_device("%s", "RES0"); aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02"))); diff --git a/hw/pci-host/gt64120.c b/hw/pci-host/gt64120.c index 56a6ef9..b1d96f6 100644 --- a/hw/pci-host/gt64120.c +++ b/hw/pci-host/gt64120.c @@ -28,6 +28,7 @@ #include "qapi/error.h" #include "qemu/units.h" #include "qemu/log.h" +#include "qemu/bswap.h" #include "hw/qdev-properties.h" #include "hw/registerfields.h" #include "hw/pci/pci_device.h" @@ -320,38 +321,6 @@ static void gt64120_isd_mapping(GT64120State *s) memory_region_transaction_commit(); } -static void gt64120_update_pci_cfgdata_mapping(GT64120State *s) -{ - /* Indexed on MByteSwap bit, see Table 158: PCI_0 Command, Offset: 0xc00 */ - static const MemoryRegionOps *pci_host_data_ops[] = { - &pci_host_data_be_ops, &pci_host_data_le_ops - }; - PCIHostState *phb = PCI_HOST_BRIDGE(s); - - memory_region_transaction_begin(); - - /* - * The setting of the MByteSwap bit and MWordSwap bit in the PCI Internal - * Command Register determines how data transactions from the CPU to/from - * PCI are handled along with the setting of the Endianness bit in the CPU - * Configuration Register. See: - * - Table 16: 32-bit PCI Transaction Endianness - * - Table 158: PCI_0 Command, Offset: 0xc00 - */ - - if (memory_region_is_mapped(&phb->data_mem)) { - memory_region_del_subregion(&s->ISD_mem, &phb->data_mem); - object_unparent(OBJECT(&phb->data_mem)); - } - memory_region_init_io(&phb->data_mem, OBJECT(phb), - pci_host_data_ops[s->regs[GT_PCI0_CMD] & 1], - s, "pci-conf-data", 4); - memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGDATA << 2, - &phb->data_mem, 1); - - memory_region_transaction_commit(); -} - static void gt64120_pci_mapping(GT64120State *s) { memory_region_transaction_begin(); @@ -645,7 +614,6 @@ static void gt64120_writel(void *opaque, hwaddr addr, case GT_PCI0_CMD: case GT_PCI1_CMD: s->regs[saddr] = val & 0x0401fc0f; - gt64120_update_pci_cfgdata_mapping(s); break; case GT_PCI0_TOR: case GT_PCI0_BS_SCS10: @@ -1024,6 +992,48 @@ static const MemoryRegionOps isd_mem_ops = { }, }; +static bool bswap(const GT64120State *s) +{ + PCIHostState *phb = PCI_HOST_BRIDGE(s); + /*check for bus == 0 && device == 0, Bits 11:15 = Device , Bits 16:23 = Bus*/ + bool is_phb_dev0 = extract32(phb->config_reg, 11, 13) == 0; + bool le_mode = FIELD_EX32(s->regs[GT_PCI0_CMD], GT_PCI0_CMD, MByteSwap); + /* Only swap for non-bridge devices in big-endian mode */ + return !le_mode && !is_phb_dev0; +} + +static uint64_t gt64120_pci_data_read(void *opaque, hwaddr addr, unsigned size) +{ + GT64120State *s = opaque; + uint32_t val = pci_host_data_le_ops.read(opaque, addr, size); + + if (bswap(s)) { + val = bswap32(val); + } + return val; +} + +static void gt64120_pci_data_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + GT64120State *s = opaque; + + if (bswap(s)) { + val = bswap32(val); + } + pci_host_data_le_ops.write(opaque, addr, val, size); +} + 
+static const MemoryRegionOps gt64120_pci_data_ops = { + .read = gt64120_pci_data_read, + .write = gt64120_pci_data_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + static void gt64120_reset(DeviceState *dev) { GT64120State *s = GT64120_PCI_HOST_BRIDGE(dev); @@ -1178,7 +1188,6 @@ static void gt64120_reset(DeviceState *dev) gt64120_isd_mapping(s); gt64120_pci_mapping(s); - gt64120_update_pci_cfgdata_mapping(s); } static void gt64120_realize(DeviceState *dev, Error **errp) @@ -1202,6 +1211,12 @@ static void gt64120_realize(DeviceState *dev, Error **errp) memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGADDR << 2, &phb->conf_mem, 1); + memory_region_init_io(&phb->data_mem, OBJECT(phb), + &gt64120_pci_data_ops, + s, "pci-conf-data", 4); + memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGDATA << 2, + &phb->data_mem, 1); + /* * The whole address space decoded by the GT-64120A doesn't generate diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index a4335f4..5d8383f 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" #include "qemu/log.h" +#include "qemu/bswap.h" #include "qapi/visitor.h" #include "qapi/error.h" #include "hw/pci-host/pnv_phb3_regs.h" diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 77ea352..1899205 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" #include "qemu/log.h" +#include "qemu/bswap.h" #include "qapi/visitor.h" #include "qapi/error.h" #include "target/ppc/cpu.h" diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index e97a515..975d191 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -16,12 +16,10 @@ #include "qemu/osdep.h" #include "hw/irq.h" -#include "hw/ppc/e500-ccsr.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" #include "hw/pci/pci_device.h" #include "hw/pci/pci_host.h" -#include "qemu/bswap.h" #include "hw/pci-host/ppce500.h" #include "qom/object.h" @@ -418,11 +416,12 @@ static const VMStateDescription vmstate_ppce500_pci = { static void e500_pcihost_bridge_realize(PCIDevice *d, Error **errp) { PPCE500PCIBridgeState *b = PPC_E500_PCI_BRIDGE(d); - PPCE500CCSRState *ccsr = CCSR( + SysBusDevice *ccsr = SYS_BUS_DEVICE( object_resolve_path_component(qdev_get_machine(), "e500-ccsr")); + MemoryRegion *ccsr_space = sysbus_mmio_get_region(ccsr, 0); - memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0", &ccsr->ccsr_space, - 0, int128_get64(ccsr->ccsr_space.size)); + memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0", + ccsr_space, 0, int128_get64(ccsr_space->size)); pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0); } diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c index 21f7ca6..f8c0be5 100644 --- a/hw/pci-host/raven.c +++ b/hw/pci-host/raven.c @@ -24,7 +24,6 @@ */ #include "qemu/osdep.h" -#include "qemu/datadir.h" #include "qemu/units.h" #include "qemu/log.h" #include "qapi/error.h" @@ -35,9 +34,7 @@ #include "migration/vmstate.h" #include "hw/intc/i8259.h" #include "hw/irq.h" -#include "hw/loader.h" #include "hw/or-irq.h" -#include "elf.h" #include "qom/object.h" #define TYPE_RAVEN_PCI_DEVICE "raven" @@ -47,10 +44,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(RavenPCIState, RAVEN_PCI_DEVICE) struct RavenPCIState { PCIDevice dev; - - uint32_t elf_machine; - char *bios_name; - MemoryRegion bios; }; typedef struct PRePPCIState PREPPCIState; @@ -75,11 +68,8 @@ struct 
PRePPCIState { RavenPCIState pci_dev; int contiguous_map; - bool is_legacy_prep; }; -#define BIOS_SIZE (1 * MiB) - #define PCI_IO_BASE_ADDR 0x80000000 /* Physical address on main bus */ static inline uint32_t raven_pci_io_config(hwaddr addr) @@ -243,22 +233,18 @@ static void raven_pcihost_realizefn(DeviceState *d, Error **errp) MemoryRegion *address_space_mem = get_system_memory(); int i; - if (s->is_legacy_prep) { - for (i = 0; i < PCI_NUM_PINS; i++) { - sysbus_init_irq(dev, &s->pci_irqs[i]); - } - } else { - /* According to PReP specification section 6.1.6 "System Interrupt - * Assignments", all PCI interrupts are routed via IRQ 15 */ - s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ)); - object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS, - &error_fatal); - qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal); - sysbus_init_irq(dev, &s->or_irq->out_irq); - - for (i = 0; i < PCI_NUM_PINS; i++) { - s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i); - } + /* + * According to PReP specification section 6.1.6 "System Interrupt + * Assignments", all PCI interrupts are routed via IRQ 15 + */ + s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ)); + object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS, + &error_fatal); + qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal); + sysbus_init_irq(dev, &s->or_irq->out_irq); + + for (i = 0; i < PCI_NUM_PINS; i++) { + s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i); } qdev_init_gpio_in(d, raven_change_gpio, 1); @@ -338,48 +324,9 @@ static void raven_pcihost_initfn(Object *obj) static void raven_realize(PCIDevice *d, Error **errp) { - RavenPCIState *s = RAVEN_PCI_DEVICE(d); - char *filename; - int bios_size = -1; - d->config[PCI_CACHE_LINE_SIZE] = 0x08; d->config[PCI_LATENCY_TIMER] = 0x10; d->config[PCI_CAPABILITY_LIST] = 0x00; - - if (!memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios", - BIOS_SIZE, errp)) { - return; - } - memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE), - &s->bios); - if (s->bios_name) { - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name); - if (filename) { - if (s->elf_machine != EM_NONE) { - bios_size = load_elf(filename, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, - ELFDATA2MSB, s->elf_machine, 0, 0); - } - if (bios_size < 0) { - bios_size = get_image_size(filename); - if (bios_size > 0 && bios_size <= BIOS_SIZE) { - hwaddr bios_addr; - bios_size = (bios_size + 0xfff) & ~0xfff; - bios_addr = (uint32_t)(-BIOS_SIZE); - bios_size = load_image_targphys(filename, bios_addr, - bios_size); - } - } - } - g_free(filename); - if (bios_size < 0 || bios_size > BIOS_SIZE) { - memory_region_del_subregion(get_system_memory(), &s->bios); - error_setg(errp, "Could not load bios image '%s'", s->bios_name); - return; - } - } - - vmstate_register_ram_global(&s->bios); } static const VMStateDescription vmstate_raven = { @@ -422,22 +369,12 @@ static const TypeInfo raven_info = { }, }; -static const Property raven_pcihost_properties[] = { - DEFINE_PROP_UINT32("elf-machine", PREPPCIState, pci_dev.elf_machine, - EM_NONE), - DEFINE_PROP_STRING("bios-name", PREPPCIState, pci_dev.bios_name), - /* Temporary workaround until legacy prep machine is removed */ - DEFINE_PROP_BOOL("is-legacy-prep", PREPPCIState, is_legacy_prep, - false), -}; - static void raven_pcihost_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->realize = raven_pcihost_realizefn; - device_class_set_props(dc, 
raven_pcihost_properties); dc->fw_name = "pci"; } diff --git a/hw/pci-host/sh_pci.c b/hw/pci-host/sh_pci.c index de8f6a8..62fb945 100644 --- a/hw/pci-host/sh_pci.c +++ b/hw/pci-host/sh_pci.c @@ -28,7 +28,6 @@ #include "hw/irq.h" #include "hw/pci/pci_device.h" #include "hw/pci/pci_host.h" -#include "qemu/bswap.h" #include "qemu/module.h" #include "qom/object.h" diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 66f27b9..8c7f670 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -72,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) return dev->msix_pba + vector / 8; } -static int msix_is_pending(PCIDevice *dev, int vector) +int msix_is_pending(PCIDevice *dev, unsigned int vector) { return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index fe38c4c..c70b5ce 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -32,6 +32,7 @@ #include "hw/pci/pci_host.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "migration/cpr.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" #include "net/net.h" @@ -54,13 +55,6 @@ #include "hw/xen/xen.h" #include "hw/i386/kvm/xen_evtchn.h" -//#define DEBUG_PCI -#ifdef DEBUG_PCI -# define PCI_DPRINTF(format, ...) printf(format, ## __VA_ARGS__) -#else -# define PCI_DPRINTF(format, ...) do { } while (0) -#endif - bool pci_available = true; static char *pcibus_get_dev_path(DeviceState *dev); @@ -101,6 +95,7 @@ static const Property pci_props[] = { QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice, max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE), + DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf), DEFINE_PROP_BIT("x-pcie-ext-tag", PCIDevice, cap_present, QEMU_PCIE_EXT_TAG_BITNR, true), { .name = "busnr", .info = &prop_pci_busnr }, @@ -134,6 +129,12 @@ static GSequence *pci_acpi_index_list(void) return used_acpi_index_list; } +static void pci_set_master(PCIDevice *d, bool enable) +{ + memory_region_set_enabled(&d->bus_master_enable_region, enable); + d->is_master = enable; /* cache the status */ +} + static void pci_init_bus_master(PCIDevice *pci_dev) { AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev); @@ -141,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev) memory_region_init_alias(&pci_dev->bus_master_enable_region, OBJECT(pci_dev), "bus master", dma_as->root, 0, memory_region_size(dma_as->root)); - memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); + pci_set_master(pci_dev, false); memory_region_add_subregion(&pci_dev->bus_master_container_region, 0, &pci_dev->bus_master_enable_region); } @@ -537,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev) static void pci_do_device_reset(PCIDevice *dev) { + if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) { + return; + } + pci_device_deassert_intx(dev); assert(dev->irq_state == 0); @@ -810,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size, pci_bridge_update_mappings(PCI_BRIDGE(s)); } - memory_region_set_enabled(&s->bus_master_enable_region, - pci_get_word(s->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + pci_set_master(s, pci_get_word(s->config + PCI_COMMAND) + & PCI_COMMAND_MASTER); g_free(config); return 0; @@ -1112,13 +1116,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; } - /* - * With SR/IOV and ARI, a device at function 0 need not be a 
multifunction - * device, as it may just be a VF that ended up with function 0 in - * the legacy PCI interpretation. Avoid failing in such cases: - */ - if (pci_is_vf(dev) && - dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + /* SR/IOV is not handled here. */ + if (pci_is_vf(dev)) { return; } @@ -1151,7 +1150,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) } /* function 0 indicates single function, so function > 0 must be NULL */ for (func = 1; func < PCI_FUNC_MAX; ++func) { - if (bus->devices[PCI_DEVFN(slot, func)]) { + PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)]; + if (device && !pci_is_vf(device)) { error_setg(errp, "PCI: %x.0 indicates single function, " "but %x.%x is already populated.", slot, slot, func); @@ -1439,6 +1439,7 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_unregister_io_regions(pci_dev); pci_del_option_rom(pci_dev); + pcie_sriov_unregister_device(pci_dev); if (pc->exit) { pc->exit(pci_dev); @@ -1470,7 +1471,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; - assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1482,7 +1482,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, r = &pci_dev->io_regions[region_num]; assert(!r->size); - r->addr = PCI_BAR_UNMAPPED; r->size = size; r->type = type; r->memory = memory; @@ -1490,22 +1489,35 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, ? pci_get_bus(pci_dev)->address_space_io : pci_get_bus(pci_dev)->address_space_mem; - wmask = ~(size - 1); - if (region_num == PCI_ROM_SLOT) { - /* ROM enable bit is writable */ - wmask |= PCI_ROM_ADDRESS_ENABLE; - } - - addr = pci_bar(pci_dev, region_num); - pci_set_long(pci_dev->config + addr, type); + if (pci_is_vf(pci_dev)) { + PCIDevice *pf = pci_dev->exp.sriov_vf.pf; + assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]); - if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && - r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - pci_set_quad(pci_dev->wmask + addr, wmask); - pci_set_quad(pci_dev->cmask + addr, ~0ULL); + r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size); + if (r->addr != PCI_BAR_UNMAPPED) { + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } } else { - pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); - pci_set_long(pci_dev->cmask + addr, 0xffffffff); + r->addr = PCI_BAR_UNMAPPED; + + wmask = ~(size - 1); + if (region_num == PCI_ROM_SLOT) { + /* ROM enable bit is writable */ + wmask |= PCI_ROM_ADDRESS_ENABLE; + } + + addr = pci_bar(pci_dev, region_num); + pci_set_long(pci_dev->config + addr, type); + + if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && + r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(pci_dev->wmask + addr, wmask); + pci_set_quad(pci_dev->cmask + addr, ~0ULL); + } else { + pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(pci_dev->cmask + addr, 0xffffffff); + } } } @@ -1594,7 +1606,11 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg, pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET); uint16_t vf_stride = pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride; + uint32_t vf_num = d->devfn - (pf->devfn + vf_offset); + + if (vf_num) { + vf_num /= vf_stride; + } if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { 
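/*
 * On the vf_num calculation just above: the VF Stride a PF reports may
 * legitimately be zero when it exposes at most one VF, in which case an
 * unconditional "vf_num /= vf_stride" would divide by zero. When vf_num
 * is already 0 the quotient is 0 for any stride, so the division can be
 * skipped. (A sketch of the presumed rationale, assuming spec-conformant
 * stride values; the guard itself is exactly what the hunk above adds.)
 */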
new_addr = pci_get_quad(pf->config + bar); @@ -1719,7 +1735,7 @@ static void pci_update_mappings(PCIDevice *d) pci_update_vga(d); } -static inline int pci_irq_disabled(PCIDevice *d) +int pci_irq_disabled(PCIDevice *d) { return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE; } @@ -1781,9 +1797,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (ranges_overlap(addr, l, PCI_COMMAND, 2)) { pci_update_irq_disabled(d, was_irq_disabled); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) & + PCI_COMMAND_MASTER) && d->enabled); } msi_write_config(d, addr, val_in, l); @@ -2268,6 +2283,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } + if (!pcie_sriov_register_device(pci_dev, errp)) { + pci_qdev_unrealize(DEVICE(pci_dev)); + return; + } + /* * A PCIe Downstream Port that do not have ARI Forwarding enabled must * associate only Device 0 with the device attached to the bus @@ -2439,12 +2459,12 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Only a valid rom will be patched. */ rom_magic = pci_get_word(ptr); if (rom_magic != 0xaa55) { - PCI_DPRINTF("Bad ROM magic %04x\n", rom_magic); + trace_pci_bad_rom_magic(rom_magic, 0xaa55); return; } pcir_offset = pci_get_word(ptr + 0x18); if (pcir_offset + 8 >= size || memcmp(ptr + pcir_offset, "PCIR", 4)) { - PCI_DPRINTF("Bad PCIR offset 0x%x or signature\n", pcir_offset); + trace_pci_bad_pcir_offset(pcir_offset); return; } @@ -2453,8 +2473,8 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) rom_vendor_id = pci_get_word(ptr + pcir_offset + 4); rom_device_id = pci_get_word(ptr + pcir_offset + 6); - PCI_DPRINTF("%s: ROM id %04x%04x / PCI id %04x%04x\n", pdev->romfile, - vendor_id, device_id, rom_vendor_id, rom_device_id); + trace_pci_rom_and_pci_ids(pdev->romfile, vendor_id, device_id, + rom_vendor_id, rom_device_id); checksum = ptr[6]; @@ -2462,7 +2482,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Patch vendor id and checksum (at offset 6 for etherboot roms). */ checksum += (uint8_t)rom_vendor_id + (uint8_t)(rom_vendor_id >> 8); checksum -= (uint8_t)vendor_id + (uint8_t)(vendor_id >> 8); - PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + trace_pci_rom_checksum_change(ptr[6], checksum); ptr[6] = checksum; pci_set_word(ptr + pcir_offset + 4, vendor_id); } @@ -2471,7 +2491,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) /* Patch device id and checksum (at offset 6 for etherboot roms). 
*/ checksum += (uint8_t)rom_device_id + (uint8_t)(rom_device_id >> 8); checksum -= (uint8_t)device_id + (uint8_t)(device_id >> 8); - PCI_DPRINTF("ROM checksum %02x / %02x\n", ptr[6], checksum); + trace_pci_rom_checksum_change(ptr[6], checksum); ptr[6] = checksum; pci_set_word(ptr + pcir_offset + 6, device_id); } @@ -2522,6 +2542,14 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, return; } + if (pci_is_vf(pdev)) { + if (pdev->rom_bar > 0) { + error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF"); + } + + return; + } + if (load_file || pdev->romsize == UINT32_MAX) { path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); if (path == NULL) { @@ -2916,6 +2944,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) return &address_space_memory; } +int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n, + IOMMUNotify fn, void *opaque) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) { + iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque, + devfn, n, fn, opaque); + return 0; + } + + return -ENODEV; +} + bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, Error **errp) { @@ -2947,6 +2992,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev) } } +int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, bool lpig, + uint16_t prgi, bool is_read, bool is_write) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (!pcie_pri_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) { + return iommu_bus->iommu_ops->pri_request_page(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, lpig, prgi, + is_read, is_write); + } + + return -ENODEV; +} + +int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUPRINotifier *notifier) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) { + iommu_bus->iommu_ops->pri_register_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, notifier); + return 0; + } + + return -ENODEV; +} + +void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) { + iommu_bus->iommu_ops->pri_unregister_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid); + } +} + +ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (result_length == 0) { + return -ENOSPC; + } + + if (!pcie_ats_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && 
iommu_bus->iommu_ops->ats_request_translation) { + return iommu_bus->iommu_ops->ats_request_translation(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, length, + no_write, result, + result_length, err_count); + } + + return -ENODEV; +} + +int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) { + iommu_bus->iommu_ops->register_iotlb_notifier(bus, + iommu_bus->iommu_opaque, devfn, + pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) { + iommu_bus->iommu_ops->unregister_iotlb_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width, + uint32_t *min_page_size) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) { + iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque, + addr_width, min_page_size); + return 0; + } + + return -ENODEV; +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /* @@ -3081,9 +3290,8 @@ void pci_set_enabled(PCIDevice *d, bool state) d->enabled = state; pci_update_mappings(d); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->enabled); if (qdev_is_realized(&d->qdev)) { pci_device_reset(d); } diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index abe83bb..7179d99 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -217,12 +217,6 @@ const MemoryRegionOps pci_host_data_le_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -const MemoryRegionOps pci_host_data_be_ops = { - .read = pci_host_data_read, - .write = pci_host_data_write, - .endianness = DEVICE_BIG_ENDIAN, -}; - static bool pci_host_needed(void *opaque) { PCIHostState *s = opaque; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 1b12db6..eaeb688 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1214,3 +1214,81 @@ void pcie_acs_reset(PCIDevice *dev) pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0); } } + +/* PASID */ +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width, + bool exec_perm, bool priv_mod) +{ + static const uint16_t control_reg_rw_mask = 0x07; + uint16_t capability_reg; + + assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH); + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset, + PCI_EXT_CAP_PASID_SIZEOF); + + capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT; + capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0; + capability_reg |= priv_mod ? 
PCI_PASID_CAP_PRIV : 0; + pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg); + + /* Everything is disabled by default */ + pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0); + + pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask); + + dev->exp.pasid_cap = offset; +} + +/* PRI */ +void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap, + bool prg_response_pasid_req) +{ + static const uint16_t control_reg_rw_mask = 0x3; + static const uint16_t status_reg_rw1_mask = 0x3; + static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff; + uint16_t status_reg; + + status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0; + status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */ + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset, + PCI_EXT_CAP_PRI_SIZEOF); + /* Disabled by default */ + + pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg); + pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap); + + pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask); + pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask); + pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask); + + dev->exp.pri_cap = offset; +} + +bool pcie_pri_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pri_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) & + PCI_PRI_CTRL_ENABLE) != 0; +} + +bool pcie_pasid_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pasid_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) & + PCI_PASID_CTRL_ENABLE) != 0; +} + +bool pcie_ats_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.ats_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) & + PCI_ATS_CTRL_ENABLE) != 0; +} diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 1eb4358..8a4bf0d 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -15,11 +15,12 @@ #include "hw/pci/pcie.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" -#include "qemu/error-report.h" #include "qemu/range.h" #include "qapi/error.h" #include "trace.h" +static GHashTable *pfs; + static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) { for (uint16_t i = 0; i < total_vfs; i++) { @@ -31,17 +32,78 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) dev->exp.sriov_pf.vf = NULL; } -bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, - const char *vfname, uint16_t vf_dev_id, - uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride, - Error **errp) +static void register_vfs(PCIDevice *dev) +{ + uint16_t num_vfs; + uint16_t i; + uint16_t sriov_cap = dev->exp.sriov_cap; + + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + + trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + } + + pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0); +} + +static void unregister_vfs(PCIDevice *dev) +{ + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + uint16_t i; + + trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + } + 
+ pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff); +} + +static void consume_config(PCIDevice *dev) +{ + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + if (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); + } else { + uint8_t *wmask = dev->wmask + dev->exp.sriov_cap; + uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF); + uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI; + + unregister_vfs(dev); + + if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) { + wmask_val |= PCI_SRIOV_CTRL_VFE; + } + + pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val); + } +} + +static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, + uint16_t vf_dev_id, uint16_t init_vfs, + uint16_t total_vfs, uint16_t vf_offset, + uint16_t vf_stride, Error **errp) { - BusState *bus = qdev_get_parent_bus(&dev->qdev); int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV PF"); + return false; + } + + if (pci_is_vf(dev)) { + error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same time"); + return false; + } + if (total_vfs && (uint32_t)devfn + (uint32_t)(total_vfs - 1) * vf_stride >= PCI_DEVFN_MAX) { error_setg(errp, "VF addr overflows"); @@ -84,6 +146,28 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, qdev_prop_set_bit(&dev->qdev, "multifunction", true); + return true; +} + +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, + const char *vfname, uint16_t vf_dev_id, + uint16_t init_vfs, uint16_t total_vfs, + uint16_t vf_offset, uint16_t vf_stride, + Error **errp) +{ + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; + + if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs, + total_vfs, vf_offset, vf_stride, errp)) { + return false; + } + dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); for (uint16_t i = 0; i < total_vfs; i++) { @@ -113,7 +197,22 @@ void pcie_sriov_pf_exit(PCIDevice *dev) { uint8_t *cfg = dev->config + dev->exp.sriov_cap; - unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + if (dev->exp.sriov_pf.vf_user_created) { + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF); + uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID); + + unregister_vfs(dev); + + for (uint16_t i = 0; i < total_vfs; i++) { + dev->exp.sriov_pf.vf[i]->exp.sriov_vf.pf = NULL; + + pci_config_set_vendor_id(dev->exp.sriov_pf.vf[i]->config, ven_id); + pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id); + } + } else { + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + } } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -146,69 +245,179 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory) { - PCIIORegion *r; - PCIBus *bus = pci_get_bus(dev); uint8_t type; - pcibus_t size = memory_region_size(memory); - assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ - assert(region_num >= 0); - assert(region_num < PCI_NUM_REGIONS); + assert(dev->exp.sriov_vf.pf); type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; - if (!is_power_of_2(size)) { - 
error_report("%s: PCI region size must be a power" - " of two - type=0x%x, size=0x%"FMT_PCIBUS, - __func__, type, size); - exit(1); - } + return pci_register_bar(dev, region_num, type, memory); +} - r = &dev->io_regions[region_num]; - r->memory = memory; - r->address_space = - type & PCI_BASE_ADDRESS_SPACE_IO - ? bus->address_space_io - : bus->address_space_mem; - r->size = size; - r->type = type; - - r->addr = pci_bar_address(dev, region_num, r->type, r->size); - if (r->addr != PCI_BAR_UNMAPPED) { - memory_region_add_subregion_overlap(r->address_space, - r->addr, r->memory, 1); - } +static gint compare_vf_devfns(gconstpointer a, gconstpointer b) +{ + return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn; } -static void register_vfs(PCIDevice *dev) +int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, + uint16_t offset, + Error **errp) { - uint16_t num_vfs; + GPtrArray *pf; + PCIDevice **vfs; + BusState *bus = qdev_get_parent_bus(DEVICE(dev)); + uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); + uint16_t size = PCI_EXT_CAP_SRIOV_SIZEOF; + uint16_t vf_dev_id; + uint16_t vf_offset; + uint16_t vf_stride; uint16_t i; - uint16_t sriov_cap = dev->exp.sriov_cap; - assert(sriov_cap > 0); - num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + if (!pfs || !dev->qdev.id) { + return 0; + } - trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + pf = g_hash_table_lookup(pfs, dev->qdev.id); + if (!pf) { + return 0; } - pci_set_word(dev->wmask + sriov_cap + PCI_SRIOV_NUM_VF, 0); + if (pf->len > UINT16_MAX) { + error_setg(errp, "too many VFs"); + return -1; + } + + g_ptr_array_sort(pf, compare_vf_devfns); + vfs = (void *)pf->pdata; + + if (vfs[0]->devfn <= dev->devfn) { + error_setg(errp, "a VF function number is less than the PF function number"); + return -1; + } + + vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID); + vf_offset = vfs[0]->devfn - dev->devfn; + vf_stride = pf->len < 2 ? 
0 : vfs[1]->devfn - vfs[0]->devfn; + + for (i = 0; i < pf->len; i++) { + if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) { + error_setg(errp, "SR-IOV VF parent bus mismatches with PF"); + return -1; + } + + if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) { + error_setg(errp, "SR-IOV VF vendor ID mismatches with PF"); + return -1; + } + + if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) { + error_setg(errp, "inconsistent SR-IOV VF device IDs"); + return -1; + } + + for (size_t j = 0; j < PCI_NUM_REGIONS; j++) { + if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size || + vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) { + error_setg(errp, "inconsistent SR-IOV BARs"); + return -1; + } + } + + if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) { + error_setg(errp, "inconsistent SR-IOV stride"); + return -1; + } + } + + if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len, + pf->len, vf_offset, vf_stride, errp)) { + return -1; + } + + if (!pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI)) { + pcie_ari_init(dev, offset + size); + size += PCI_ARI_SIZEOF; + } + + for (i = 0; i < pf->len; i++) { + vfs[i]->exp.sriov_vf.pf = dev; + vfs[i]->exp.sriov_vf.vf_number = i; + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(vfs[i]->config, 0xffff); + pci_config_set_device_id(vfs[i]->config, 0xffff); + } + + dev->exp.sriov_pf.vf = vfs; + dev->exp.sriov_pf.vf_user_created = true; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + PCIIORegion *region = &vfs[0]->io_regions[i]; + + if (region->size) { + pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size); + } + } + + return size; } -static void unregister_vfs(PCIDevice *dev) +bool pcie_sriov_register_device(PCIDevice *dev, Error **errp) { - uint8_t *cfg = dev->config + dev->exp.sriov_cap; - uint16_t i; + if (!dev->exp.sriov_pf.vf && dev->qdev.id && + pfs && g_hash_table_contains(pfs, dev->qdev.id)) { + error_setg(errp, "attaching user-created SR-IOV VF unsupported"); + return false; + } - trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + if (dev->sriov_pf) { + PCIDevice *pci_pf; + GPtrArray *pf; + + if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) { + error_setg(errp, "user cannot create SR-IOV VF with this device type"); + return false; + } + + if (!pci_is_express(dev)) { + error_setg(errp, "PCI Express is required for SR-IOV VF"); + return false; + } + + if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) { + error_setg(errp, "PCI device specified as SR-IOV PF already exists"); + return false; + } + + if (!pfs) { + pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); + } + + pf = g_hash_table_lookup(pfs, dev->sriov_pf); + if (!pf) { + pf = g_ptr_array_new(); + g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf); + } + + g_ptr_array_add(pf, dev); } - pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff); + return true; +} + +void pcie_sriov_unregister_device(PCIDevice *dev) +{ + if (dev->sriov_pf && pfs) { + GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf); + + if (pf) { + g_ptr_array_remove_fast(pf, dev); + + if (!pf->len) { + g_hash_table_remove(pfs, dev->sriov_pf); + g_ptr_array_free(pf, FALSE); + } + } + } } void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, @@ -228,30 +437,13 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, 
trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), off, val, len); - if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { - if (val & PCI_SRIOV_CTRL_VFE) { - register_vfs(dev); - } else { - unregister_vfs(dev); - } - } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { - uint8_t *cfg = dev->config + sriov_cap; - uint8_t *wmask = dev->wmask + sriov_cap; - uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF); - uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI; - - if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) { - wmask_val |= PCI_SRIOV_CTRL_VFE; - } - - pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val); - } + consume_config(dev); } void pcie_sriov_pf_post_load(PCIDevice *dev) { if (dev->exp.sriov_cap) { - register_vfs(dev); + consume_config(dev); } } @@ -304,7 +496,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) uint16_t pcie_sriov_vf_number(PCIDevice *dev) { - assert(pci_is_vf(dev)); + assert(dev->exp.sriov_vf.pf); return dev->exp.sriov_vf.vf_number; } diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 6a99689..02c80d3 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -6,6 +6,10 @@ pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, u pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" +pci_bad_rom_magic(uint16_t bad_rom_magic, uint16_t good_rom_magic) "Bad ROM magic number: %04"PRIX16". 
Should be: %04"PRIX16 +pci_bad_pcir_offset(uint16_t pcir_offset) "Bad PCIR offset 0x%"PRIx16" or signature" +pci_rom_and_pci_ids(char *romfile, uint16_t vendor_id, uint16_t device_id, uint16_t rom_vendor_id, uint16_t rom_device_id) "%s: ROM ID %04"PRIx16":%04"PRIx16" | PCI ID %04"PRIx16":%04"PRIx16 +pci_rom_checksum_change(uint8_t old_checksum, uint8_t new_checksum) "ROM checksum changed from %02"PRIx8" to %02"PRIx8 # pci_host.c pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x" diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 809078a..723c97f 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -79,8 +79,6 @@ #define MPC85XX_ESDHC_IRQ 72 #define RTC_REGS_OFFSET 0x68 -#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000) - struct boot_info { uint32_t dt_base; @@ -120,7 +118,7 @@ static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot, } static void dt_serial_create(void *fdt, unsigned long long offset, - const char *soc, const char *mpic, + const char *soc, uint32_t freq, const char *mpic, const char *alias, int idx, bool defcon) { char *ser; @@ -131,7 +129,7 @@ static void dt_serial_create(void *fdt, unsigned long long offset, qemu_fdt_setprop_string(fdt, ser, "compatible", "ns16550"); qemu_fdt_setprop_cells(fdt, ser, "reg", offset, 0x100); qemu_fdt_setprop_cell(fdt, ser, "cell-index", idx); - qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", PLATFORM_CLK_FREQ_HZ); + qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", freq); qemu_fdt_setprop_cells(fdt, ser, "interrupts", 42, 2); qemu_fdt_setprop_phandle(fdt, ser, "interrupt-parent", mpic); qemu_fdt_setprop_string(fdt, "/aliases", alias, ser); @@ -382,8 +380,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, int fdt_size; void *fdt; uint8_t hypercall[16]; - uint32_t clock_freq = PLATFORM_CLK_FREQ_HZ; - uint32_t tb_freq = PLATFORM_CLK_FREQ_HZ; + uint32_t clock_freq, tb_freq; int i; char compatible_sb[] = "fsl,mpc8544-immr\0simple-bus"; char *soc; @@ -411,7 +408,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, if (dtb_file) { char *filename; - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, dtb_file); + filename = qemu_find_file(QEMU_FILE_TYPE_DTB, dtb_file); if (!filename) { goto out; } @@ -484,6 +481,9 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, if (kvmppc_get_hasidle(env)) { qemu_fdt_setprop(fdt, "/hypervisor", "has-idle", NULL, 0); } + } else { + clock_freq = pmc->clock_freq; + tb_freq = pmc->tb_freq; } /* Create CPU nodes */ @@ -564,12 +564,12 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, */ if (serial_hd(1)) { dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, - soc, mpic, "serial1", 1, false); + soc, pmc->clock_freq, mpic, "serial1", 1, false); } if (serial_hd(0)) { dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, - soc, mpic, "serial0", 0, true); + soc, pmc->clock_freq, mpic, "serial0", 0, true); } /* i2c */ @@ -931,7 +931,6 @@ void ppce500_init(MachineState *machine) CPUPPCState *firstenv = NULL; MemoryRegion *ccsr_addr_space; SysBusDevice *s; - PPCE500CCSRState *ccsr; I2CBus *i2c; irqs = g_new0(IrqLines, smp_cpus); @@ -968,7 +967,7 @@ void ppce500_init(MachineState *machine) env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i; env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0; - ppc_booke_timers_init(cpu, PLATFORM_CLK_FREQ_HZ, PPC_TIMER_E500); + ppc_booke_timers_init(cpu, pmc->tb_freq, PPC_TIMER_E500); /* Register reset handler */ if (!i) 
{ @@ -993,10 +992,10 @@ void ppce500_init(MachineState *machine) memory_region_add_subregion(address_space_mem, 0, machine->ram); dev = qdev_new("e500-ccsr"); + s = SYS_BUS_DEVICE(dev); object_property_add_child(OBJECT(machine), "e500-ccsr", OBJECT(dev)); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - ccsr = CCSR(dev); - ccsr_addr_space = &ccsr->ccsr_space; + sysbus_realize_and_unref(s, &error_fatal); + ccsr_addr_space = sysbus_mmio_get_region(s, 0); memory_region_add_subregion(address_space_mem, pmc->ccsrbar_base, ccsr_addr_space); @@ -1284,6 +1283,7 @@ static void e500_ccsr_initfn(Object *obj) PPCE500CCSRState *ccsr = CCSR(obj); memory_region_init(&ccsr->ccsr_space, obj, "e500-ccsr", MPC8544_CCSRBAR_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(ccsr), &ccsr->ccsr_space); } static const TypeInfo e500_ccsr_info = { diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h index 01db102..00f4905 100644 --- a/hw/ppc/e500.h +++ b/hw/ppc/e500.h @@ -5,6 +5,8 @@ #include "hw/platform-bus.h" #include "qom/object.h" +#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000) + struct PPCE500MachineState { /*< private >*/ MachineState parent_obj; @@ -37,6 +39,8 @@ struct PPCE500MachineClass { hwaddr pci_mmio_base; hwaddr pci_mmio_bus_base; hwaddr spin_base; + uint32_t clock_freq; + uint32_t tb_freq; }; void ppce500_init(MachineState *machine); diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c index 775b9d8..4f1d659 100644 --- a/hw/ppc/e500plat.c +++ b/hw/ppc/e500plat.c @@ -93,6 +93,8 @@ static void e500plat_machine_class_init(ObjectClass *oc, const void *data) pmc->pci_mmio_base = 0xC00000000ULL; pmc->pci_mmio_bus_base = 0xE0000000ULL; pmc->spin_base = 0xFEF000000ULL; + pmc->clock_freq = PLATFORM_CLK_FREQ_HZ; + pmc->tb_freq = PLATFORM_CLK_FREQ_HZ; mc->desc = "generic paravirt e500 platform"; mc->init = e500plat_init; diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c index 97fb0f3..5826985 100644 --- a/hw/ppc/mpc8544ds.c +++ b/hw/ppc/mpc8544ds.c @@ -55,6 +55,8 @@ static void mpc8544ds_machine_class_init(ObjectClass *oc, const void *data) pmc->pci_mmio_bus_base = 0xC0000000ULL; pmc->pci_pio_base = 0xE1000000ULL; pmc->spin_base = 0xEF000000ULL; + pmc->clock_freq = PLATFORM_CLK_FREQ_HZ; + pmc->tb_freq = PLATFORM_CLK_FREQ_HZ; mc->desc = "mpc8544ds"; mc->init = mpc8544ds_init; diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 4a49e9d..d84c906 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -2608,62 +2608,46 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj, GString *buf) } } -static int pnv_match_nvt(XiveFabric *xfb, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, - XiveTCTXMatch *match) +static bool pnv_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, + XiveTCTXMatch *match) { PnvMachineState *pnv = PNV_MACHINE(xfb); - int total_count = 0; int i; for (i = 0; i < pnv->num_chips; i++) { Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]); XivePresenter *xptr = XIVE_PRESENTER(&chip9->xive); XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); - int count; - count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, - cam_ignore, priority, logic_serv, match); - - if (count < 0) { - return count; - } - - total_count += count; + xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, + cam_ignore, priority, logic_serv, match); } - return total_count; + return !!match->count; } -static int pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t 
format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, - XiveTCTXMatch *match) +static bool pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, + XiveTCTXMatch *match) { PnvMachineState *pnv = PNV_MACHINE(xfb); - int total_count = 0; int i; for (i = 0; i < pnv->num_chips; i++) { Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]); XivePresenter *xptr = XIVE_PRESENTER(&chip10->xive); XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); - int count; - - count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, - cam_ignore, priority, logic_serv, match); - - if (count < 0) { - return count; - } - total_count += count; + xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, + cam_ignore, priority, logic_serv, match); } - return total_count; + return !!match->count; } static int pnv10_xive_broadcast(XiveFabric *xfb, diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c index fa6f31c..24b789c 100644 --- a/hw/ppc/pnv_occ.c +++ b/hw/ppc/pnv_occ.c @@ -789,7 +789,7 @@ static bool occ_opal_process_command(PnvOCC *occ, static bool occ_model_tick(PnvOCC *occ) { - struct occ_dynamic_data dynamic_data; + QEMU_UNINITIALIZED struct occ_dynamic_data dynamic_data; if (!occ_read_dynamic_data(occ, &dynamic_data, NULL)) { /* Can't move OCC state field to safe because we can't map it! */ diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 7395263..982e40e 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -35,6 +35,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/datadir.h" #include "hw/loader.h" #include "hw/rtc/mc146818rtc.h" #include "hw/isa/pc87312.h" @@ -55,6 +56,8 @@ #define KERNEL_LOAD_ADDR 0x01000000 #define INITRD_LOAD_ADDR 0x01800000 +#define BIOS_ADDR 0xfff00000 +#define BIOS_SIZE (1 * MiB) #define NVRAM_SIZE 0x2000 static void fw_cfg_boot_set(void *opaque, const char *boot_device, @@ -241,6 +244,9 @@ static void ibm_40p_init(MachineState *machine) ISADevice *isa_dev; ISABus *isa_bus; void *fw_cfg; + MemoryRegion *bios = g_new(MemoryRegion, 1); + char *filename; + ssize_t bios_size = -1; uint32_t kernel_base = 0, initrd_base = 0; long kernel_size = 0, initrd_size = 0; char boot_device; @@ -263,10 +269,27 @@ static void ibm_40p_init(MachineState *machine) cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL); qemu_register_reset(ppc_prep_reset, cpu); + /* allocate and load firmware */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (!filename) { + error_report("Could not find bios image '%s'", bios_name); + exit(1); + } + memory_region_init_rom(bios, NULL, "bios", BIOS_SIZE, &error_fatal); + memory_region_add_subregion(get_system_memory(), BIOS_ADDR, bios); + bios_size = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + ELFDATA2MSB, PPC_ELF_MACHINE, 0, 0); + if (bios_size < 0) { + bios_size = load_image_targphys(filename, BIOS_ADDR, BIOS_SIZE); + } + if (bios_size < 0 || bios_size > BIOS_SIZE) { + error_report("Could not load bios image '%s'", filename); + return; + } + g_free(filename); + /* PCI host */ dev = qdev_new("raven-pcihost"); - qdev_prop_set_string(dev, "bios-name", bios_name); - qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE); pcihost = SYS_BUS_DEVICE(dev); object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev)); sysbus_realize_and_unref(pcihost, &error_fatal); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 702f774..1855a3c 100644 
--- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -577,7 +577,7 @@ static int spapr_dt_dynamic_memory(SpaprMachineState *spapr, void *fdt, /* * Adds ibm,dynamic-reconfiguration-memory node. - * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation + * Refer to docs/specs/ppc-spapr-hotplug.rst for the documentation * of this device tree node. */ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, @@ -2518,7 +2518,7 @@ static void htab_save_cleanup(void *opaque) static SaveVMHandlers savevm_htab_handlers = { .save_setup = htab_save_setup, .save_live_iterate = htab_save_iterate, - .save_live_complete_precopy = htab_save_complete, + .save_complete = htab_save_complete, .save_cleanup = htab_save_cleanup, .load_state = htab_load, }; @@ -4468,21 +4468,14 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj, GString *buf) /* * This is a XIVE only operation */ -static int spapr_match_nvt(XiveFabric *xfb, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, XiveTCTXMatch *match) +static bool spapr_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) { SpaprMachineState *spapr = SPAPR_MACHINE(xfb); XivePresenter *xptr = XIVE_PRESENTER(spapr->active_intc); XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); - int count; - - count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, cam_ignore, - priority, logic_serv, match); - if (count < 0) { - return count; - } /* * When we implement the save and restore of the thread interrupt @@ -4493,12 +4486,14 @@ static int spapr_match_nvt(XiveFabric *xfb, uint8_t format, * Until this is done, the sPAPR machine should find at least one * matching context always. 
*/ - if (count == 0) { + if (!xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, cam_ignore, + priority, logic_serv, match)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is not dispatched\n", nvt_blk, nvt_idx); + return false; } - return count; + return true; } int spapr_get_vcpu_id(PowerPCCPU *cpu) diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c index 862eeaa..1297b3a 100644 --- a/hw/ppc/spapr_tpm_proxy.c +++ b/hw/ppc/spapr_tpm_proxy.c @@ -41,8 +41,8 @@ static ssize_t tpm_execute(SpaprTpmProxy *tpm_proxy, target_ulong *args) target_ulong data_in_size = args[2]; uint64_t data_out = ppc64_phys_to_real(args[3]); target_ulong data_out_size = args[4]; - uint8_t buf_in[TPM_SPAPR_BUFSIZE]; - uint8_t buf_out[TPM_SPAPR_BUFSIZE]; + QEMU_UNINITIALIZED uint8_t buf_in[TPM_SPAPR_BUFSIZE]; + QEMU_UNINITIALIZED uint8_t buf_out[TPM_SPAPR_BUFSIZE]; ssize_t ret; trace_spapr_tpm_execute(data_in, data_in_size, data_out, data_out_size); diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index e6a0ac1..fc9c35b 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -119,3 +119,12 @@ config SPIKE select HTIF select RISCV_ACLINT select SIFIVE_PLIC + +config XIANGSHAN_KUNMINGHU + bool + default y + depends on RISCV64 + select RISCV_ACLINT + select RISCV_APLIC + select RISCV_IMSIC + select SERIAL_MM diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index 765b9e2..828a867 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -37,7 +37,7 @@ bool riscv_is_32bit(RISCVHartArrayState *harts) { RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(&harts->harts[0]); - return mcc->misa_mxl_max == MXL_RV32; + return mcc->def->misa_mxl_max == MXL_RV32; } /* diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index c22f3a7..2a8d5b1 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -13,5 +13,6 @@ riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files( 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c')) riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c')) +riscv_ss.add(when: 'CONFIG_XIANGSHAN_KUNMINGHU', if_true: files('xiangshan_kmh.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c index e39ee65..2e74783 100644 --- a/hw/riscv/microchip_pfsoc.c +++ b/hw/riscv/microchip_pfsoc.c @@ -39,6 +39,7 @@ #include "qemu/units.h" #include "qemu/cutils.h" #include "qapi/error.h" +#include "qapi/visitor.h" #include "hw/boards.h" #include "hw/loader.h" #include "hw/sysbus.h" @@ -61,9 +62,6 @@ #define BIOS_FILENAME "hss.bin" #define RESET_VECTOR 0x20220000 -/* CLINT timebase frequency */ -#define CLINT_TIMEBASE_FREQ 1000000 - /* GEM version */ #define GEM_REVISION 0x0107010c @@ -193,6 +191,7 @@ static void microchip_pfsoc_soc_instance_init(Object *obj) static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); + MicrochipIcicleKitState *iks = MICROCHIP_ICICLE_KIT_MACHINE(ms); MicrochipPFSoCState *s = MICROCHIP_PFSOC(dev); const MemMapEntry *memmap = microchip_pfsoc_memmap; MemoryRegion *system_memory = get_system_memory(); @@ -253,7 +252,7 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) memmap[MICROCHIP_PFSOC_CLINT].base + RISCV_ACLINT_SWI_SIZE, RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, - CLINT_TIMEBASE_FREQ, false); + iks->clint_timebase_freq, false); /* L2 cache controller */ 
create_unimplemented_device("microchip.pfsoc.l2cc", @@ -516,7 +515,6 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) uint64_t mem_low_size, mem_high_size; hwaddr firmware_load_addr; const char *firmware_name; - bool kernel_as_payload = false; target_ulong firmware_end_addr, kernel_start_addr; uint64_t kernel_entry; uint64_t fdt_load_addr; @@ -579,45 +577,50 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) } /* - * We follow the following table to select which payload we execute. - * - * -bios | -kernel | payload - * -------+------------+-------- - * N | N | HSS - * Y | don't care | HSS - * N | Y | kernel + * We follow the following table to select which firmware we use. * - * This ensures backwards compatibility with how we used to expose -bios - * to users but allows them to run through direct kernel booting as well. - * - * When -kernel is used for direct boot, -dtb must be present to provide - * a valid device tree for the board, as we don't generate device tree. + * -bios | -kernel | firmware + * --------------+------------+-------- + * none | N | error + * none | Y | kernel + * NULL, default | N | BIOS_FILENAME + * NULL, default | Y | RISCV64_BIOS_BIN + * other | don't care | other */ - - if (machine->kernel_filename && machine->dtb) { - int fdt_size; - machine->fdt = load_device_tree(machine->dtb, &fdt_size); - if (!machine->fdt) { - error_report("load_device_tree() failed"); + if (machine->firmware && !strcmp(machine->firmware, "none")) { + if (!machine->kernel_filename) { + error_report("for -bios none, a kernel is required"); exit(1); } - firmware_name = RISCV64_BIOS_BIN; - firmware_load_addr = memmap[MICROCHIP_PFSOC_DRAM_LO].base; - kernel_as_payload = true; - } - - if (!kernel_as_payload) { - firmware_name = BIOS_FILENAME; + firmware_name = NULL; + firmware_load_addr = RESET_VECTOR; + } else if (!machine->firmware || !strcmp(machine->firmware, "default")) { + if (machine->kernel_filename) { + firmware_name = RISCV64_BIOS_BIN; + firmware_load_addr = memmap[MICROCHIP_PFSOC_DRAM_LO].base; + } else { + firmware_name = BIOS_FILENAME; + firmware_load_addr = RESET_VECTOR; + } + } else { + firmware_name = machine->firmware; firmware_load_addr = RESET_VECTOR; } - /* Load the firmware */ - firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name, - &firmware_load_addr, NULL); + /* Load the firmware if necessary */ + firmware_end_addr = firmware_load_addr; + if (firmware_name) { + char *filename = riscv_find_firmware(firmware_name, NULL); + if (filename) { + firmware_end_addr = riscv_load_firmware(filename, + &firmware_load_addr, NULL); + g_free(filename); + } + } riscv_boot_info_init(&boot_info, &s->soc.u_cpus); - if (kernel_as_payload) { + if (machine->kernel_filename) { kernel_start_addr = riscv_calc_kernel_start_addr(&boot_info, firmware_end_addr); @@ -625,20 +628,82 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) true, NULL); kernel_entry = boot_info.image_low_addr; - /* Compute the fdt load address in dram */ - fdt_load_addr = riscv_compute_fdt_addr(memmap[MICROCHIP_PFSOC_DRAM_LO].base, - memmap[MICROCHIP_PFSOC_DRAM_LO].size, - machine, &boot_info); - riscv_load_fdt(fdt_load_addr, machine->fdt); + if (machine->dtb) { + int fdt_size; + machine->fdt = load_device_tree(machine->dtb, &fdt_size); + if (!machine->fdt) { + error_report("load_device_tree() failed"); + exit(1); + } + + /* Compute the FDT load address in DRAM */ + hwaddr kernel_ram_base = memmap[MICROCHIP_PFSOC_DRAM_LO].base; + hwaddr 
kernel_ram_size = memmap[MICROCHIP_PFSOC_DRAM_LO].size; + + if (kernel_entry - kernel_ram_base >= kernel_ram_size) { + kernel_ram_base = memmap[MICROCHIP_PFSOC_DRAM_HI].base; + kernel_ram_size = mem_high_size; + } + + fdt_load_addr = riscv_compute_fdt_addr(kernel_ram_base, kernel_ram_size, + machine, &boot_info); + riscv_load_fdt(fdt_load_addr, machine->fdt); + } else { + warn_report_once("The QEMU microchip-icicle-kit machine does not " + "generate a device tree, so no device tree is " + "being provided to the guest."); + fdt_load_addr = 0; + } + + hwaddr start_addr; + if (firmware_name) { + start_addr = firmware_load_addr; + } else { + start_addr = kernel_entry; + } /* Load the reset vector */ - riscv_setup_rom_reset_vec(machine, &s->soc.u_cpus, firmware_load_addr, + riscv_setup_rom_reset_vec(machine, &s->soc.u_cpus, start_addr, memmap[MICROCHIP_PFSOC_ENVM_DATA].base, memmap[MICROCHIP_PFSOC_ENVM_DATA].size, kernel_entry, fdt_load_addr); } } +static void microchip_icicle_kit_set_clint_timebase_freq(Object *obj, + Visitor *v, + const char *name, + void *opaque, + Error **errp) +{ + MicrochipIcicleKitState *s = MICROCHIP_ICICLE_KIT_MACHINE(obj); + uint32_t value; + + if (!visit_type_uint32(v, name, &value, errp)) { + return; + } + + s->clint_timebase_freq = value; +} + +static void microchip_icicle_kit_get_clint_timebase_freq(Object *obj, + Visitor *v, + const char *name, + void *opaque, + Error **errp) +{ + MicrochipIcicleKitState *s = MICROCHIP_ICICLE_KIT_MACHINE(obj); + uint32_t value = s->clint_timebase_freq; + + visit_type_uint32(v, name, &value, errp); +} + +static void microchip_icicle_kit_machine_instance_init(Object *obj) +{ + MicrochipIcicleKitState *m = MICROCHIP_ICICLE_KIT_MACHINE(obj); + m->clint_timebase_freq = 1000000; +} + static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, const void *data) { @@ -661,12 +726,20 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, * See memory_tests() in mss_ddr.c in the HSS source code. 
*/ mc->default_ram_size = 1537 * MiB; + + object_class_property_add(oc, "clint-timebase-frequency", "uint32_t", + microchip_icicle_kit_get_clint_timebase_freq, + microchip_icicle_kit_set_clint_timebase_freq, + NULL, NULL); + object_class_property_set_description(oc, "clint-timebase-frequency", + "Set CLINT timebase frequency in Hz."); } static const TypeInfo microchip_icicle_kit_machine_typeinfo = { .name = MACHINE_TYPE_NAME("microchip-icicle-kit"), .parent = TYPE_MACHINE, .class_init = microchip_icicle_kit_machine_class_init, + .instance_init = microchip_icicle_kit_machine_instance_init, .instance_size = sizeof(MicrochipIcicleKitState), }; diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h index 1017d73..47fe01b 100644 --- a/hw/riscv/riscv-iommu-bits.h +++ b/hw/riscv/riscv-iommu-bits.h @@ -79,6 +79,7 @@ struct riscv_iommu_pq_record { #define RISCV_IOMMU_CAP_SV39 BIT_ULL(9) #define RISCV_IOMMU_CAP_SV48 BIT_ULL(10) #define RISCV_IOMMU_CAP_SV57 BIT_ULL(11) +#define RISCV_IOMMU_CAP_SVRSW60T59B BIT_ULL(14) #define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16) #define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17) #define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18) diff --git a/hw/riscv/riscv-iommu-pci.c b/hw/riscv/riscv-iommu-pci.c index 1f44eef..cdb4a7a 100644 --- a/hw/riscv/riscv-iommu-pci.c +++ b/hw/riscv/riscv-iommu-pci.c @@ -68,12 +68,6 @@ typedef struct RISCVIOMMUStatePci { RISCVIOMMUState iommu; /* common IOMMU state */ } RISCVIOMMUStatePci; -struct RISCVIOMMUPciClass { - /*< public >*/ - DeviceRealize parent_realize; - ResettablePhases parent_phases; -}; - /* interrupt delivery callback */ static void riscv_iommu_pci_notify(RISCVIOMMUState *iommu, unsigned vector) { diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c index 74e76b9..e34d00a 100644 --- a/hw/riscv/riscv-iommu-sys.c +++ b/hw/riscv/riscv-iommu-sys.c @@ -53,12 +53,6 @@ struct RISCVIOMMUStateSys { uint8_t *msix_pba; }; -struct RISCVIOMMUSysClass { - /*< public >*/ - DeviceRealize parent_realize; - ResettablePhases parent_phases; -}; - static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr, unsigned size) { diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index a877e5d..96a7fbd 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -1935,11 +1935,7 @@ static void riscv_iommu_process_dbg(RISCVIOMMUState *s) iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); } else { iova = iotlb.translated_addr & ~iotlb.addr_mask; - iova >>= TARGET_PAGE_BITS; - iova &= RISCV_IOMMU_TR_RESPONSE_PPN; - - /* We do not support superpages (> 4kbs) for now */ - iova &= ~RISCV_IOMMU_TR_RESPONSE_S; + iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova)); } riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); } @@ -2355,7 +2351,8 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp) } if (s->enable_g_stage) { s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | - RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; + RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 | + RISCV_IOMMU_CAP_SVRSW60T59B; } if (s->hpm_cntrs > 0) { diff --git a/hw/riscv/riscv_hart.c b/hw/riscv/riscv_hart.c index ac6539b..7f26760 100644 --- a/hw/riscv/riscv_hart.c +++ b/hw/riscv/riscv_hart.c @@ -72,7 +72,7 @@ static void csr_call(char *cmd, uint64_t cpu_num, int csrno, uint64_t *val) ret = riscv_csrr(env, csrno, (target_ulong *)val); } else if (strcmp(cmd, "set_csr") == 0) { ret = riscv_csrrw(env, csrno, NULL, *(target_ulong *)val, - MAKE_64BIT_MASK(0, TARGET_LONG_BITS)); + MAKE_64BIT_MASK(0, 
TARGET_LONG_BITS), 0); } g_assert(ret == RISCV_EXCP_NONE); @@ -104,8 +104,11 @@ static bool csr_qtest_callback(CharBackend *chr, gchar **words) static void riscv_cpu_register_csr_qtest_callback(void) { - static GOnce once; - g_once(&once, (GThreadFunc)qtest_set_command_cb, csr_qtest_callback); + static bool first = true; + if (first) { + first = false; + qtest_set_command_cb(csr_qtest_callback); + } } #endif diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c index 1ad6800..f1406cb 100644 --- a/hw/riscv/virt-acpi-build.c +++ b/hw/riscv/virt-acpi-build.c @@ -199,6 +199,32 @@ acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap, } /* + * Add DSDT entry for the IOMMU platform device. + * The ACPI ID for the IOMMU is defined in section 6.2 of the RISC-V BRS spec. + * https://github.com/riscv-non-isa/riscv-brs/releases/download/v0.8/riscv-brs-spec.pdf + */ +static void acpi_dsdt_add_iommu_sys(Aml *scope, const MemMapEntry *iommu_memmap, + uint32_t iommu_irq) +{ + uint32_t i; + + Aml *dev = aml_device("IMU0"); + aml_append(dev, aml_name_decl("_HID", aml_string("RSCV0004"))); + aml_append(dev, aml_name_decl("_UID", aml_int(0))); + + Aml *crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(iommu_memmap->base, + iommu_memmap->size, AML_READ_WRITE)); + for (i = iommu_irq; i < iommu_irq + 4; i++) { + aml_append(crs, aml_interrupt(AML_CONSUMER, AML_EDGE, AML_ACTIVE_LOW, + AML_EXCLUSIVE, &i, 1)); + } + + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); +} + +/* * Serial Port Console Redirection Table (SPCR) * Rev: 1.10 */ @@ -244,11 +270,8 @@ spcr_setup(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s) #define RHCT_NODE_ARRAY_OFFSET 56 /* - * ACPI spec, Revision 6.5+ - * 5.2.36 RISC-V Hart Capabilities Table (RHCT) - * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16 - * https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view - * https://drive.google.com/file/d/1sKbOa8m1UZw1JkquZYe3F1zQBN1xXsaf/view + * ACPI spec, Revision 6.6 + * 5.2.37 RISC-V Hart Capabilities Table (RHCT) */ static void build_rhct(GArray *table_data, BIOSLinker *linker, @@ -261,7 +284,7 @@ uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0; RISCVCPU *cpu = &s->soc[0].harts[0]; uint32_t mmu_offset = 0; - uint8_t satp_mode_max; + bool rv32 = riscv_cpu_is_32bit(cpu); g_autofree char *isa = NULL; AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id, .oem_table_id = s->oem_table_id }; @@ -281,7 +304,7 @@ static void build_rhct(GArray *table_data, num_rhct_nodes++; } - if (cpu->cfg.satp_mode.supported != 0) { + if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) { num_rhct_nodes++; } @@ -341,22 +364,21 @@ static void build_rhct(GArray *table_data, } /* MMU node structure */ - if (cpu->cfg.satp_mode.supported != 0) { - satp_mode_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map); + if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) { mmu_offset = table_data->len - table.table_offset; build_append_int_noprefix(table_data, 2, 2); /* Type */ build_append_int_noprefix(table_data, 8, 2); /* Length */ build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ build_append_int_noprefix(table_data, 0, 1); /* Reserved */ /* MMU Type */ - if (satp_mode_max == VM_1_10_SV57) { + if (cpu->cfg.max_satp_mode == VM_1_10_SV57) { build_append_int_noprefix(table_data, 2, 1); /* Sv57 */ - } else if (satp_mode_max == VM_1_10_SV48) { + } else if (cpu->cfg.max_satp_mode == VM_1_10_SV48) { build_append_int_noprefix(table_data, 1, 1); /* Sv48 */ -
} else if (satp_mode_max == VM_1_10_SV39) { + } else if (cpu->cfg.max_satp_mode == VM_1_10_SV39) { build_append_int_noprefix(table_data, 0, 1); /* Sv39 */ } else { - assert(1); + g_assert_not_reached(); } } @@ -396,7 +418,10 @@ static void build_rhct(GArray *table_data, acpi_table_end(linker, &table); } -/* FADT */ +/* + * ACPI spec, Revision 6.6 + * 5.2.9 Fixed ACPI Description Table (FADT) + */ static void build_fadt_rev6(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s, @@ -404,7 +429,7 @@ { AcpiFadtData fadt = { .rev = 6, - .minor_ver = 5, + .minor_ver = 6, .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, .xdsdt_tbl_offset = &dsdt_tbl_offset, }; @@ -450,6 +475,9 @@ static void build_dsdt(GArray *table_data, } acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ); + if (virt_is_iommu_sys_enabled(s)) { + acpi_dsdt_add_iommu_sys(scope, &memmap[VIRT_IOMMU_SYS], IOMMU_SYS_IRQ); + } if (socket_count == 1) { virtio_acpi_dsdt_add(scope, memmap[VIRT_VIRTIO].base, @@ -480,11 +508,8 @@ static void build_dsdt(GArray *table_data, } /* - * ACPI spec, Revision 6.5+ + * ACPI spec, Revision 6.6 * 5.2.12 Multiple APIC Description Table (MADT) - * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15 - * https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view - * https://drive.google.com/file/d/1oMGPyOD58JaPgMl1pKasT-VKsIKia7zR/view */ static void build_madt(GArray *table_data, BIOSLinker *linker, @@ -509,7 +534,7 @@ hart_index_bits = imsic_num_bits(imsic_max_hart_per_socket); - AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id, + AcpiTable table = { .sig = "APIC", .rev = 7, .oem_id = s->oem_id, .oem_table_id = s->oem_table_id }; acpi_table_begin(&table, table_data); @@ -602,11 +627,190 @@ static void build_madt, acpi_table_end(linker, &table); } +#define ID_MAPPING_ENTRY_SIZE 20 +#define IOMMU_ENTRY_SIZE 40 +#define RISCV_INTERRUPT_WIRE_OFFSSET 40 +#define ROOT_COMPLEX_ENTRY_SIZE 20 +#define RIMT_NODE_OFFSET 48 + /* - * ACPI spec, Revision 6.5+ + * ID Mapping Structure + */ +static void build_rimt_id_mapping(GArray *table_data, uint32_t source_id_base, + uint32_t num_ids, uint32_t dest_id_base) +{ + /* Source ID Base */ + build_append_int_noprefix(table_data, source_id_base, 4); + /* Number of IDs */ + build_append_int_noprefix(table_data, num_ids, 4); + /* Destination Device ID Base */ + build_append_int_noprefix(table_data, source_id_base, 4); + /* Destination IOMMU Offset */ + build_append_int_noprefix(table_data, dest_id_base, 4); + /* Flags */ + build_append_int_noprefix(table_data, 0, 4); +} + +struct AcpiRimtIdMapping { + uint32_t source_id_base; + uint32_t num_ids; +}; +typedef struct AcpiRimtIdMapping AcpiRimtIdMapping; + +/* Build the rimt ID mapping to IOMMU for a given PCI host bridge */ +static int rimt_host_bridges(Object *obj, void *opaque) +{ + GArray *idmap_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + int min_bus, max_bus; + + pci_bus_range(bus, &min_bus, &max_bus); + + AcpiRimtIdMapping idmap = { + .source_id_base = min_bus << 8, + .num_ids = (max_bus - min_bus + 1) << 8, + }; + g_array_append_val(idmap_blob, idmap); + } + } + + return 0; +} + +static int rimt_idmap_compare(gconstpointer a, gconstpointer b) +{ + AcpiRimtIdMapping *idmap_a = (AcpiRimtIdMapping *)a; + AcpiRimtIdMapping *idmap_b = (AcpiRimtIdMapping *)b; +
+ return idmap_a->source_id_base - idmap_b->source_id_base; +} + +/* + * RISC-V IO Mapping Table (RIMT) + * https://github.com/riscv-non-isa/riscv-acpi-rimt/releases/download/v0.99/rimt-spec.pdf + */ +static void build_rimt(GArray *table_data, BIOSLinker *linker, + RISCVVirtState *s) +{ + int i, nb_nodes, rc_mapping_count; + size_t node_size, iommu_offset = 0; + uint32_t id = 0; + g_autoptr(GArray) iommu_idmaps = g_array_new(false, true, + sizeof(AcpiRimtIdMapping)); + + AcpiTable table = { .sig = "RIMT", .rev = 1, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + acpi_table_begin(&table, table_data); + + object_child_foreach_recursive(object_get_root(), + rimt_host_bridges, iommu_idmaps); + + /* Sort the ID mapping by Source ID Base*/ + g_array_sort(iommu_idmaps, rimt_idmap_compare); + + nb_nodes = 2; /* RC, IOMMU */ + rc_mapping_count = iommu_idmaps->len; + /* Number of RIMT Nodes */ + build_append_int_noprefix(table_data, nb_nodes, 4); + + /* Offset to Array of RIMT Nodes */ + build_append_int_noprefix(table_data, RIMT_NODE_OFFSET, 4); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + + iommu_offset = table_data->len - table.table_offset; + /* IOMMU Device Structure */ + build_append_int_noprefix(table_data, 0, 1); /* Type - IOMMU*/ + build_append_int_noprefix(table_data, 1, 1); /* Revision */ + node_size = IOMMU_ENTRY_SIZE; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, id++, 2); /* ID */ + if (virt_is_iommu_sys_enabled(s)) { + /* Hardware ID */ + build_append_int_noprefix(table_data, 'R', 1); + build_append_int_noprefix(table_data, 'S', 1); + build_append_int_noprefix(table_data, 'C', 1); + build_append_int_noprefix(table_data, 'V', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '4', 1); + /* Base Address */ + build_append_int_noprefix(table_data, + s->memmap[VIRT_IOMMU_SYS].base, 8); + build_append_int_noprefix(table_data, 0, 4); /* Flags */ + } else { + /* Hardware ID */ + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '1', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '0', 1); + build_append_int_noprefix(table_data, '1', 1); + build_append_int_noprefix(table_data, '4', 1); + + build_append_int_noprefix(table_data, 0, 8); /* Base Address */ + build_append_int_noprefix(table_data, 1, 4); /* Flags */ + } + + build_append_int_noprefix(table_data, 0, 4); /* Proximity Domain */ + build_append_int_noprefix(table_data, 0, 2); /* PCI Segment number */ + /* PCIe B/D/F */ + if (virt_is_iommu_sys_enabled(s)) { + build_append_int_noprefix(table_data, 0, 2); + } else { + build_append_int_noprefix(table_data, s->pci_iommu_bdf, 2); + } + /* Number of interrupt wires */ + build_append_int_noprefix(table_data, 0, 2); + /* Interrupt wire array offset */ + build_append_int_noprefix(table_data, RISCV_INTERRUPT_WIRE_OFFSSET, 2); + + /* PCIe Root Complex Node */ + build_append_int_noprefix(table_data, 1, 1); /* Type */ + build_append_int_noprefix(table_data, 1, 1); /* Revision */ + node_size = ROOT_COMPLEX_ENTRY_SIZE + + ID_MAPPING_ENTRY_SIZE * rc_mapping_count; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + 
build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, id++, 2); /* ID */ + build_append_int_noprefix(table_data, 0, 4); /* Flags */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* PCI Segment number */ + build_append_int_noprefix(table_data, 0, 2); + /* ID mapping array offset */ + build_append_int_noprefix(table_data, ROOT_COMPLEX_ENTRY_SIZE, 2); + /* Number of ID mappings */ + build_append_int_noprefix(table_data, rc_mapping_count, 2); + + /* Output Reference */ + AcpiRimtIdMapping *range; + + /* ID mapping array */ + for (i = 0; i < iommu_idmaps->len; i++) { + range = &g_array_index(iommu_idmaps, AcpiRimtIdMapping, i); + if (virt_is_iommu_sys_enabled(s)) { + range->source_id_base = 0; + } else { + range->source_id_base = s->pci_iommu_bdf + 1; + } + range->num_ids = 0xffff - s->pci_iommu_bdf; + build_rimt_id_mapping(table_data, range->source_id_base, + range->num_ids, iommu_offset); + } + + acpi_table_end(linker, &table); +} + +/* + * ACPI spec, Revision 6.6 * 5.2.16 System Resource Affinity Table (SRAT) - * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/25 - * https://drive.google.com/file/d/1YTdDx2IPm5IeZjAW932EYU-tUtgS08tX/view */ static void build_srat(GArray *table_data, BIOSLinker *linker, RISCVVirtState *vms) @@ -679,8 +883,16 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables_blob); build_rhct(tables_blob, tables->linker, s); + if (virt_is_iommu_sys_enabled(s) || s->pci_iommu_bdf) { + acpi_add_table(table_offsets, tables_blob); + build_rimt(tables_blob, tables->linker, s); + } + acpi_add_table(table_offsets, tables_blob); - spcr_setup(tables_blob, tables->linker, s); + + if (ms->acpi_spcr_enabled) { + spcr_setup(tables_blob, tables->linker, s); + } acpi_add_table(table_offsets, tables_blob); { diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index be1bf0f..47e573f 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -166,8 +166,8 @@ static void virt_flash_map1(PFlashCFI01 *flash, static void virt_flash_map(RISCVVirtState *s, MemoryRegion *sysmem) { - hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; - hwaddr flashbase = virt_memmap[VIRT_FLASH].base; + hwaddr flashsize = s->memmap[VIRT_FLASH].size / 2; + hwaddr flashbase = s->memmap[VIRT_FLASH].base; virt_flash_map1(s->flash[0], flashbase, flashsize, sysmem); @@ -237,10 +237,10 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, uint32_t cpu_phandle; MachineState *ms = MACHINE(s); bool is_32_bit = riscv_is_32bit(&s->soc[0]); - uint8_t satp_mode_max; for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu]; + int8_t satp_mode_max = cpu_ptr->cfg.max_satp_mode; g_autofree char *cpu_name = NULL; g_autofree char *core_name = NULL; g_autofree char *intc_name = NULL; @@ -252,8 +252,7 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, s->soc[socket].hartid_base + cpu); qemu_fdt_add_subnode(ms->fdt, cpu_name); - if (cpu_ptr->cfg.satp_mode.supported != 0) { - satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map); + if (satp_mode_max != -1) { sv_name = g_strdup_printf("riscv,%s", satp_mode_str(satp_mode_max, is_32_bit)); qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name); @@ -301,31 +300,30 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, } } -static void create_fdt_socket_memory(RISCVVirtState *s, - const MemMapEntry *memmap, int socket) +static void 
create_fdt_socket_memory(RISCVVirtState *s, int socket) { g_autofree char *mem_name = NULL; - uint64_t addr, size; + hwaddr addr; + uint64_t size; MachineState *ms = MACHINE(s); - addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(ms, socket); + addr = s->memmap[VIRT_DRAM].base + riscv_socket_mem_offset(ms, socket); size = riscv_socket_mem_size(ms, socket); - mem_name = g_strdup_printf("/memory@%lx", (long)addr); + mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr); qemu_fdt_add_subnode(ms->fdt, mem_name); - qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(ms->fdt, mem_name, "reg", 2, addr, 2, size); qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory"); riscv_socket_fdt_write_id(ms, mem_name, socket); } static void create_fdt_socket_clint(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, + int socket, uint32_t *intc_phandles) { int cpu; g_autofree char *clint_name = NULL; g_autofree uint32_t *clint_cells = NULL; - unsigned long clint_addr; + hwaddr clint_addr; MachineState *ms = MACHINE(s); static const char * const clint_compat[2] = { "sifive,clint0", "riscv,clint0" @@ -340,21 +338,22 @@ static void create_fdt_socket_clint(RISCVVirtState *s, clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); } - clint_addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); - clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + clint_addr = s->memmap[VIRT_CLINT].base + + s->memmap[VIRT_CLINT].size * socket; + clint_name = g_strdup_printf("/soc/clint@%"HWADDR_PRIx, clint_addr); qemu_fdt_add_subnode(ms->fdt, clint_name); qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible", (char **)&clint_compat, ARRAY_SIZE(clint_compat)); - qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg", - 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size); + qemu_fdt_setprop_sized_cells(ms->fdt, clint_name, "reg", + 2, clint_addr, 2, s->memmap[VIRT_CLINT].size); qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended", clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); riscv_socket_fdt_write_id(ms, clint_name, socket); } static void create_fdt_socket_aclint(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, + int socket, uint32_t *intc_phandles) { int cpu; @@ -381,13 +380,15 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2; if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) { - addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + addr = s->memmap[VIRT_CLINT].base + + (s->memmap[VIRT_CLINT].size * socket); name = g_strdup_printf("/soc/mswi@%lx", addr); + qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, RISCV_ACLINT_SWI_SIZE); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -397,37 +398,38 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, } if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { - addr = memmap[VIRT_CLINT].base + + addr = s->memmap[VIRT_CLINT].base + (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket); size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE; } else { - addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + - (memmap[VIRT_CLINT].size * socket); - size 
= memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE; + addr = s->memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + + (s->memmap[VIRT_CLINT].size * socket); + size = s->memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE; } name = g_strdup_printf("/soc/mtimer@%lx", addr); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mtimer"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, - 0x0, size - RISCV_ACLINT_DEFAULT_MTIME, - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, - 0x0, RISCV_ACLINT_DEFAULT_MTIME); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr + RISCV_ACLINT_DEFAULT_MTIME, + 2, size - RISCV_ACLINT_DEFAULT_MTIME, + 2, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, + 2, RISCV_ACLINT_DEFAULT_MTIME); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mtimer_cells, aclint_cells_size); riscv_socket_fdt_write_id(ms, name, socket); g_free(name); if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) { - addr = memmap[VIRT_ACLINT_SSWI].base + - (memmap[VIRT_ACLINT_SSWI].size * socket); + addr = s->memmap[VIRT_ACLINT_SSWI].base + + (s->memmap[VIRT_ACLINT_SSWI].size * socket); + name = g_strdup_printf("/soc/sswi@%lx", addr); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-sswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, s->memmap[VIRT_ACLINT_SSWI].size); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_sswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -438,7 +440,7 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, } static void create_fdt_socket_plic(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, + int socket, uint32_t *phandle, uint32_t *intc_phandles, uint32_t *plic_phandles) { @@ -452,7 +454,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s, }; plic_phandles[socket] = (*phandle)++; - plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket); + plic_addr = s->memmap[VIRT_PLIC].base + + (s->memmap[VIRT_PLIC].size * socket); plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); qemu_fdt_add_subnode(ms->fdt, plic_name); qemu_fdt_setprop_cell(ms->fdt, plic_name, @@ -490,8 +493,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s, s->soc[socket].num_harts * sizeof(uint32_t) * 4); } - qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg", - 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, plic_name, "reg", + 2, plic_addr, 2, s->memmap[VIRT_PLIC].size); qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev", VIRT_IRQCHIP_NUM_SOURCES - 1); riscv_socket_fdt_write_id(ms, plic_name, socket); @@ -500,8 +503,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s, if (!socket) { platform_bus_add_all_fdt_nodes(ms->fdt, plic_name, - memmap[VIRT_PLATFORM_BUS].base, - memmap[VIRT_PLATFORM_BUS].size, + s->memmap[VIRT_PLATFORM_BUS].base, + s->memmap[VIRT_PLATFORM_BUS].size, VIRT_PLATFORM_BUS_IRQ); } } @@ -588,7 +591,7 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle); } -static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_imsic(RISCVVirtState *s, uint32_t *phandle, uint32_t *intc_phandles, uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) { @@ -597,12 +600,12 @@ 
static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, if (!kvm_enabled()) { /* M-level IMSIC node */ - create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles, + create_fdt_one_imsic(s, s->memmap[VIRT_IMSIC_M].base, intc_phandles, *msi_m_phandle, true, 0); } /* S-level IMSIC node */ - create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles, + create_fdt_one_imsic(s, s->memmap[VIRT_IMSIC_S].base, intc_phandles, *msi_s_phandle, false, imsic_num_bits(s->aia_guests + 1)); @@ -652,8 +655,8 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); } - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, aplic_size); + qemu_fdt_setprop_sized_cells(ms->fdt, aplic_name, "reg", + 2, aplic_addr, 2, aplic_size); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", VIRT_IRQCHIP_NUM_SOURCES); @@ -679,7 +682,7 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, } static void create_fdt_socket_aplic(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, + int socket, uint32_t msi_m_phandle, uint32_t msi_s_phandle, uint32_t *phandle, @@ -696,18 +699,19 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, if (!kvm_enabled()) { /* M-level APLIC node */ - aplic_addr = memmap[VIRT_APLIC_M].base + - (memmap[VIRT_APLIC_M].size * socket); - create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, + aplic_addr = s->memmap[VIRT_APLIC_M].base + + (s->memmap[VIRT_APLIC_M].size * socket); + create_fdt_one_aplic(s, socket, aplic_addr, + s->memmap[VIRT_APLIC_M].size, msi_m_phandle, intc_phandles, aplic_m_phandle, aplic_s_phandle, true, num_harts); } /* S-level APLIC node */ - aplic_addr = memmap[VIRT_APLIC_S].base + - (memmap[VIRT_APLIC_S].size * socket); - create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, + aplic_addr = s->memmap[VIRT_APLIC_S].base + + (s->memmap[VIRT_APLIC_S].size * socket); + create_fdt_one_aplic(s, socket, aplic_addr, s->memmap[VIRT_APLIC_S].size, msi_s_phandle, intc_phandles, aplic_s_phandle, 0, false, num_harts); @@ -715,8 +719,8 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, if (!socket) { g_autofree char *aplic_name = fdt_get_aplic_nodename(aplic_addr); platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name, - memmap[VIRT_PLATFORM_BUS].base, - memmap[VIRT_PLATFORM_BUS].size, + s->memmap[VIRT_PLATFORM_BUS].base, + s->memmap[VIRT_PLATFORM_BUS].size, VIRT_PLATFORM_BUS_IRQ); } @@ -734,7 +738,7 @@ static void create_fdt_pmu(RISCVVirtState *s) riscv_pmu_generate_fdt_node(ms->fdt, hart.pmu_avail_ctrs, pmu_name); } -static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_sockets(RISCVVirtState *s, uint32_t *phandle, uint32_t *irq_mmio_phandle, uint32_t *irq_pcie_phandle, @@ -770,20 +774,20 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, create_fdt_socket_cpus(s, socket, clust_name, phandle, &intc_phandles[phandle_pos]); - create_fdt_socket_memory(s, memmap, socket); + create_fdt_socket_memory(s, socket); if (virt_aclint_allowed() && s->have_aclint) { - create_fdt_socket_aclint(s, memmap, socket, + create_fdt_socket_aclint(s, socket, &intc_phandles[phandle_pos]); } else if (tcg_enabled()) { - create_fdt_socket_clint(s, memmap, socket, + create_fdt_socket_clint(s, socket, &intc_phandles[phandle_pos]); } } if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { - create_fdt_imsic(s, memmap, phandle, intc_phandles, - 
&msi_m_phandle, &msi_s_phandle); + create_fdt_imsic(s, phandle, intc_phandles, + &msi_m_phandle, &msi_s_phandle); *msi_pcie_phandle = msi_s_phandle; } @@ -792,7 +796,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, * mode, we'll use only one APLIC instance. */ if (!virt_use_emulated_aplic(s->aia_type)) { - create_fdt_socket_aplic(s, memmap, 0, + create_fdt_socket_aplic(s, 0, msi_m_phandle, msi_s_phandle, phandle, &intc_phandles[0], xplic_phandles, ms->smp.cpus); @@ -806,11 +810,11 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, phandle_pos -= s->soc[socket].num_harts; if (s->aia_type == VIRT_AIA_TYPE_NONE) { - create_fdt_socket_plic(s, memmap, socket, phandle, + create_fdt_socket_plic(s, socket, phandle, &intc_phandles[phandle_pos], xplic_phandles); } else { - create_fdt_socket_aplic(s, memmap, socket, + create_fdt_socket_aplic(s, socket, msi_m_phandle, msi_s_phandle, phandle, &intc_phandles[phandle_pos], xplic_phandles, @@ -837,21 +841,22 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, riscv_socket_fdt_write_distance_matrix(ms); } -static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap, - uint32_t irq_virtio_phandle) +static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle) { int i; MachineState *ms = MACHINE(s); + hwaddr virtio_base = s->memmap[VIRT_VIRTIO].base; for (i = 0; i < VIRTIO_COUNT; i++) { - g_autofree char *name = g_strdup_printf("/soc/virtio_mmio@%lx", - (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size)); + g_autofree char *name = NULL; + uint64_t size = s->memmap[VIRT_VIRTIO].size; + hwaddr addr = virtio_base + i * size; + + name = g_strdup_printf("/soc/virtio_mmio@%"HWADDR_PRIx, addr); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, - 0x0, memmap[VIRT_VIRTIO].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_virtio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -864,7 +869,7 @@ static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap, } } -static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_pcie(RISCVVirtState *s, uint32_t irq_pcie_phandle, uint32_t msi_pcie_phandle, uint32_t iommu_sys_phandle) @@ -872,8 +877,8 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, g_autofree char *name = NULL; MachineState *ms = MACHINE(s); - name = g_strdup_printf("/soc/pci@%lx", - (long) memmap[VIRT_PCIE_ECAM].base); + name = g_strdup_printf("/soc/pci@%"HWADDR_PRIx, + s->memmap[VIRT_PCIE_ECAM].base); qemu_fdt_setprop_cell(ms->fdt, name, "#address-cells", FDT_PCI_ADDR_CELLS); qemu_fdt_setprop_cell(ms->fdt, name, "#interrupt-cells", @@ -884,19 +889,19 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_string(ms->fdt, name, "device_type", "pci"); qemu_fdt_setprop_cell(ms->fdt, name, "linux,pci-domain", 0); qemu_fdt_setprop_cells(ms->fdt, name, "bus-range", 0, - memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); + s->memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); qemu_fdt_setprop(ms->fdt, name, "dma-coherent", NULL, 0); if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle); } - 
qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0, - memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, + s->memmap[VIRT_PCIE_ECAM].base, 2, s->memmap[VIRT_PCIE_ECAM].size); qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, - 2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size, + 2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size, 1, FDT_PCI_RANGE_MMIO, - 2, memmap[VIRT_PCIE_MMIO].base, - 2, memmap[VIRT_PCIE_MMIO].base, 2, memmap[VIRT_PCIE_MMIO].size, + 2, s->memmap[VIRT_PCIE_MMIO].base, + 2, s->memmap[VIRT_PCIE_MMIO].base, 2, s->memmap[VIRT_PCIE_MMIO].size, 1, FDT_PCI_RANGE_MMIO_64BIT, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size); @@ -910,16 +915,15 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, create_pcie_irq_map(s, ms->fdt, name, irq_pcie_phandle); } -static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap, - uint32_t *phandle) +static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle) { char *name; uint32_t test_phandle; MachineState *ms = MACHINE(s); test_phandle = (*phandle)++; - name = g_strdup_printf("/soc/test@%lx", - (long)memmap[VIRT_TEST].base); + name = g_strdup_printf("/soc/test@%"HWADDR_PRIx, + s->memmap[VIRT_TEST].base); qemu_fdt_add_subnode(ms->fdt, name); { static const char * const compat[3] = { @@ -928,8 +932,9 @@ static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_string_array(ms->fdt, name, "compatible", (char **)&compat, ARRAY_SIZE(compat)); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, memmap[VIRT_TEST].base, 0x0, memmap[VIRT_TEST].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_TEST].base, + 2, s->memmap[VIRT_TEST].size); qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle); test_phandle = qemu_fdt_get_phandle(ms->fdt, name); g_free(name); @@ -951,18 +956,19 @@ static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap, g_free(name); } -static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_uart(RISCVVirtState *s, uint32_t irq_mmio_phandle) { g_autofree char *name = NULL; MachineState *ms = MACHINE(s); - name = g_strdup_printf("/soc/serial@%lx", (long)memmap[VIRT_UART0].base); + name = g_strdup_printf("/soc/serial@%"HWADDR_PRIx, + s->memmap[VIRT_UART0].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, memmap[VIRT_UART0].base, - 0x0, memmap[VIRT_UART0].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_UART0].base, + 2, s->memmap[VIRT_UART0].size); qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -975,18 +981,20 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", name); } -static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, +static void create_fdt_rtc(RISCVVirtState *s, uint32_t irq_mmio_phandle) { g_autofree char *name = NULL; MachineState *ms = MACHINE(s); - name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base); + name = g_strdup_printf("/soc/rtc@%"HWADDR_PRIx, + s->memmap[VIRT_RTC].base); 
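/*
 * A sketch of the cell encoding the qemu_fdt_setprop_sized_cells()
 * conversions in this patch rely on (assuming the board's usual
 * #address-cells = <2> and #size-cells = <2>): each "2, value" pair
 * is written as two big-endian cells <hi(value) lo(value)>, so
 *
 *     qemu_fdt_setprop_sized_cells(fdt, node, "reg", 2, addr, 2, size);
 *
 * yields the same property bytes as the replaced open-coded form
 *
 *     qemu_fdt_setprop_cells(fdt, node, "reg", addr >> 32, addr,
 *                            size >> 32, size);
 *
 * while keeping the 64-bit split inside one helper.
 */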
qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "google,goldfish-rtc"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_RTC].base, + 2, s->memmap[VIRT_RTC].size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -996,11 +1004,11 @@ static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, } } -static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap) +static void create_fdt_flash(RISCVVirtState *s) { MachineState *ms = MACHINE(s); - hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; - hwaddr flashbase = virt_memmap[VIRT_FLASH].base; + hwaddr flashsize = s->memmap[VIRT_FLASH].size / 2; + hwaddr flashbase = s->memmap[VIRT_FLASH].base; g_autofree char *name = g_strdup_printf("/flash@%" PRIx64, flashbase); qemu_fdt_add_subnode(ms->fdt, name); @@ -1011,11 +1019,11 @@ static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap) qemu_fdt_setprop_cell(ms->fdt, name, "bank-width", 4); } -static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap) +static void create_fdt_fw_cfg(RISCVVirtState *s) { MachineState *ms = MACHINE(s); - hwaddr base = memmap[VIRT_FW_CFG].base; - hwaddr size = memmap[VIRT_FW_CFG].size; + hwaddr base = s->memmap[VIRT_FW_CFG].base; + hwaddr size = s->memmap[VIRT_FW_CFG].size; g_autofree char *nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); qemu_fdt_add_subnode(ms->fdt, nodename); @@ -1034,8 +1042,8 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf) g_autofree char *iommu_node = NULL; g_autofree char *pci_node = NULL; - pci_node = g_strdup_printf("/soc/pci@%lx", - (long) virt_memmap[VIRT_PCIE_ECAM].base); + pci_node = g_strdup_printf("/soc/pci@%"HWADDR_PRIx, + s->memmap[VIRT_PCIE_ECAM].base); iommu_node = g_strdup_printf("%s/virtio_iommu@%x,%x", pci_node, PCI_SLOT(bdf), PCI_FUNC(bdf)); iommu_phandle = qemu_fdt_alloc_phandle(fdt); @@ -1080,8 +1088,7 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1); qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle); - qemu_fdt_setprop_cells(fdt, iommu_node, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(fdt, iommu_node, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip); qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts", @@ -1103,8 +1110,8 @@ static void create_fdt_iommu(RISCVVirtState *s, uint16_t bdf) g_autofree char *iommu_node = NULL; g_autofree char *pci_node = NULL; - pci_node = g_strdup_printf("/soc/pci@%lx", - (long) virt_memmap[VIRT_PCIE_ECAM].base); + pci_node = g_strdup_printf("/soc/pci@%"HWADDR_PRIx, + s->memmap[VIRT_PCIE_ECAM].base); iommu_node = g_strdup_printf("%s/iommu@%x", pci_node, bdf); iommu_phandle = qemu_fdt_alloc_phandle(fdt); qemu_fdt_add_subnode(fdt, iommu_node); @@ -1117,6 +1124,7 @@ static void create_fdt_iommu(RISCVVirtState *s, uint16_t bdf) qemu_fdt_setprop_cells(fdt, pci_node, "iommu-map", 0, iommu_phandle, 0, bdf, bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf); + s->pci_iommu_bdf = bdf; } static void finalize_fdt(RISCVVirtState *s) @@ -1125,27 +1133,27 @@ static void finalize_fdt(RISCVVirtState *s) uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1; uint32_t iommu_sys_phandle = 1; - create_fdt_sockets(s, 
virt_memmap, &phandle, &irq_mmio_phandle, + create_fdt_sockets(s, &phandle, &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle, &msi_pcie_phandle); - create_fdt_virtio(s, virt_memmap, irq_virtio_phandle); + create_fdt_virtio(s, irq_virtio_phandle); if (virt_is_iommu_sys_enabled(s)) { create_fdt_iommu_sys(s, irq_mmio_phandle, msi_pcie_phandle, &iommu_sys_phandle); } - create_fdt_pcie(s, virt_memmap, irq_pcie_phandle, msi_pcie_phandle, + create_fdt_pcie(s, irq_pcie_phandle, msi_pcie_phandle, iommu_sys_phandle); - create_fdt_reset(s, virt_memmap, &phandle); + create_fdt_reset(s, &phandle); - create_fdt_uart(s, virt_memmap, irq_mmio_phandle); + create_fdt_uart(s, irq_mmio_phandle); - create_fdt_rtc(s, virt_memmap, irq_mmio_phandle); + create_fdt_rtc(s, irq_mmio_phandle); } -static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) +static void create_fdt(RISCVVirtState *s) { MachineState *ms = MACHINE(s); uint8_t rng_seed[32]; @@ -1172,7 +1180,8 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) * The "/soc/pci@..." node is needed for PCIE hotplugs * that might happen before finalize_fdt(). */ - name = g_strdup_printf("/soc/pci@%lx", (long) memmap[VIRT_PCIE_ECAM].base); + name = g_strdup_printf("/soc/pci@%"HWADDR_PRIx, + s->memmap[VIRT_PCIE_ECAM].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_add_subnode(ms->fdt, "/chosen"); @@ -1184,8 +1193,8 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) qemu_fdt_add_subnode(ms->fdt, "/aliases"); - create_fdt_flash(s, memmap); - create_fdt_fw_cfg(s, memmap); + create_fdt_flash(s); + create_fdt_fw_cfg(s); create_fdt_pmu(s); } @@ -1261,9 +1270,8 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, return dev; } -static FWCfgState *create_fw_cfg(const MachineState *ms) +static FWCfgState *create_fw_cfg(const MachineState *ms, hwaddr base) { - hwaddr base = virt_memmap[VIRT_FW_CFG].base; FWCfgState *fw_cfg; fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, @@ -1360,14 +1368,13 @@ static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) { DeviceState *dev; SysBusDevice *sysbus; - const MemMapEntry *memmap = virt_memmap; int i; MemoryRegion *sysmem = get_system_memory(); dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE); dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE); qdev_prop_set_uint32(dev, "num_irqs", VIRT_PLATFORM_BUS_NUM_IRQS); - qdev_prop_set_uint32(dev, "mmio_size", memmap[VIRT_PLATFORM_BUS].size); + qdev_prop_set_uint32(dev, "mmio_size", s->memmap[VIRT_PLATFORM_BUS].size); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); s->platform_bus_dev = dev; @@ -1378,7 +1385,7 @@ static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) } memory_region_add_subregion(sysmem, - memmap[VIRT_PLATFORM_BUS].base, + s->memmap[VIRT_PLATFORM_BUS].base, sysbus_mmio_get_region(sysbus, 0)); } @@ -1425,9 +1432,8 @@ static void virt_machine_done(Notifier *notifier, void *data) { RISCVVirtState *s = container_of(notifier, RISCVVirtState, machine_done); - const MemMapEntry *memmap = virt_memmap; MachineState *machine = MACHINE(s); - hwaddr start_addr = memmap[VIRT_DRAM].base; + hwaddr start_addr = s->memmap[VIRT_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; const char *firmware_name = riscv_default_firmware_name(&s->soc[0]); uint64_t fdt_load_addr; @@ -1471,14 +1477,14 @@ static void virt_machine_done(Notifier *notifier, void *data) * let's overwrite the address we jump to after reset to * the base of the flash. 
*/ - start_addr = virt_memmap[VIRT_FLASH].base; + start_addr = s->memmap[VIRT_FLASH].base; } else { /* * Pflash was supplied but either KVM guest or bios is not none. * In this case, base of the flash would contain S-mode payload. */ riscv_setup_firmware_boot(machine); - kernel_entry = virt_memmap[VIRT_FLASH].base; + kernel_entry = s->memmap[VIRT_FLASH].base; } } @@ -1492,15 +1498,15 @@ static void virt_machine_done(Notifier *notifier, void *data) kernel_entry = boot_info.image_low_addr; } - fdt_load_addr = riscv_compute_fdt_addr(memmap[VIRT_DRAM].base, - memmap[VIRT_DRAM].size, + fdt_load_addr = riscv_compute_fdt_addr(s->memmap[VIRT_DRAM].base, + s->memmap[VIRT_DRAM].size, machine, &boot_info); riscv_load_fdt(fdt_load_addr, machine->fdt); /* load the reset vector */ riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr, - virt_memmap[VIRT_MROM].base, - virt_memmap[VIRT_MROM].size, kernel_entry, + s->memmap[VIRT_MROM].base, + s->memmap[VIRT_MROM].size, kernel_entry, fdt_load_addr); /* @@ -1521,7 +1527,6 @@ static void virt_machine_done(Notifier *notifier, void *data) static void virt_machine_init(MachineState *machine) { - const MemMapEntry *memmap = virt_memmap; RISCVVirtState *s = RISCV_VIRT_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); @@ -1529,6 +1534,8 @@ static void virt_machine_init(MachineState *machine) int i, base_hartid, hart_count; int socket_count = riscv_socket_count(machine); + s->memmap = virt_memmap; + /* Check socket count limit */ if (VIRT_SOCKETS_MAX < socket_count) { error_report("number of sockets/nodes should be less than %d", @@ -1576,7 +1583,7 @@ static void virt_machine_init(MachineState *machine) if (virt_aclint_allowed() && s->have_aclint) { if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { /* Per-socket ACLINT MTIMER */ - riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + + riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base + i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE, RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, @@ -1585,28 +1592,28 @@ static void virt_machine_init(MachineState *machine) RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); } else { /* Per-socket ACLINT MSWI, MTIMER, and SSWI */ - riscv_aclint_swi_create(memmap[VIRT_CLINT].base + - i * memmap[VIRT_CLINT].size, + riscv_aclint_swi_create(s->memmap[VIRT_CLINT].base + + i * s->memmap[VIRT_CLINT].size, base_hartid, hart_count, false); - riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + - i * memmap[VIRT_CLINT].size + + riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base + + i * s->memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE, RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); - riscv_aclint_swi_create(memmap[VIRT_ACLINT_SSWI].base + - i * memmap[VIRT_ACLINT_SSWI].size, + riscv_aclint_swi_create(s->memmap[VIRT_ACLINT_SSWI].base + + i * s->memmap[VIRT_ACLINT_SSWI].size, base_hartid, hart_count, true); } } else if (tcg_enabled()) { /* Per-socket SiFive CLINT */ riscv_aclint_swi_create( - memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, + s->memmap[VIRT_CLINT].base + i * s->memmap[VIRT_CLINT].size, base_hartid, hart_count, false); - riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + - i * memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE, + riscv_aclint_mtimer_create(s->memmap[VIRT_CLINT].base + + i * s->memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE, RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, 
hart_count, RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); @@ -1614,11 +1621,11 @@ static void virt_machine_init(MachineState *machine) /* Per-socket interrupt controller */ if (s->aia_type == VIRT_AIA_TYPE_NONE) { - s->irqchip[i] = virt_create_plic(memmap, i, + s->irqchip[i] = virt_create_plic(s->memmap, i, base_hartid, hart_count); } else { s->irqchip[i] = virt_create_aia(s->aia_type, s->aia_guests, - memmap, i, base_hartid, + s->memmap, i, base_hartid, hart_count); } @@ -1640,8 +1647,8 @@ static void virt_machine_init(MachineState *machine) if (kvm_enabled() && virt_use_kvm_aia_aplic_imsic(s->aia_type)) { kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, - memmap[VIRT_APLIC_S].base, - memmap[VIRT_IMSIC_S].base, + s->memmap[VIRT_APLIC_S].base, + s->memmap[VIRT_IMSIC_S].base, s->aia_guests); } @@ -1657,37 +1664,36 @@ static void virt_machine_init(MachineState *machine) virt_high_pcie_memmap.size = VIRT32_HIGH_PCIE_MMIO_SIZE; } else { virt_high_pcie_memmap.size = VIRT64_HIGH_PCIE_MMIO_SIZE; - virt_high_pcie_memmap.base = memmap[VIRT_DRAM].base + machine->ram_size; + virt_high_pcie_memmap.base = s->memmap[VIRT_DRAM].base + + machine->ram_size; virt_high_pcie_memmap.base = ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size); } - s->memmap = virt_memmap; - /* register system main memory (actual RAM) */ - memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base, - machine->ram); + memory_region_add_subregion(system_memory, s->memmap[VIRT_DRAM].base, + machine->ram); /* boot rom */ memory_region_init_rom(mask_rom, NULL, "riscv_virt_board.mrom", - memmap[VIRT_MROM].size, &error_fatal); - memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, + s->memmap[VIRT_MROM].size, &error_fatal); + memory_region_add_subregion(system_memory, s->memmap[VIRT_MROM].base, mask_rom); /* * Init fw_cfg. Must be done before riscv_load_fdt, otherwise the * device tree cannot be altered and we get FDT_ERR_NOSPACE. 
*/ - s->fw_cfg = create_fw_cfg(machine); + s->fw_cfg = create_fw_cfg(machine, s->memmap[VIRT_FW_CFG].base); rom_set_fw(s->fw_cfg); /* SiFive Test MMIO device */ - sifive_test_create(memmap[VIRT_TEST].base); + sifive_test_create(s->memmap[VIRT_TEST].base); /* VirtIO MMIO devices */ for (i = 0; i < VIRTIO_COUNT; i++) { sysbus_create_simple("virtio-mmio", - memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, + s->memmap[VIRT_VIRTIO].base + i * s->memmap[VIRT_VIRTIO].size, qdev_get_gpio_in(virtio_irqchip, VIRTIO_IRQ + i)); } @@ -1695,11 +1701,11 @@ static void virt_machine_init(MachineState *machine) create_platform_bus(s, mmio_irqchip); - serial_mm_init(system_memory, memmap[VIRT_UART0].base, + serial_mm_init(system_memory, s->memmap[VIRT_UART0].base, 0, qdev_get_gpio_in(mmio_irqchip, UART0_IRQ), 399193, serial_hd(0), DEVICE_LITTLE_ENDIAN); - sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base, + sysbus_create_simple("goldfish_rtc", s->memmap[VIRT_RTC].base, qdev_get_gpio_in(mmio_irqchip, RTC_IRQ)); for (i = 0; i < ARRAY_SIZE(s->flash); i++) { @@ -1717,7 +1723,7 @@ static void virt_machine_init(MachineState *machine) exit(1); } } else { - create_fdt(s, memmap); + create_fdt(s); } if (virt_is_iommu_sys_enabled(s)) { diff --git a/hw/riscv/xiangshan_kmh.c b/hw/riscv/xiangshan_kmh.c new file mode 100644 index 0000000..a95fd61 --- /dev/null +++ b/hw/riscv/xiangshan_kmh.c @@ -0,0 +1,220 @@ +/* + * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu + * FPGA prototype platform + * + * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Provides a board compatible with the Xiangshan Kunminghu + * FPGA prototype platform: + * + * 0) UART (16550A) + * 1) CLINT (Core-Local Interruptor) + * 2) IMSIC (Incoming MSI Controller) + * 3) APLIC (Advanced Platform-Level Interrupt Controller) + * + * More information can be found in our Github repository: + * https://github.com/OpenXiangShan/XiangShan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "system/address-spaces.h" +#include "hw/boards.h" +#include "hw/char/serial-mm.h" +#include "hw/intc/riscv_aclint.h" +#include "hw/intc/riscv_aplic.h" +#include "hw/intc/riscv_imsic.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/boot.h" +#include "hw/riscv/xiangshan_kmh.h" +#include "hw/riscv/riscv_hart.h" +#include "system/system.h" + +static const MemMapEntry xiangshan_kmh_memmap[] = { + [XIANGSHAN_KMH_ROM] = { 0x1000, 0xF000 }, + [XIANGSHAN_KMH_UART0] = { 0x310B0000, 0x10000 }, + [XIANGSHAN_KMH_CLINT] = { 0x38000000, 0x10000 }, + [XIANGSHAN_KMH_APLIC_M] = { 0x31100000, 0x4000 }, + [XIANGSHAN_KMH_APLIC_S] = { 0x31120000, 0x4000 }, + [XIANGSHAN_KMH_IMSIC_M] = { 0x3A800000, 0x10000 }, + [XIANGSHAN_KMH_IMSIC_S] = { 0x3B000000, 0x80000 }, + [XIANGSHAN_KMH_DRAM] = { 0x80000000, 0x0 }, +}; + +static DeviceState *xiangshan_kmh_create_aia(uint32_t num_harts) +{ + int i; + const MemMapEntry *memmap = xiangshan_kmh_memmap; + hwaddr addr = 0; + DeviceState *aplic_m = NULL; + + /* M-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_M].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), i, true, + 1, XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* S-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_S].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + + i * IMSIC_HART_SIZE(XIANGSHAN_KMH_IMSIC_GUEST_BITS), + i, false, 1 + XIANGSHAN_KMH_IMSIC_GUEST_BITS, + XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* M-level APLIC */ + aplic_m = riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_M].base, + memmap[XIANGSHAN_KMH_APLIC_M].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, true, NULL); + + /* S-level APLIC */ + riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_S].base, + memmap[XIANGSHAN_KMH_APLIC_S].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, false, aplic_m); + + return aplic_m; +} + +static void xiangshan_kmh_soc_realize(DeviceState *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(dev); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + uint32_t num_harts = ms->smp.cpus; + + qdev_prop_set_uint32(DEVICE(&s->cpus), "num-harts", num_harts); + qdev_prop_set_uint32(DEVICE(&s->cpus), "hartid-base", 0); + qdev_prop_set_string(DEVICE(&s->cpus), "cpu-type", + TYPE_RISCV_CPU_XIANGSHAN_KMH); + sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_fatal); + + /* AIA */ + s->irqchip = xiangshan_kmh_create_aia(num_harts); + + /* UART */ + serial_mm_init(system_memory, memmap[XIANGSHAN_KMH_UART0].base, 2, + qdev_get_gpio_in(s->irqchip, XIANGSHAN_KMH_UART0_IRQ), + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); + + /* CLINT */ + riscv_aclint_swi_create(memmap[XIANGSHAN_KMH_CLINT].base, + 0, num_harts, false); + riscv_aclint_mtimer_create(memmap[XIANGSHAN_KMH_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + 0, num_harts, RISCV_ACLINT_DEFAULT_MTIMECMP, + RISCV_ACLINT_DEFAULT_MTIME, + XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ, true); + + /* ROM */ + memory_region_init_rom(&s->rom, OBJECT(dev), "xiangshan.kunminghu.rom", + memmap[XIANGSHAN_KMH_ROM].size, &error_fatal); + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_ROM].base, &s->rom); +} + +static void xiangshan_kmh_soc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = xiangshan_kmh_soc_realize; + dc->user_creatable = 
false; +} + +static void xiangshan_kmh_soc_instance_init(Object *obj) +{ + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(obj); + + object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY); +} + +static const TypeInfo xiangshan_kmh_soc_info = { + .name = TYPE_XIANGSHAN_KMH_SOC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XiangshanKmhSoCState), + .instance_init = xiangshan_kmh_soc_instance_init, + .class_init = xiangshan_kmh_soc_class_init, +}; + +static void xiangshan_kmh_soc_register_types(void) +{ + type_register_static(&xiangshan_kmh_soc_info); +} +type_init(xiangshan_kmh_soc_register_types) + +static void xiangshan_kmh_machine_init(MachineState *machine) +{ + XiangshanKmhState *s = XIANGSHAN_KMH_MACHINE(machine); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + hwaddr start_addr = memmap[XIANGSHAN_KMH_DRAM].base; + + /* Initialize SoC */ + object_initialize_child(OBJECT(machine), "soc", &s->soc, + TYPE_XIANGSHAN_KMH_SOC); + qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); + + /* Register RAM */ + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_DRAM].base, + machine->ram); + + /* ROM reset vector */ + riscv_setup_rom_reset_vec(machine, &s->soc.cpus, + start_addr, + memmap[XIANGSHAN_KMH_ROM].base, + memmap[XIANGSHAN_KMH_ROM].size, 0, 0); + if (machine->firmware) { + riscv_load_firmware(machine->firmware, &start_addr, NULL); + } + + /* Note: the DTB is integrated into the firmware (OpenSBI) at build time */ +} + +static void xiangshan_kmh_machine_class_init(ObjectClass *klass, const void *data) +{ + MachineClass *mc = MACHINE_CLASS(klass); + static const char *const valid_cpu_types[] = { + TYPE_RISCV_CPU_XIANGSHAN_KMH, + NULL + }; + + mc->desc = "RISC-V Board compatible with the Xiangshan " \ + "Kunminghu FPGA prototype platform"; + mc->init = xiangshan_kmh_machine_init; + mc->max_cpus = XIANGSHAN_KMH_MAX_CPUS; + mc->default_cpu_type = TYPE_RISCV_CPU_XIANGSHAN_KMH; + mc->valid_cpu_types = valid_cpu_types; + mc->default_ram_id = "xiangshan.kunminghu.ram"; +} + +static const TypeInfo xiangshan_kmh_machine_info = { + .name = TYPE_XIANGSHAN_KMH_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(XiangshanKmhState), + .class_init = xiangshan_kmh_machine_class_init, +}; + +static void xiangshan_kmh_machine_register_types(void) +{ + type_register_static(&xiangshan_kmh_machine_info); } +type_init(xiangshan_kmh_machine_register_types) diff --git a/hw/s390x/ap-stub.c b/hw/s390x/ap-stub.c new file mode 100644 index 0000000..001fe5f --- /dev/null +++ b/hw/s390x/ap-stub.c @@ -0,0 +1,21 @@ +/* + * VFIO based AP matrix device assignment + * + * Copyright 2025 IBM Corp.
+ * Author(s): Rorie Reyes <rreyes@linux.ibm.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/s390x/ap-bridge.h" + +int ap_chsc_sei_nt0_get_event(void *res) +{ + return EVENT_INFORMATION_NOT_STORED; +} + +bool ap_chsc_sei_nt0_have_event(void) +{ + return false; +} diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c index 19c2238..8be1813 100644 --- a/hw/s390x/ccw-device.c +++ b/hw/s390x/ccw-device.c @@ -57,7 +57,7 @@ static void ccw_device_set_loadparm(Object *obj, Visitor *v, Error **errp) { CcwDevice *dev = CCW_DEVICE(obj); - char *val; + g_autofree char *val = NULL; int index; index = object_property_get_int(obj, "bootindex", NULL); diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c index 7d4e1f5..b513f89 100644 --- a/hw/s390x/cpu-topology.c +++ b/hw/s390x/cpu-topology.c @@ -23,8 +23,8 @@ #include "target/s390x/cpu.h" #include "hw/s390x/s390-virtio-ccw.h" #include "hw/s390x/cpu-topology.h" -#include "qapi/qapi-commands-machine-target.h" -#include "qapi/qapi-events-machine-target.h" +#include "qapi/qapi-commands-machine-s390x.h" +#include "qapi/qapi-events-machine-s390x.h" /* * s390_topology is used to keep the topology information. diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c index 1afe364..fee286e 100644 --- a/hw/s390x/event-facility.c +++ b/hw/s390x/event-facility.c @@ -4,6 +4,7 @@ * handles SCLP event types * - Signal Quiesce - system power down * - ASCII Console Data - VT220 read and write + * - Control-Program Identification - Send OS data from guest to host * * Copyright IBM, Corp. 2012 * @@ -40,18 +41,12 @@ struct SCLPEventFacility { SysBusDevice parent_obj; SCLPEventsBus sbus; SCLPEvent quiesce, cpu_hotplug; + SCLPEventCPI cpi; /* guest's receive mask */ union { uint32_t receive_mask_pieces[2]; sccb_mask_t receive_mask; }; - /* - * when false, we keep the same broken, backwards compatible behaviour as - * before, allowing only masks of size exactly 4; when true, we implement - * the architecture correctly, allowing all valid mask sizes. Needed for - * migration toward older versions. 
- */ - bool allow_all_mask_sizes; /* length of the receive mask */ uint16_t mask_length; }; @@ -294,8 +289,7 @@ static void write_event_mask(SCLPEventFacility *ef, SCCB *sccb) uint16_t mask_length = be16_to_cpu(we_mask->mask_length); sccb_mask_t tmp_mask; - if (!mask_length || (mask_length > SCLP_EVENT_MASK_LEN_MAX) || - ((mask_length != 4) && !ef->allow_all_mask_sizes)) { + if (!mask_length || mask_length > SCLP_EVENT_MASK_LEN_MAX) { sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_MASK_LENGTH); return; } @@ -355,13 +349,6 @@ static bool vmstate_event_facility_mask64_needed(void *opaque) return (ef->receive_mask & 0xFFFFFFFF) != 0; } -static bool vmstate_event_facility_mask_length_needed(void *opaque) -{ - SCLPEventFacility *ef = opaque; - - return ef->allow_all_mask_sizes; -} - static const VMStateDescription vmstate_event_facility_mask64 = { .name = "vmstate-event-facility/mask64", .version_id = 0, @@ -377,7 +364,6 @@ static const VMStateDescription vmstate_event_facility_mask_length = { .name = "vmstate-event-facility/mask_length", .version_id = 0, .minimum_version_id = 0, - .needed = vmstate_event_facility_mask_length_needed, .fields = (const VMStateField[]) { VMSTATE_UINT16(mask_length, SCLPEventFacility), VMSTATE_END_OF_LIST() @@ -399,31 +385,12 @@ static const VMStateDescription vmstate_event_facility = { } }; -static void sclp_event_set_allow_all_mask_sizes(Object *obj, bool value, - Error **errp) -{ - SCLPEventFacility *ef = (SCLPEventFacility *)obj; - - ef->allow_all_mask_sizes = value; -} - -static bool sclp_event_get_allow_all_mask_sizes(Object *obj, Error **errp) -{ - SCLPEventFacility *ef = (SCLPEventFacility *)obj; - - return ef->allow_all_mask_sizes; -} - static void init_event_facility(Object *obj) { SCLPEventFacility *event_facility = EVENT_FACILITY(obj); DeviceState *sdev = DEVICE(obj); event_facility->mask_length = 4; - event_facility->allow_all_mask_sizes = true; - object_property_add_bool(obj, "allow_all_mask_sizes", - sclp_event_get_allow_all_mask_sizes, - sclp_event_set_allow_all_mask_sizes); /* Spawn a new bus for SCLP events */ qbus_init(&event_facility->sbus, sizeof(event_facility->sbus), diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build index 3bbebfd..1bc8583 100644 --- a/hw/s390x/meson.build +++ b/hw/s390x/meson.build @@ -13,6 +13,7 @@ s390x_ss.add(files( 's390-skeys.c', 's390-stattrib.c', 'sclp.c', + 'sclpcpi.c', 'sclpcpu.c', 'sclpquiesce.c', 'tod.c', @@ -33,6 +34,7 @@ s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files( )) s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c')) s390x_ss.add(when: 'CONFIG_VFIO', if_true: files('s390-pci-vfio.c')) +s390x_ss.add(when: 'CONFIG_VFIO_AP', if_false: files('ap-stub.c')) virtio_ss = ss.source_set() virtio_ss.add(files('virtio-ccw.c')) diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index e6aa445..f87d274 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -384,9 +384,9 @@ static uint64_t get_table_index(uint64_t iova, int8_t ett) return calc_sx(iova); case ZPCI_ETT_RT: return calc_rtx(iova); + default: + g_assert_not_reached(); } - - return -1; } static bool entry_isvalid(uint64_t entry, int8_t ett) @@ -397,22 +397,24 @@ static bool entry_isvalid(uint64_t entry, int8_t ett) case ZPCI_ETT_ST: case ZPCI_ETT_RT: return rt_entry_isvalid(entry); + default: + g_assert_not_reached(); } - - return false; } /* Return true if address translation is done */ static bool translate_iscomplete(uint64_t entry, int8_t ett) { switch (ett) { - case 0: + case 
ZPCI_ETT_ST: return (entry & ZPCI_TABLE_FC) ? true : false; - case 1: + case ZPCI_ETT_RT: return false; + case ZPCI_ETT_PT: + return true; + default: + g_assert_not_reached(); } - - return true; } static uint64_t get_frame_size(int8_t ett) @@ -424,9 +426,9 @@ static uint64_t get_frame_size(int8_t ett) return 1ULL << 20; case ZPCI_ETT_RT: return 1ULL << 31; + default: + g_assert_not_reached(); } - - return 0; } static uint64_t get_next_table_origin(uint64_t entry, int8_t ett) @@ -438,9 +440,9 @@ static uint64_t get_next_table_origin(uint64_t entry, int8_t ett) return get_st_pto(entry); case ZPCI_ETT_RT: return get_rt_sto(entry); + default: + g_assert_not_reached(); } - - return 0; } /** diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index b5dddb2..a3bb5aa 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -16,6 +16,7 @@ #include "exec/target_page.h" #include "system/memory.h" #include "qemu/error-report.h" +#include "qemu/bswap.h" #include "system/hw_accel.h" #include "hw/boards.h" #include "hw/pci/pci_device.h" diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index aedb62b..8eeecfd 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -17,7 +17,6 @@ #include "hw/s390x/storage-keys.h" #include "qapi/error.h" #include "qapi/qapi-commands-machine.h" -#include "qapi/qapi-commands-misc-target.h" #include "qobject/qdict.h" #include "qemu/error-report.h" #include "system/memory_mapping.h" diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c index f74cf32..13a678a 100644 --- a/hw/s390x/s390-stattrib.c +++ b/hw/s390x/s390-stattrib.c @@ -338,7 +338,7 @@ static const TypeInfo qemu_s390_stattrib_info = { static SaveVMHandlers savevm_s390_stattrib_handlers = { .save_setup = cmma_save_setup, .save_live_iterate = cmma_save_iterate, - .save_live_complete_precopy = cmma_save_complete, + .save_complete = cmma_save_complete, .state_pending_exact = cmma_state_pending, .state_pending_estimate = cmma_state_pending, .save_cleanup = cmma_save_cleanup, diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index d5658af..a79bd13 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -260,9 +260,21 @@ static void s390_create_sclpconsole(SCLPDevice *sclp, qdev_realize_and_unref(dev, ev_fac_bus, &error_fatal); } +static void s390_create_sclpcpi(SCLPDevice *sclp) +{ + SCLPEventFacility *ef = sclp->event_facility; + BusState *ev_fac_bus = sclp_get_event_facility_bus(ef); + DeviceState *dev; + + dev = qdev_new(TYPE_SCLP_EVENT_CPI); + object_property_add_child(OBJECT(ef), "sclpcpi", OBJECT(dev)); + qdev_realize_and_unref(dev, ev_fac_bus, &error_fatal); +} + static void ccw_init(MachineState *machine) { MachineClass *mc = MACHINE_GET_CLASS(machine); + S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); S390CcwMachineState *ms = S390_CCW_MACHINE(machine); int ret; VirtualCssBus *css_bus; @@ -323,6 +335,12 @@ static void ccw_init(MachineState *machine) /* init the TOD clock */ s390_init_tod(); + + /* init SCLP event Control-Program Identification */ + if (s390mc->use_cpi) { + s390_create_sclpcpi(ms->sclp); + } + } static void s390_cpu_plug(HotplugHandler *hotplug_dev, @@ -748,39 +766,6 @@ static inline void machine_set_dea_key_wrap(Object *obj, bool value, ms->dea_key_wrap = value; } -static S390CcwMachineClass *current_mc; - -/* - * Get the class of the s390-ccw-virtio machine that is currently in use. 
- * Note: libvirt is using the "none" machine to probe for the features of the - * host CPU, so in case this is called with the "none" machine, the function - * returns the TYPE_S390_CCW_MACHINE base class. In this base class, all the - * various "*_allowed" variables are enabled, so that the *_allowed() wrappers - * below return the correct default value for the "none" machine. - * - * Attention! Do *not* add additional new wrappers for CPU features via this - * mechanism anymore. CPU features should be handled via the CPU models, - * i.e. checking with s390_has_feat() should be sufficient. - */ -static S390CcwMachineClass *get_machine_class(void) -{ - if (unlikely(!current_mc)) { - /* - * No s390 ccw machine was instantiated, we are likely to - * be called for the 'none' machine. The properties will - * have their after-initialization values. - */ - current_mc = S390_CCW_MACHINE_CLASS( - object_class_by_name(TYPE_S390_CCW_MACHINE)); - } - return current_mc; -} - -bool hpage_1m_allowed(void) -{ - return get_machine_class()->hpage_1m_allowed; -} - static void machine_get_loadparm(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -804,6 +789,7 @@ static void machine_set_loadparm(Object *obj, Visitor *v, } s390_ipl_fmt_loadparm(ms->loadparm, val, errp); + g_free(val); } static void ccw_machine_class_init(ObjectClass *oc, const void *data) @@ -814,8 +800,8 @@ static void ccw_machine_class_init(ObjectClass *oc, const void *data) S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); DumpSKeysInterface *dsi = DUMP_SKEYS_INTERFACE_CLASS(oc); - s390mc->hpage_1m_allowed = true; s390mc->max_threads = 1; + s390mc->use_cpi = true; mc->reset = s390_machine_reset; mc->block_default_type = IF_VIRTIO; mc->no_cdrom = 1; @@ -888,7 +874,6 @@ static const TypeInfo ccw_machine_info = { #define DEFINE_CCW_MACHINE_IMPL(latest, ...) 
\ static void MACHINE_VER_SYM(mach_init, ccw, __VA_ARGS__)(MachineState *mach) \ { \ - current_mc = S390_CCW_MACHINE_CLASS(MACHINE_GET_CLASS(mach)); \ MACHINE_VER_SYM(instance_options, ccw, __VA_ARGS__)(mach); \ ccw_init(mach); \ } \ @@ -942,6 +927,9 @@ static void ccw_machine_10_0_instance_options(MachineState *machine) static void ccw_machine_10_0_class_options(MachineClass *mc) { + S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); + s390mc->use_cpi = false; + ccw_machine_10_1_class_options(mc); compat_props_add(mc->compat_props, hw_compat_10_0, hw_compat_10_0_len); } @@ -1179,116 +1167,6 @@ static void ccw_machine_4_2_class_options(MachineClass *mc) } DEFINE_CCW_MACHINE(4, 2); -static void ccw_machine_4_1_instance_options(MachineState *machine) -{ - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_1 }; - ccw_machine_4_2_instance_options(machine); - s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); -} - -static void ccw_machine_4_1_class_options(MachineClass *mc) -{ - ccw_machine_4_2_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len); -} -DEFINE_CCW_MACHINE(4, 1); - -static void ccw_machine_4_0_instance_options(MachineState *machine) -{ - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_0 }; - ccw_machine_4_1_instance_options(machine); - s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); -} - -static void ccw_machine_4_0_class_options(MachineClass *mc) -{ - ccw_machine_4_1_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); -} -DEFINE_CCW_MACHINE(4, 0); - -static void ccw_machine_3_1_instance_options(MachineState *machine) -{ - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; - ccw_machine_4_0_instance_options(machine); - s390_cpudef_featoff_greater(14, 1, S390_FEAT_MULTIPLE_EPOCH); - s390_cpudef_group_featoff_greater(14, 1, S390_FEAT_GROUP_MULTIPLE_EPOCH_PTFF); - s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); -} - -static void ccw_machine_3_1_class_options(MachineClass *mc) -{ - ccw_machine_4_0_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len); -} -DEFINE_CCW_MACHINE(3, 1); - -static void ccw_machine_3_0_instance_options(MachineState *machine) -{ - ccw_machine_3_1_instance_options(machine); -} - -static void ccw_machine_3_0_class_options(MachineClass *mc) -{ - S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); - - s390mc->hpage_1m_allowed = false; - ccw_machine_3_1_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_3_0, hw_compat_3_0_len); -} -DEFINE_CCW_MACHINE(3, 0); - -static void ccw_machine_2_12_instance_options(MachineState *machine) -{ - ccw_machine_3_0_instance_options(machine); - s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); - s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); -} - -static void ccw_machine_2_12_class_options(MachineClass *mc) -{ - ccw_machine_3_0_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_12, hw_compat_2_12_len); -} -DEFINE_CCW_MACHINE(2, 12); - -#ifdef CONFIG_S390X_LEGACY_CPUS - -static void ccw_machine_2_11_instance_options(MachineState *machine) -{ - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; - ccw_machine_2_12_instance_options(machine); - - /* before 2.12 we emulated the very first z900 */ - s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); -} - -static void ccw_machine_2_11_class_options(MachineClass *mc) -{ - static GlobalProperty compat[] = { - { 
TYPE_SCLP_EVENT_FACILITY, "allow_all_mask_sizes", "off", }, - }; - - ccw_machine_2_12_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_11, hw_compat_2_11_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); -} -DEFINE_CCW_MACHINE(2, 11); - -static void ccw_machine_2_10_instance_options(MachineState *machine) -{ - ccw_machine_2_11_instance_options(machine); -} - -static void ccw_machine_2_10_class_options(MachineClass *mc) -{ - ccw_machine_2_11_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_2_10, hw_compat_2_10_len); -} -DEFINE_CCW_MACHINE(2, 10); - -#endif - static void ccw_machine_register_types(void) { type_register_static(&ccw_machine_info); diff --git a/hw/s390x/sclpcpi.c b/hw/s390x/sclpcpi.c new file mode 100644 index 0000000..7aa039d --- /dev/null +++ b/hw/s390x/sclpcpi.c @@ -0,0 +1,212 @@ + /* + * SPDX-License-Identifier: GPL-2.0-or-later + * + * SCLP event type 11 - Control-Program Identification (CPI): + * CPI is used to send program identifiers from the guest to the + * Service-Call Logical Processor (SCLP). It is not sent by the SCLP. + * + * Control-program identifiers provide data about the guest operating + * system. The control-program identifiers are: system type, system name, + * system level and sysplex name. + * + * In Linux, all the control-program identifiers are user configurable. The + * system type, system name, and sysplex name use EBCDIC characters from + * this set: capital A-Z, 0-9, $, @, #, and blank. In Linux, the system + * type, system name and sysplex name are arbitrary free-form texts. + * + * In Linux, the 8-byte hexadecimal system-level has the format + * 0x<a><b><cc><dd><eeee><ff><gg><hh>, where: + * <a>: is a 4-bit digit, its most significant bit indicates hypervisor use + * <b>: is one digit that represents Linux distributions as follows + * 0: generic Linux + * 1: Red Hat Enterprise Linux + * 2: SUSE Linux Enterprise Server + * 3: Canonical Ubuntu + * 4: Fedora + * 5: openSUSE Leap + * 6: Debian GNU/Linux + * 7: Red Hat Enterprise Linux CoreOS + * <cc>: are two digits for a distribution-specific encoding of the major + * version of the distribution + * <dd>: are two digits for a distribution-specific encoding of the minor + * version of the distribution + * <eeee>: are four digits for the patch level of the distribution + * <ff>: are two digits for the major version of the kernel + * <gg>: are two digits for the minor version of the kernel + * <hh>: are two digits for the stable version of the kernel + * (e.g. 74872343805430528, when converted to hex is 0x010a000000060b00). On + * machines prior to z16, some of the values are not available to display. + * + * Sysplex refers to a cluster of logical partitions that communicates and + * co-operates with each other. + * + * The CPI feature is supported since 10.1. + * + * Copyright IBM, Corp. 
2024 + * + * Authors: + * Shalini Chellathurai Saroja <shalini@linux.ibm.com> + * + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "hw/s390x/event-facility.h" +#include "hw/s390x/ebcdic.h" +#include "qapi/qapi-visit-machine.h" +#include "migration/vmstate.h" + +typedef struct Data { + uint8_t id_format; + uint8_t reserved0; + uint8_t system_type[8]; + uint64_t reserved1; + uint8_t system_name[8]; + uint64_t reserved2; + uint64_t system_level; + uint64_t reserved3; + uint8_t sysplex_name[8]; + uint8_t reserved4[16]; +} QEMU_PACKED Data; + +typedef struct ControlProgramIdMsg { + EventBufferHeader ebh; + Data data; +} QEMU_PACKED ControlProgramIdMsg; + +static bool can_handle_event(uint8_t type) +{ + return type == SCLP_EVENT_CTRL_PGM_ID; +} + +static sccb_mask_t send_mask(void) +{ + return 0; +} + +/* Enable SCLP to accept buffers of event type CPI from the control-program. */ +static sccb_mask_t receive_mask(void) +{ + return SCLP_EVENT_MASK_CTRL_PGM_ID; +} + +static int write_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr) +{ + ControlProgramIdMsg *cpim = container_of(evt_buf_hdr, ControlProgramIdMsg, + ebh); + SCLPEventCPI *e = SCLP_EVENT_CPI(event); + + ascii_put(e->system_type, (char *)cpim->data.system_type, + sizeof(cpim->data.system_type)); + ascii_put(e->system_name, (char *)cpim->data.system_name, + sizeof(cpim->data.system_name)); + ascii_put(e->sysplex_name, (char *)cpim->data.sysplex_name, + sizeof(cpim->data.sysplex_name)); + e->system_level = ldq_be_p(&cpim->data.system_level); + e->timestamp = qemu_clock_get_ns(QEMU_CLOCK_HOST); + + cpim->ebh.flags = SCLP_EVENT_BUFFER_ACCEPTED; + return SCLP_RC_NORMAL_COMPLETION; +} + +static char *get_system_type(Object *obj, Error **errp) +{ + SCLPEventCPI *e = SCLP_EVENT_CPI(obj); + + return g_strndup((char *) e->system_type, sizeof(e->system_type)); +} + +static char *get_system_name(Object *obj, Error **errp) +{ + SCLPEventCPI *e = SCLP_EVENT_CPI(obj); + + return g_strndup((char *) e->system_name, sizeof(e->system_name)); +} + +static char *get_sysplex_name(Object *obj, Error **errp) +{ + SCLPEventCPI *e = SCLP_EVENT_CPI(obj); + + return g_strndup((char *) e->sysplex_name, sizeof(e->sysplex_name)); +} + +static void get_system_level(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SCLPEventCPI *e = SCLP_EVENT_CPI(obj); + + visit_type_uint64(v, name, &e->system_level, errp); +} + +static void get_timestamp(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SCLPEventCPI *e = SCLP_EVENT_CPI(obj); + + visit_type_uint64(v, name, &e->timestamp, errp); +} + +static const VMStateDescription vmstate_sclpcpi = { + .name = "s390_control_program_id", + .version_id = 0, + .fields = (const VMStateField[]) { + VMSTATE_UINT8_ARRAY(system_type, SCLPEventCPI, 8), + VMSTATE_UINT8_ARRAY(system_name, SCLPEventCPI, 8), + VMSTATE_UINT64(system_level, SCLPEventCPI), + VMSTATE_UINT8_ARRAY(sysplex_name, SCLPEventCPI, 8), + VMSTATE_UINT64(timestamp, SCLPEventCPI), + VMSTATE_END_OF_LIST() + } +}; + +static void cpi_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + SCLPEventClass *k = SCLP_EVENT_CLASS(klass); + + dc->user_creatable = false; + dc->vmsd = &vmstate_sclpcpi; + + k->can_handle_event = can_handle_event; + k->get_send_mask = send_mask; + k->get_receive_mask = receive_mask; + k->write_event_data = write_event_data; + + object_class_property_add_str(klass, "system_type", get_system_type, NULL); + 
object_class_property_set_description(klass, "system_type", + "operating system e.g. \"LINUX \""); + + object_class_property_add_str(klass, "system_name", get_system_name, NULL); + object_class_property_set_description(klass, "system_name", + "user configurable name of the VM e.g. \"TESTVM \""); + + object_class_property_add_str(klass, "sysplex_name", get_sysplex_name, + NULL); + object_class_property_set_description(klass, "sysplex_name", + "name of the cluster which the VM belongs to, if any" + " e.g. \"PLEX \""); + + object_class_property_add(klass, "system_level", "uint64", get_system_level, + NULL, NULL, NULL); + object_class_property_set_description(klass, "system_level", + "distribution and kernel version in Linux e.g. 74872343805430528"); + + object_class_property_add(klass, "timestamp", "uint64", get_timestamp, + NULL, NULL, NULL); + object_class_property_set_description(klass, "timestamp", + "latest update of CPI data in nanoseconds since the UNIX EPOCH"); +} + +static const TypeInfo sclp_cpi_info = { + .name = TYPE_SCLP_EVENT_CPI, + .parent = TYPE_SCLP_EVENT, + .instance_size = sizeof(SCLPEventCPI), + .class_init = cpi_class_init, +}; + +static void sclp_cpi_register_types(void) +{ + type_register_static(&sclp_cpi_info); +} + +type_init(sclp_cpi_register_types) diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index f24991f..1d264c4 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -275,6 +275,7 @@ static int esp_select(ESPState *s) if (!s->current_dev) { /* No such drive */ s->rregs[ESP_RSTAT] = 0; + s->asc_mode = ESP_ASC_MODE_DIS; s->rregs[ESP_RINTR] = INTR_DC; esp_raise_irq(s); return -1; @@ -284,6 +285,7 @@ static int esp_select(ESPState *s) * Note that we deliberately don't raise the IRQ here: this will be done * either in esp_transfer_data() or esp_command_complete() */ + s->asc_mode = ESP_ASC_MODE_INI; return 0; } @@ -308,6 +310,7 @@ static void do_command_phase(ESPState *s) if (!current_lun) { /* No such drive */ s->rregs[ESP_RSTAT] = 0; + s->asc_mode = ESP_ASC_MODE_DIS; s->rregs[ESP_RINTR] = INTR_DC; s->rregs[ESP_RSEQ] = SEQ_0; esp_raise_irq(s); @@ -487,8 +490,10 @@ static void esp_do_dma(ESPState *s) case STAT_MO: if (s->dma_memory_read) { len = MIN(len, fifo8_num_free(&s->cmdfifo)); - s->dma_memory_read(s->dma_opaque, buf, len); - esp_set_tc(s, esp_get_tc(s) - len); + if (len) { + s->dma_memory_read(s->dma_opaque, buf, len); + esp_set_tc(s, esp_get_tc(s) - len); + } } else { len = esp_fifo_pop_buf(s, buf, fifo8_num_used(&s->fifo)); len = MIN(fifo8_num_free(&s->cmdfifo), len); @@ -541,9 +546,11 @@ static void esp_do_dma(ESPState *s) trace_esp_do_dma(cmdlen, len); if (s->dma_memory_read) { len = MIN(len, fifo8_num_free(&s->cmdfifo)); - s->dma_memory_read(s->dma_opaque, buf, len); - fifo8_push_all(&s->cmdfifo, buf, len); - esp_set_tc(s, esp_get_tc(s) - len); + if (len) { + s->dma_memory_read(s->dma_opaque, buf, len); + fifo8_push_all(&s->cmdfifo, buf, len); + esp_set_tc(s, esp_get_tc(s) - len); + } } else { len = esp_fifo_pop_buf(s, buf, fifo8_num_used(&s->fifo)); len = MIN(fifo8_num_free(&s->cmdfifo), len); @@ -572,8 +579,10 @@ static void esp_do_dma(ESPState *s) switch (s->rregs[ESP_CMD]) { case CMD_TI | CMD_DMA: if (s->dma_memory_read) { - s->dma_memory_read(s->dma_opaque, s->async_buf, len); - esp_set_tc(s, esp_get_tc(s) - len); + if (len) { + s->dma_memory_read(s->dma_opaque, s->async_buf, len); + esp_set_tc(s, esp_get_tc(s) - len); + } } else { /* Copy FIFO data to device */ len = MIN(s->async_len, ESP_FIFO_SZ); @@ -625,7 +634,9 @@ static void esp_do_dma(ESPState *s) 
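/*
 * Editorial sketch for the sclpcpi.c header above: it unpacks the Linux
 * system_level layout 0x<a><b><cc><dd><eeee><ff><gg><hh> documented there.
 * The helper name is hypothetical (not part of the patch) and assumes
 * <stdint.h>/<stdio.h>.
 */
static void cpi_decode_system_level(uint64_t v)
{
    unsigned a  = (v >> 60) & 0xf;    /* MSB indicates hypervisor use */
    unsigned b  = (v >> 56) & 0xf;    /* distribution: 0=generic, 1=RHEL... */
    unsigned cc = (v >> 48) & 0xff;   /* distro major version encoding */
    unsigned dd = (v >> 40) & 0xff;   /* distro minor version encoding */
    unsigned ee = (v >> 24) & 0xffff; /* distro patch level */
    unsigned ff = (v >> 16) & 0xff;   /* kernel major version */
    unsigned gg = (v >> 8) & 0xff;    /* kernel minor version */
    unsigned hh = v & 0xff;           /* kernel stable version */

    printf("a=%x distro=%u v%u.%u patch=%u kernel=%u.%u.%u\n",
           a, b, cc, dd, ee, ff, gg, hh);
}
/*
 * cpi_decode_system_level(0x010a000000060b00) prints distro=1 v10.0 patch=0
 * kernel=6.11.0, matching the example value 74872343805430528 given in the
 * header comment.
 */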
switch (s->rregs[ESP_CMD]) { case CMD_TI | CMD_DMA: if (s->dma_memory_write) { - s->dma_memory_write(s->dma_opaque, s->async_buf, len); + if (len) { + s->dma_memory_write(s->dma_opaque, s->async_buf, len); + } } else { /* Copy device data to FIFO */ len = MIN(len, fifo8_num_free(&s->fifo)); @@ -675,6 +686,7 @@ static void esp_do_dma(ESPState *s) buf[0] = s->status; if (s->dma_memory_write) { + /* Length already non-zero */ s->dma_memory_write(s->dma_opaque, buf, len); } else { esp_fifo_push_buf(s, buf, len); @@ -709,6 +721,7 @@ static void esp_do_dma(ESPState *s) buf[0] = 0; if (s->dma_memory_write) { + /* Length already non-zero */ s->dma_memory_write(s->dma_opaque, buf, len); } else { esp_fifo_push_buf(s, buf, len); @@ -1012,6 +1025,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) */ s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC; s->rregs[ESP_RSEQ] = SEQ_CD; + esp_raise_irq(s); break; case CMD_SELATNS | CMD_DMA: @@ -1022,20 +1036,21 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) */ s->rregs[ESP_RINTR] |= INTR_BS; s->rregs[ESP_RSEQ] = SEQ_MO; + esp_raise_irq(s); break; case CMD_TI | CMD_DMA: case CMD_TI: /* - * Bus service interrupt raised because of initial change to - * DATA phase + * If the final COMMAND phase data was transferred using a TI + * command, clear ESP_CMD to terminate the TI command and raise + * the completion interrupt */ s->rregs[ESP_CMD] = 0; s->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(s); break; } - - esp_raise_irq(s); } /* @@ -1090,6 +1105,7 @@ void esp_hard_reset(ESPState *s) fifo8_reset(&s->cmdfifo); s->dma = 0; s->dma_cb = NULL; + s->asc_mode = ESP_ASC_MODE_DIS; s->rregs[ESP_CFG1] = 7; } @@ -1113,6 +1129,38 @@ static void parent_esp_reset(ESPState *s, int irq, int level) } } +static bool esp_cmd_is_valid(ESPState *s, uint8_t cmd) +{ + uint8_t cmd_group = (cmd & CMD_GRP_MASK) >> 4; + + /* Always allow misc commands */ + if (cmd_group == CMD_GRP_MISC) { + return true; + } + + switch (s->asc_mode) { + case ESP_ASC_MODE_DIS: + /* Disconnected mode: only allow disconnected commands */ + if (cmd_group == CMD_GRP_DISC) { + return true; + } + break; + + case ESP_ASC_MODE_INI: + /* Initiator mode: allow initiator commands */ + if (cmd_group == CMD_GRP_INIT) { + return true; + } + break; + + default: + g_assert_not_reached(); + } + + trace_esp_invalid_cmd(cmd, s->asc_mode); + return false; +} + static void esp_run_cmd(ESPState *s) { uint8_t cmd = s->rregs[ESP_CMD]; @@ -1158,6 +1206,7 @@ static void esp_run_cmd(ESPState *s) break; case CMD_MSGACC: trace_esp_mem_writeb_cmd_msgacc(cmd); + s->asc_mode = ESP_ASC_MODE_DIS; s->rregs[ESP_RINTR] |= INTR_DC; s->rregs[ESP_RSEQ] = 0; s->rregs[ESP_RFLAGS] = 0; @@ -1268,6 +1317,11 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val) break; case ESP_CMD: s->rregs[saddr] = val; + if (!esp_cmd_is_valid(s, s->rregs[saddr])) { + s->rregs[ESP_RSTAT] |= INTR_IL; + esp_raise_irq(s); + break; + } esp_run_cmd(s); break; case ESP_WBUSID ... 
ESP_WSYNO: @@ -1325,6 +1379,14 @@ static bool esp_is_between_version_5_and_6(void *opaque, int version_id) return version_id >= 5 && version_id <= 6; } +static bool esp_is_version_8(void *opaque, int version_id) +{ + ESPState *s = ESP(opaque); + + version_id = MIN(version_id, s->mig_version_id); + return version_id >= 8; +} + int esp_pre_save(void *opaque) { ESPState *s = ESP(object_resolve_path_component( @@ -1356,13 +1418,18 @@ static int esp_post_load(void *opaque, int version_id) } } + if (version_id < 8) { + /* Assume initiator mode to allow all commands to continue */ + s->asc_mode = ESP_ASC_MODE_INI; + } + s->mig_version_id = vmstate_esp.version_id; return 0; } const VMStateDescription vmstate_esp = { .name = "esp", - .version_id = 7, + .version_id = 8, .minimum_version_id = 3, .post_load = esp_post_load, .fields = (const VMStateField[]) { @@ -1394,6 +1461,7 @@ const VMStateDescription vmstate_esp = { esp_is_between_version_5_and_6), VMSTATE_UINT8_TEST(lun, ESPState, esp_is_version_6), VMSTATE_BOOL(drq_state, ESPState), + VMSTATE_UINT8_TEST(asc_mode, ESPState, esp_is_version_8), VMSTATE_END_OF_LIST() }, }; diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index f4f2ef3..9ea4aa0 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -1112,7 +1112,7 @@ bad: static void lsi_memcpy(LSIState *s, uint32_t dest, uint32_t src, int count) { int n; - uint8_t buf[LSI_BUF_SIZE]; + QEMU_UNINITIALIZED uint8_t buf[LSI_BUF_SIZE]; trace_lsi_memcpy(dest, src, count); while (count) { diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 55cd188..844643d 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -981,13 +981,11 @@ static int megasas_event_wait(MegasasState *s, MegasasCmd *cmd) static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd) { - struct mfi_pd_list info; - size_t dcmd_size = sizeof(info); + struct mfi_pd_list info = {}; BusChild *kid; uint32_t offset, dcmd_limit, num_pd_disks = 0, max_pd_disks; dma_addr_t residual; - memset(&info, 0, dcmd_size); offset = 8; dcmd_limit = offset + sizeof(struct mfi_pd_address); if (cmd->iov_size < dcmd_limit) { @@ -1429,11 +1427,10 @@ static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd) static int megasas_dcmd_get_properties(MegasasState *s, MegasasCmd *cmd) { - struct mfi_ctrl_props info; + struct mfi_ctrl_props info = {}; size_t dcmd_size = sizeof(info); dma_addr_t residual; - memset(&info, 0x0, dcmd_size); if (cmd->iov_size < dcmd_size) { trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, dcmd_size); diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 70be4a7..9b12ee7 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -400,7 +400,7 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp) return; } dev->vmsentry = qdev_add_vm_change_state_handler(DEVICE(dev), - scsi_dma_restart_cb, dev); + scsi_dma_restart_cb, NULL, dev); } static void scsi_qdev_unrealize(DeviceState *qdev) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index cb4af1b..b4782c6 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -74,7 +74,7 @@ struct SCSIDiskClass { */ DMAIOFunc *dma_readv; DMAIOFunc *dma_writev; - bool (*need_fua_emulation)(SCSICommand *cmd); + bool (*need_fua)(SCSICommand *cmd); void (*update_sense)(SCSIRequest *r); }; @@ -85,7 +85,7 @@ typedef struct SCSIDiskReq { uint32_t sector_count; uint32_t buflen; bool started; - bool need_fua_emulation; + bool need_fua; struct iovec iov; QEMUIOVector qiov; BlockAcctCookie acct; @@ -389,24 +389,6 @@ static bool 
scsi_is_cmd_fua(SCSICommand *cmd) } } -static void scsi_write_do_fua(SCSIDiskReq *r) -{ - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - - assert(r->req.aiocb == NULL); - assert(!r->req.io_canceled); - - if (r->need_fua_emulation) { - block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, - BLOCK_ACCT_FLUSH); - r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r); - return; - } - - scsi_req_complete(&r->req, GOOD); - scsi_req_unref(&r->req); -} - static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) { assert(r->req.aiocb == NULL); @@ -416,12 +398,7 @@ static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) r->sector += r->sector_count; r->sector_count = 0; - if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { - scsi_write_do_fua(r); - return; - } else { - scsi_req_complete(&r->req, GOOD); - } + scsi_req_complete(&r->req, GOOD); done: scsi_req_unref(&r->req); @@ -564,7 +541,7 @@ static void scsi_read_data(SCSIRequest *req) first = !r->started; r->started = true; - if (first && r->need_fua_emulation) { + if (first && r->need_fua) { block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, BLOCK_ACCT_FLUSH); r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r); @@ -589,8 +566,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) r->sector += n; r->sector_count -= n; if (r->sector_count == 0) { - scsi_write_do_fua(r); - return; + scsi_req_complete(&r->req, GOOD); } else { scsi_init_iovec(r, SCSI_DMA_BUF_SIZE); trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size); @@ -623,6 +599,7 @@ static void scsi_write_data(SCSIRequest *req) SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + BlockCompletionFunc *cb; /* No data transfer may already be in progress */ assert(r->req.aiocb == NULL); @@ -648,11 +625,10 @@ static void scsi_write_data(SCSIRequest *req) if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 || r->req.cmd.buf[0] == VERIFY_16) { - if (r->req.sg) { - scsi_dma_complete_noio(r, 0); - } else { - scsi_write_complete_noio(r, 0); - } + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, + BLOCK_ACCT_FLUSH); + cb = r->req.sg ? scsi_dma_complete : scsi_write_complete; + r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, cb, r); return; } @@ -2391,7 +2367,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf) scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); return 0; } - r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd); + r->need_fua = sdc->need_fua(&r->req.cmd); if (r->sector_count == 0) { scsi_req_complete(&r->req, GOOD); } @@ -3137,7 +3113,8 @@ BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov, { SCSIDiskReq *r = opaque; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque); + int flags = r->need_fua ? 
BDRV_REQ_FUA : 0;
+    return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, flags, cb, cb_opaque);
 }
 
 static char *scsi_property_get_loadparm(Object *obj, Error **errp)
@@ -3186,7 +3163,7 @@ static void scsi_disk_base_class_initfn(ObjectClass *klass, const void *data)
     device_class_set_legacy_reset(dc, scsi_disk_reset);
     sdc->dma_readv = scsi_dma_readv;
     sdc->dma_writev = scsi_dma_writev;
-    sdc->need_fua_emulation = scsi_is_cmd_fua;
+    sdc->need_fua = scsi_is_cmd_fua;
 }
 
 static const TypeInfo scsi_disk_base_info = {
@@ -3215,7 +3192,7 @@ static const Property scsi_hd_properties[] = {
     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
                     SCSI_DISK_F_REMOVABLE, false),
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
-                    SCSI_DISK_F_DPOFUA, false),
+                    SCSI_DISK_F_DPOFUA, true),
     DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
@@ -3338,7 +3315,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, const void *data)
     sdc->dma_readv = scsi_block_dma_readv;
     sdc->dma_writev = scsi_block_dma_writev;
     sdc->update_sense = scsi_block_update_sense;
-    sdc->need_fua_emulation = scsi_block_no_fua;
+    sdc->need_fua = scsi_block_no_fua;
     dc->desc = "SCSI block device passthrough";
     device_class_set_props(dc, scsi_block_properties);
     dc->vmsd = &vmstate_scsi_disk_state;
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
index f0f2a98..6c2788e 100644
--- a/hw/scsi/trace-events
+++ b/hw/scsi/trace-events
@@ -198,6 +198,7 @@ esp_mem_writeb_cmd_ensel(uint32_t val) "Enable selection (0x%2.2x)"
 esp_mem_writeb_cmd_dissel(uint32_t val) "Disable selection (0x%2.2x)"
 esp_mem_writeb_cmd_ti(uint32_t val) "Transfer Information (0x%2.2x)"
 esp_set_phase(const char *phase) "setting bus phase to %s"
+esp_invalid_cmd(uint8_t cmd, uint8_t asc_mode) "command 0x%x asc_mode 0x%x"
 
 # esp-pci.c
 esp_pci_error_invalid_dma_direction(void) "invalid DMA transfer direction"
diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
index 4c86370..43525ba 100644
--- a/hw/scsi/vhost-scsi-common.c
+++ b/hw/scsi/vhost-scsi-common.c
@@ -101,24 +101,25 @@ err_host_notifiers:
     return ret;
 }
 
-void vhost_scsi_common_stop(VHostSCSICommon *vsc)
+int vhost_scsi_common_stop(VHostSCSICommon *vsc)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(vsc);
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
     int ret = 0;
 
-    vhost_dev_stop(&vsc->dev, vdev, true);
+    ret = vhost_dev_stop(&vsc->dev, vdev, true);
 
     if (k->set_guest_notifiers) {
-        ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
-        if (ret < 0) {
-            error_report("vhost guest notifier cleanup failed: %d", ret);
+        int r = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false);
+        if (r < 0) {
+            error_report("vhost guest notifier cleanup failed: %d", r);
+            return r;
         }
     }
-    assert(ret >= 0);
 
     vhost_dev_disable_notifiers(&vsc->dev, vdev);
+    return ret;
 }
 
 uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 10fde8e..cdf405b 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -114,7 +114,7 @@ static void vhost_scsi_stop(VHostSCSI *s)
     vhost_scsi_common_stop(vsc);
 }
 
-static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val)
+static int vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val)
 {
     VHostSCSI *s = VHOST_SCSI(vdev);
     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
@@ -125,7 +125,7 @@ static void
vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) } if (vhost_dev_is_started(&vsc->dev) == start) { - return; + return 0; } if (start) { @@ -139,6 +139,7 @@ static void vhost_scsi_set_status(VirtIODevice *vdev, uint8_t val) } else { vhost_scsi_stop(s); } + return 0; } static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) @@ -358,6 +359,9 @@ static const Property vhost_scsi_properties[] = { DEFINE_PROP_BIT64("t10_pi", VHostSCSICommon, host_features, VIRTIO_SCSI_F_T10_PI, false), + DEFINE_PROP_BIT64("hotplug", VHostSCSICommon, host_features, + VIRTIO_SCSI_F_HOTPLUG, + false), DEFINE_PROP_BOOL("migratable", VHostSCSICommon, migratable, false), DEFINE_PROP_BOOL("worker_per_virtqueue", VirtIOSCSICommon, conf.worker_per_virtqueue, false), diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 8298e8c..25f2d89 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -52,19 +52,19 @@ static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp) return ret; } -static void vhost_user_scsi_stop(VHostUserSCSI *s) +static int vhost_user_scsi_stop(VHostUserSCSI *s) { VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); if (!s->started_vu) { - return; + return 0; } s->started_vu = false; - vhost_scsi_common_stop(vsc); + return vhost_scsi_common_stop(vsc); } -static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCSI *s = (VHostUserSCSI *)vdev; DeviceState *dev = DEVICE(vdev); @@ -75,11 +75,11 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) int ret; if (!s->connected) { - return; + return -1; } if (vhost_dev_is_started(&vsc->dev) == should_start) { - return; + return 0; } if (should_start) { @@ -91,8 +91,12 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) qemu_chr_fe_disconnect(&vs->conf.chardev); } } else { - vhost_user_scsi_stop(s); + ret = vhost_user_scsi_stop(s); + if (ret) { + return ret; + } } + return 0; } static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq) diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index d5825b6..7c98b1b 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -68,18 +68,7 @@ struct PVSCSIClass { OBJECT_DECLARE_TYPE(PVSCSIState, PVSCSIClass, PVSCSI) -/* Compatibility flags for migration */ -#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT 0 -#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION \ - (1 << PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT) -#define PVSCSI_COMPAT_DISABLE_PCIE_BIT 1 -#define PVSCSI_COMPAT_DISABLE_PCIE \ - (1 << PVSCSI_COMPAT_DISABLE_PCIE_BIT) - -#define PVSCSI_USE_OLD_PCI_CONFIGURATION(s) \ - ((s)->compat_flags & PVSCSI_COMPAT_OLD_PCI_CONFIGURATION) -#define PVSCSI_MSI_OFFSET(s) \ - (PVSCSI_USE_OLD_PCI_CONFIGURATION(s) ? 
0x50 : 0x7c) +#define PVSCSI_MSI_OFFSET (0x7c) #define PVSCSI_EXP_EP_OFFSET (0x40) typedef struct PVSCSIRingInfo { @@ -129,8 +118,6 @@ struct PVSCSIState { uint8_t msi_used; /* For migration compatibility */ PVSCSIRingInfo rings; /* Data transfer rings manager */ uint32_t resetting; /* Reset in progress */ - - uint32_t compat_flags; }; typedef struct PVSCSIRequest { @@ -1110,7 +1097,7 @@ pvscsi_init_msi(PVSCSIState *s) int res; PCIDevice *d = PCI_DEVICE(s); - res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS, + res = msi_init(d, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS, PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, NULL); if (res < 0) { trace_pvscsi_init_msi_fail(res); @@ -1158,15 +1145,11 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) trace_pvscsi_state("init"); /* PCI subsystem ID, subsystem vendor ID, revision */ - if (PVSCSI_USE_OLD_PCI_CONFIGURATION(s)) { - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, 0x1000); - } else { - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, - PCI_VENDOR_ID_VMWARE); - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, - PCI_DEVICE_ID_VMWARE_PVSCSI); - pci_config_set_revision(pci_dev->config, 0x2); - } + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, + PCI_VENDOR_ID_VMWARE); + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, + PCI_DEVICE_ID_VMWARE_PVSCSI); + pci_config_set_revision(pci_dev->config, 0x2); /* PCI latency timer = 255 */ pci_dev->config[PCI_LATENCY_TIMER] = 0xff; @@ -1234,21 +1217,8 @@ pvscsi_post_load(void *opaque, int version_id) return 0; } -static bool pvscsi_vmstate_need_pcie_device(void *opaque) -{ - PVSCSIState *s = PVSCSI(opaque); - - return !(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE); -} - -static bool pvscsi_vmstate_test_pci_device(void *opaque, int version_id) -{ - return !pvscsi_vmstate_need_pcie_device(opaque); -} - static const VMStateDescription vmstate_pvscsi_pcie_device = { .name = "pvscsi/pcie", - .needed = pvscsi_vmstate_need_pcie_device, .fields = (const VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState), VMSTATE_END_OF_LIST() @@ -1262,9 +1232,6 @@ static const VMStateDescription vmstate_pvscsi = { .pre_save = pvscsi_pre_save, .post_load = pvscsi_post_load, .fields = (const VMStateField[]) { - VMSTATE_STRUCT_TEST(parent_obj, PVSCSIState, - pvscsi_vmstate_test_pci_device, 0, - vmstate_pci_device, PCIDevice), VMSTATE_UINT8(msi_used, PVSCSIState), VMSTATE_UINT32(resetting, PVSCSIState), VMSTATE_UINT64(reg_interrupt_status, PVSCSIState), @@ -1298,30 +1265,17 @@ static const VMStateDescription vmstate_pvscsi = { static const Property pvscsi_properties[] = { DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1), - DEFINE_PROP_BIT("x-old-pci-configuration", PVSCSIState, compat_flags, - PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", PVSCSIState, compat_flags, - PVSCSI_COMPAT_DISABLE_PCIE_BIT, false), }; -static void pvscsi_realize(DeviceState *qdev, Error **errp) +static void pvscsi_instance_init(Object *obj) { - PVSCSIClass *pvs_c = PVSCSI_GET_CLASS(qdev); - PCIDevice *pci_dev = PCI_DEVICE(qdev); - PVSCSIState *s = PVSCSI(qdev); - - if (!(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE)) { - pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; - } - - pvs_c->parent_dc_realize(qdev, errp); + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; } static void pvscsi_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - PVSCSIClass *pvs_k = PVSCSI_CLASS(klass); 
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); k->realize = pvscsi_realizefn; @@ -1330,8 +1284,6 @@ static void pvscsi_class_init(ObjectClass *klass, const void *data) k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI; k->class_id = PCI_CLASS_STORAGE_SCSI; k->subsystem_id = 0x1000; - device_class_set_parent_realize(dc, pvscsi_realize, - &pvs_k->parent_dc_realize); device_class_set_legacy_reset(dc, pvscsi_reset); dc->vmsd = &vmstate_pvscsi; device_class_set_props(dc, pvscsi_properties); @@ -1346,6 +1298,7 @@ static const TypeInfo pvscsi_info = { .class_size = sizeof(PVSCSIClass), .instance_size = sizeof(PVSCSIState), .class_init = pvscsi_class_init, + .instance_init = pvscsi_instance_init, .interfaces = (const InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { INTERFACE_PCIE_DEVICE }, diff --git a/hw/scsi/vmw_pvscsi.h b/hw/scsi/vmw_pvscsi.h index 17fcf66..a3ae517 100644 --- a/hw/scsi/vmw_pvscsi.h +++ b/hw/scsi/vmw_pvscsi.h @@ -14,8 +14,8 @@ * details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * along with this program; if not, see + * <https://www.gnu.org/licenses/>. * * Maintained by: Arvind Kumar <arvindkumar@vmware.com> * diff --git a/hw/sensor/lsm303dlhc_mag.c b/hw/sensor/lsm303dlhc_mag.c index f9e501d..cd5773a 100644 --- a/hw/sensor/lsm303dlhc_mag.c +++ b/hw/sensor/lsm303dlhc_mag.c @@ -28,7 +28,6 @@ #include "qapi/visitor.h" #include "qemu/module.h" #include "qemu/log.h" -#include "qemu/bswap.h" enum LSM303DLHCMagReg { LSM303DLHC_MAG_REG_CRA = 0x00, diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index ad4cd67..1ac063c 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -17,6 +17,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qemu/bswap.h" #include "qapi/error.h" #include "qemu/config-file.h" #include "qemu/module.h" diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index d1b7bc5..cb48cc1 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -328,16 +328,16 @@ static const VMStateDescription vmstate_hpet_timer = { static const VMStateDescription vmstate_hpet = { .name = "hpet", .version_id = 2, - .minimum_version_id = 1, + .minimum_version_id = 2, .pre_save = hpet_pre_save, .post_load = hpet_post_load, .fields = (const VMStateField[]) { VMSTATE_UINT64(config, HPETState), VMSTATE_UINT64(isr, HPETState), VMSTATE_UINT64(hpet_counter, HPETState), - VMSTATE_UINT8_V(num_timers_save, HPETState, 2), + VMSTATE_UINT8(num_timers_save, HPETState), VMSTATE_VALIDATE("num_timers must match", hpet_validate_num_timers), - VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0, + VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers_save, 0, vmstate_hpet_timer, HPETTimer), VMSTATE_END_OF_LIST() }, @@ -426,30 +426,11 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr, uint64_t cur_tick; trace_hpet_ram_read(addr); + addr &= ~4; - /*address range of all TN regs*/ - if (addr >= 0x100 && addr <= 0x3ff) { - uint8_t timer_id = (addr - 0x100) / 0x20; - HPETTimer *timer = &s->timer[timer_id]; - - if (timer_id > s->num_timers) { - trace_hpet_timer_id_out_of_range(timer_id); - return 0; - } - - switch (addr & 0x18) { - case HPET_TN_CFG: // including interrupt capabilities - return timer->config >> shift; - case HPET_TN_CMP: // comparator register - return timer->cmp >> shift; - case HPET_TN_ROUTE: - return timer->fsb >> shift; - default: - trace_hpet_ram_read_invalid(); - break; - } - } else { - switch 
(addr & ~4) {
+    /* address range of all global regs */
+    if (addr <= 0xff) {
+        switch (addr) {
         case HPET_ID: // including HPET_PERIOD
             return s->capability >> shift;
         case HPET_CFG:
@@ -468,6 +449,26 @@
             trace_hpet_ram_read_invalid();
             break;
         }
+    } else {
+        uint8_t timer_id = (addr - 0x100) / 0x20;
+        HPETTimer *timer = &s->timer[timer_id];
+
+        if (timer_id >= s->num_timers) {
+            trace_hpet_timer_id_out_of_range(timer_id);
+            return 0;
+        }
+
+        switch (addr & 0x1f) {
+        case HPET_TN_CFG: // including interrupt capabilities
+            return timer->config >> shift;
+        case HPET_TN_CMP: // comparator register
+            return timer->cmp >> shift;
+        case HPET_TN_ROUTE:
+            return timer->fsb >> shift;
+        default:
+            trace_hpet_ram_read_invalid();
+            break;
+        }
     }
     return 0;
 }
@@ -482,9 +483,67 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
     uint64_t old_val, new_val, cleared;
 
     trace_hpet_ram_write(addr, value);
+    addr &= ~4;
 
-    /*address range of all TN regs*/
-    if (addr >= 0x100 && addr <= 0x3ff) {
+    /* address range of all global regs */
+    if (addr <= 0xff) {
+        switch (addr) {
+        case HPET_ID:
+            return;
+        case HPET_CFG:
+            old_val = s->config;
+            new_val = deposit64(old_val, shift, len, value);
+            new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
+            s->config = new_val;
+            if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+                /* Enable main counter and interrupt generation.
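+                 * hpet_get_ticks() evaluates
+                 * ns_to_ticks(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                 * hpet_offset), so seeding hpet_offset below with
+                 * ticks_to_ns(hpet_counter) - now makes the counter resume
+                 * from exactly the value it held while halted.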
*/ - s->hpet_offset = - ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - for (i = 0; i < s->num_timers; i++) { - if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) { - update_irq(&s->timer[i], 1); - } - hpet_set_timer(&s->timer[i]); - } - } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) { - /* Halt main counter and disable interrupt generation. */ - s->hpet_counter = hpet_get_ticks(s); - for (i = 0; i < s->num_timers; i++) { - hpet_del_timer(&s->timer[i]); - } - } - /* i8254 and RTC output pins are disabled - * when HPET is in legacy mode */ - if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) { - qemu_set_irq(s->pit_enabled, 0); - qemu_irq_lower(s->irqs[0]); - qemu_irq_lower(s->irqs[RTC_ISA_IRQ]); - } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) { - qemu_irq_lower(s->irqs[0]); - qemu_set_irq(s->pit_enabled, 1); - qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level); - } - break; - case HPET_STATUS: - new_val = value << shift; - cleared = new_val & s->isr; - for (i = 0; i < s->num_timers; i++) { - if (cleared & (1 << i)) { - update_irq(&s->timer[i], 0); - } - } - break; - case HPET_COUNTER: - if (hpet_enabled(s)) { - trace_hpet_ram_write_counter_write_while_enabled(); - } - s->hpet_counter = deposit64(s->hpet_counter, shift, len, value); - break; - default: - trace_hpet_ram_write_invalid(); - break; - } } } @@ -689,8 +691,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) int i; HPETTimer *timer; + if (s->num_timers < HPET_MIN_TIMERS || s->num_timers > HPET_MAX_TIMERS) { + error_setg(errp, "hpet.num_timers must be between %d and %d", + HPET_MIN_TIMERS, HPET_MAX_TIMERS); + return; + } if (!s->intcap) { - warn_report("Hpet's intcap not initialized"); + error_setg(errp, "hpet.hpet-intcap not initialized"); + return; } if (hpet_fw_cfg.count == UINT8_MAX) { /* first instance */ @@ -698,7 +706,7 @@ static void hpet_realize(DeviceState *dev, Error **errp) } if (hpet_fw_cfg.count == 8) { - error_setg(errp, "Only 8 instances of HPET is allowed"); + error_setg(errp, "Only 8 instances of HPET are allowed"); return; } @@ -708,11 +716,6 @@ static void hpet_realize(DeviceState *dev, Error **errp) sysbus_init_irq(sbd, &s->irqs[i]); } - if (s->num_timers < HPET_MIN_TIMERS) { - s->num_timers = HPET_MIN_TIMERS; - } else if (s->num_timers > HPET_MAX_TIMERS) { - s->num_timers = HPET_MAX_TIMERS; - } for (i = 0; i < HPET_MAX_TIMERS; i++) { timer = &s->timer[i]; timer->qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, hpet_timer, timer); diff --git a/hw/timer/pxa2xx_timer.c b/hw/timer/pxa2xx_timer.c index 7a94366..6d4ac31 100644 --- a/hw/timer/pxa2xx_timer.c +++ b/hw/timer/pxa2xx_timer.c @@ -19,41 +19,41 @@ #include "qom/object.h" #include "system/watchdog.h" -#define OSMR0 0x00 -#define OSMR1 0x04 -#define OSMR2 0x08 -#define OSMR3 0x0c -#define OSMR4 0x80 -#define OSMR5 0x84 -#define OSMR6 0x88 -#define OSMR7 0x8c -#define OSMR8 0x90 -#define OSMR9 0x94 -#define OSMR10 0x98 -#define OSMR11 0x9c -#define OSCR 0x10 /* OS Timer Count */ -#define OSCR4 0x40 -#define OSCR5 0x44 -#define OSCR6 0x48 -#define OSCR7 0x4c -#define OSCR8 0x50 -#define OSCR9 0x54 -#define OSCR10 0x58 -#define OSCR11 0x5c -#define OSSR 0x14 /* Timer status register */ -#define OWER 0x18 -#define OIER 0x1c /* Interrupt enable register 3-0 to E3-E0 */ -#define OMCR4 0xc0 /* OS Match Control registers */ -#define OMCR5 0xc4 -#define OMCR6 0xc8 -#define OMCR7 0xcc -#define OMCR8 0xd0 -#define OMCR9 0xd4 -#define OMCR10 0xd8 -#define OMCR11 0xdc -#define OSNR 0x20 - 
-#define PXA25X_FREQ 3686400 /* 3.6864 MHz */ +#define OSMR0 0x00 +#define OSMR1 0x04 +#define OSMR2 0x08 +#define OSMR3 0x0c +#define OSMR4 0x80 +#define OSMR5 0x84 +#define OSMR6 0x88 +#define OSMR7 0x8c +#define OSMR8 0x90 +#define OSMR9 0x94 +#define OSMR10 0x98 +#define OSMR11 0x9c +#define OSCR 0x10 /* OS Timer Count */ +#define OSCR4 0x40 +#define OSCR5 0x44 +#define OSCR6 0x48 +#define OSCR7 0x4c +#define OSCR8 0x50 +#define OSCR9 0x54 +#define OSCR10 0x58 +#define OSCR11 0x5c +#define OSSR 0x14 /* Timer status register */ +#define OWER 0x18 +#define OIER 0x1c /* Interrupt enable register 3-0 to E3-E0 */ +#define OMCR4 0xc0 /* OS Match Control registers */ +#define OMCR5 0xc4 +#define OMCR6 0xc8 +#define OMCR7 0xcc +#define OMCR8 0xd0 +#define OMCR9 0xd4 +#define OMCR10 0xd8 +#define OMCR11 0xdc +#define OSNR 0x20 + +#define PXA25X_FREQ 3686400 /* 3.6864 MHz */ static int pxa2xx_timer4_freq[8] = { [0] = 0, @@ -106,7 +106,7 @@ struct PXA2xxTimerInfo { PXA2xxTimer4 tm4[8]; }; -#define PXA2XX_TIMER_HAVE_TM4 0 +#define PXA2XX_TIMER_HAVE_TM4 0 static inline int pxa2xx_timer_has_tm4(PXA2xxTimerInfo *s) { @@ -230,7 +230,7 @@ static uint64_t pxa2xx_timer_read(void *opaque, hwaddr offset, NANOSECONDS_PER_SECOND); case OIER: return s->irq_enabled; - case OSSR: /* Status register */ + case OSSR: /* Status register */ return s->events; case OWER: return s->reset3; @@ -336,7 +336,7 @@ static void pxa2xx_timer_write(void *opaque, hwaddr offset, case OIER: s->irq_enabled = value & 0xfff; break; - case OSSR: /* Status register */ + case OSSR: /* Status register */ value &= s->events; s->events &= ~value; for (i = 0; i < 4; i ++, value >>= 1) @@ -345,7 +345,7 @@ static void pxa2xx_timer_write(void *opaque, hwaddr offset, if (pxa2xx_timer_has_tm4(s) && !(s->events & 0xff0) && value) qemu_irq_lower(s->irq4); break; - case OWER: /* XXX: Reset on OSMR3 match? */ + case OWER: /* XXX: Reset on OSMR3 match? 
*/ s->reset3 = value; break; case OMCR7: tm ++; diff --git a/hw/uefi/trace.h b/hw/uefi/trace.h new file mode 100644 index 0000000..6aa1c93 --- /dev/null +++ b/hw/uefi/trace.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "trace/trace-hw_uefi.h" diff --git a/hw/uefi/var-service-core.c b/hw/uefi/var-service-core.c index 4836a0c..feec5a5 100644 --- a/hw/uefi/var-service-core.c +++ b/hw/uefi/var-service-core.c @@ -12,7 +12,7 @@ #include "hw/uefi/var-service-api.h" #include "hw/uefi/var-service-edk2.h" -#include "trace/trace-hw_uefi.h" +#include "trace.h" static int uefi_vars_pre_load(void *opaque) { diff --git a/hw/uefi/var-service-policy.c b/hw/uefi/var-service-policy.c index 3b1155f..58da4ad 100644 --- a/hw/uefi/var-service-policy.c +++ b/hw/uefi/var-service-policy.c @@ -14,7 +14,7 @@ #include "hw/uefi/var-service-api.h" #include "hw/uefi/var-service-edk2.h" -#include "trace/trace-hw_uefi.h" +#include "trace.h" static void calc_policy(uefi_var_policy *pol); diff --git a/hw/uefi/var-service-utils.c b/hw/uefi/var-service-utils.c index c9ef465..258013f 100644 --- a/hw/uefi/var-service-utils.c +++ b/hw/uefi/var-service-utils.c @@ -8,7 +8,7 @@ #include "hw/uefi/var-service.h" -#include "trace/trace-hw_uefi.h" +#include "trace.h" /* ------------------------------------------------------------------ */ diff --git a/hw/uefi/var-service-vars.c b/hw/uefi/var-service-vars.c index 7f98d77..37d05b7 100644 --- a/hw/uefi/var-service-vars.c +++ b/hw/uefi/var-service-vars.c @@ -12,7 +12,7 @@ #include "hw/uefi/var-service-api.h" #include "hw/uefi/var-service-edk2.h" -#include "trace/trace-hw_uefi.h" +#include "trace.h" #define EFI_VARIABLE_ATTRIBUTE_SUPPORTED \ (EFI_VARIABLE_NON_VOLATILE | \ diff --git a/hw/ufs/lu.c b/hw/ufs/lu.c index 57b307e..2d8ffd7 100644 --- a/hw/ufs/lu.c +++ b/hw/ufs/lu.c @@ -194,7 +194,7 @@ static int ufs_emulate_wlun_inquiry(UfsRequest *req, uint8_t *outbuf, static UfsReqResult ufs_emulate_scsi_cmd(UfsLu *lu, UfsRequest *req) { uint8_t lun = lu->lun; - uint8_t outbuf[4096]; + QEMU_UNINITIALIZED uint8_t outbuf[4096]; uint8_t sense_buf[UFS_SENSE_SIZE]; uint8_t scsi_status; int len = 0; diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c index 54d064e..96623aa 100644 --- a/hw/usb/dev-hid.c +++ b/hw/usb/dev-hid.c @@ -491,14 +491,14 @@ static const uint8_t qemu_tablet_hid_report_descriptor[] = { 0xa1, 0x00, /* Collection (Physical) */ 0x05, 0x09, /* Usage Page (Button) */ 0x19, 0x01, /* Usage Minimum (1) */ - 0x29, 0x03, /* Usage Maximum (3) */ + 0x29, 0x05, /* Usage Maximum (5) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ - 0x95, 0x03, /* Report Count (3) */ + 0x95, 0x05, /* Report Count (5) */ 0x75, 0x01, /* Report Size (1) */ 0x81, 0x02, /* Input (Data, Variable, Absolute) */ 0x95, 0x01, /* Report Count (1) */ - 0x75, 0x05, /* Report Size (5) */ + 0x75, 0x03, /* Report Size (3) */ 0x81, 0x01, /* Input (Constant) */ 0x05, 0x01, /* Usage Page (Generic Desktop) */ 0x09, 0x30, /* Usage (X) */ diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 71b5491..72a9f9f 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -577,7 +577,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed) USBDevice *dev; USBEndpoint *ep; USBPacket *pkt; - uint8_t buf[8192]; + QEMU_UNINITIALIZED uint8_t buf[8192]; bool int_req; struct ohci_iso_td iso_td; uint32_t addr; diff --git a/hw/vfio-user/Kconfig b/hw/vfio-user/Kconfig new file mode 100644 index 0000000..24bdf7a --- /dev/null +++ b/hw/vfio-user/Kconfig @@ -0,0 +1,7 @@ +# 
SPDX-License-Identifier: GPL-2.0-or-later + +config VFIO_USER + bool + default y + depends on VFIO_PCI + diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c new file mode 100644 index 0000000..d589dd9 --- /dev/null +++ b/hw/vfio-user/container.c @@ -0,0 +1,357 @@ +/* + * Container for vfio-user IOMMU type: rather than communicating with the kernel + * vfio driver, we communicate over a socket to a server using the vfio-user + * protocol. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" + +#include <sys/ioctl.h> +#include <linux/vfio.h> + +#include "hw/vfio-user/container.h" +#include "hw/vfio-user/device.h" +#include "hw/vfio-user/trace.h" +#include "hw/vfio/vfio-device.h" +#include "hw/vfio/vfio-listener.h" +#include "qapi/error.h" + +/* + * When DMA space is the physical address space, the region add/del listeners + * will fire during memory update transactions. These depend on BQL being held, + * so do any resulting map/demap ops async while keeping BQL. + */ +static void vfio_user_listener_begin(VFIOContainerBase *bcontainer) +{ + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + + container->proxy->async_ops = true; +} + +static void vfio_user_listener_commit(VFIOContainerBase *bcontainer) +{ + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + + /* wait here for any async requests sent during the transaction */ + container->proxy->async_ops = false; + vfio_user_wait_reqs(container->proxy); +} + +static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb, bool unmap_all) +{ + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + Error *local_err = NULL; + int ret = 0; + + VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp)); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0); + msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap); + msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0; + msgp->iova = iova; + msgp->size = size; + trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags, + container->proxy->async_ops); + + if (container->proxy->async_ops) { + if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL, + 0, &local_err)) { + error_report_err(local_err); + ret = -EFAULT; + } + } else { + if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL, + 0, &local_err)) { + error_report_err(local_err); + ret = -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + ret = -msgp->hdr.error_reply; + } + + g_free(msgp); + } + + return ret; +} + +static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mrp) +{ + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + int fd = memory_region_get_fd(mrp); + Error *local_err = NULL; + int ret = 0; + + VFIOUserFDs *fds = NULL; + VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp)); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0); + msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map); + msgp->flags = VFIO_DMA_MAP_FLAG_READ; + msgp->offset = 0; + msgp->iova = iova; + msgp->size = size; + + /* + * vaddr enters as a QEMU process address; make it either a file offset + * for mapped areas or leave as 0. 
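+ * The FD, if any, travels with the message, so the server can mmap() it + * at that offset to reach the region's memory directly.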
+ */ + if (fd != -1) { + msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr); + } + + if (!readonly) { + msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE; + } + + trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags, + container->proxy->async_ops); + + /* + * The async_ops case sends without blocking. They're later waited for in + * vfio_user_wait_reqs(). + */ + if (container->proxy->async_ops) { + /* can't use auto variable since we don't block */ + if (fd != -1) { + fds = vfio_user_getfds(1); + fds->send_fds = 1; + fds->fds[0] = fd; + } + + if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds, + 0, &local_err)) { + error_report_err(local_err); + ret = -EFAULT; + } + } else { + VFIOUserFDs local_fds = { 1, 0, &fd }; + + fds = fd != -1 ? &local_fds : NULL; + + if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds, + 0, &local_err)) { + error_report_err(local_err); + ret = -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + ret = -msgp->hdr.error_reply; + } + + g_free(msgp); + } + + return ret; +} + +static int +vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, + bool start, Error **errp) +{ + error_setg_errno(errp, ENOTSUP, "Not supported"); + return -ENOTSUP; +} + +static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size, Error **errp) +{ + error_setg_errno(errp, ENOTSUP, "Not supported"); + return -ENOTSUP; +} + +static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp) +{ + VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, + bcontainer); + + assert(container->proxy->dma_pgsizes != 0); + bcontainer->pgsizes = container->proxy->dma_pgsizes; + bcontainer->dma_max_mappings = container->proxy->max_dma; + + /* No live migration support yet. */ + bcontainer->dirty_pages_supported = false; + bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap; + bcontainer->dirty_pgsizes = container->proxy->migr_pgsize; + + return true; +} + +static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev, + Error **errp) +{ + VFIOUserContainer *container; + + container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER)); + container->proxy = vbasedev->proxy; + return container; +} + +/* + * Try to mirror vfio_container_connect() as much as possible. 
+ */ +static VFIOUserContainer * +vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, + Error **errp) +{ + VFIOContainerBase *bcontainer; + VFIOUserContainer *container; + VFIOAddressSpace *space; + VFIOIOMMUClass *vioc; + int ret; + + space = vfio_address_space_get(as); + + container = vfio_user_create_container(vbasedev, errp); + if (!container) { + goto put_space_exit; + } + + bcontainer = &container->bcontainer; + + ret = ram_block_uncoordinated_discard_disable(true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + goto free_container_exit; + } + + vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + assert(vioc->setup); + + if (!vioc->setup(bcontainer, errp)) { + goto enable_discards_exit; + } + + vfio_address_space_insert(space, bcontainer); + + if (!vfio_listener_register(bcontainer, errp)) { + goto listener_release_exit; + } + + bcontainer->initialized = true; + + return container; + +listener_release_exit: + vfio_listener_unregister(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); + } + +enable_discards_exit: + ram_block_uncoordinated_discard_disable(false); + +free_container_exit: + object_unref(container); + +put_space_exit: + vfio_address_space_put(space); + + return NULL; +} + +static void vfio_user_container_disconnect(VFIOUserContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + VFIOAddressSpace *space = bcontainer->space; + + ram_block_uncoordinated_discard_disable(false); + + vfio_listener_unregister(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); + } + + object_unref(container); + + vfio_address_space_put(space); +} + +static bool vfio_user_device_get(VFIOUserContainer *container, + VFIODevice *vbasedev, Error **errp) +{ + struct vfio_device_info info = { .argsz = sizeof(info) }; + + if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) { + return false; + } + + vbasedev->fd = -1; + + vfio_device_prepare(vbasedev, &container->bcontainer, &info); + + return true; +} + +/* + * vfio_user_device_attach: attach a device to a new container. + */ +static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) +{ + VFIOUserContainer *container; + + container = vfio_user_container_connect(as, vbasedev, errp); + if (container == NULL) { + error_prepend(errp, "failed to connect proxy: "); + return false; + } + + return vfio_user_device_get(container, vbasedev, errp); +} + +static void vfio_user_device_detach(VFIODevice *vbasedev) +{ + VFIOUserContainer *container = container_of(vbasedev->bcontainer, + VFIOUserContainer, bcontainer); + + vfio_device_unprepare(vbasedev); + + vfio_user_container_disconnect(container); +} + +static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single) +{ + /* ->needs_reset is always false for vfio-user. 
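+ * The server owns the device and performs any reset itself, so there is + * nothing for the client to do here.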
*/ + return 0; +} + +static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data) +{ + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + + vioc->setup = vfio_user_setup; + vioc->listener_begin = vfio_user_listener_begin; + vioc->listener_commit = vfio_user_listener_commit; + vioc->dma_map = vfio_user_dma_map; + vioc->dma_unmap = vfio_user_dma_unmap; + vioc->attach_device = vfio_user_device_attach; + vioc->detach_device = vfio_user_device_detach; + vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking; + vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap; + vioc->pci_hot_reset = vfio_user_pci_hot_reset; +} + +static const TypeInfo types[] = { + { + .name = TYPE_VFIO_IOMMU_USER, + .parent = TYPE_VFIO_IOMMU, + .instance_size = sizeof(VFIOUserContainer), + .class_init = vfio_iommu_user_class_init, + }, +}; + +DEFINE_TYPES(types) diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h new file mode 100644 index 0000000..2bb1fa1 --- /dev/null +++ b/hw/vfio-user/container.h @@ -0,0 +1,23 @@ +/* + * vfio-user specific definitions. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_VFIO_USER_CONTAINER_H +#define HW_VFIO_USER_CONTAINER_H + +#include "qemu/osdep.h" + +#include "hw/vfio/vfio-container-base.h" +#include "hw/vfio-user/proxy.h" + +/* MMU container sub-class for vfio-user. */ +typedef struct VFIOUserContainer { + VFIOContainerBase bcontainer; + VFIOUserProxy *proxy; +} VFIOUserContainer; + +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER); + +#endif /* HW_VFIO_USER_CONTAINER_H */ diff --git a/hw/vfio-user/device.c b/hw/vfio-user/device.c new file mode 100644 index 0000000..0609a7d --- /dev/null +++ b/hw/vfio-user/device.c @@ -0,0 +1,441 @@ +/* + * vfio protocol over a UNIX socket: device handling. + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/lockable.h" +#include "qemu/thread.h" + +#include "hw/vfio-user/device.h" +#include "hw/vfio-user/trace.h" + +/* + * These are to defend against a malign server trying + * to force us to run out of memory. 
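+ * They bound the num_regions and num_irqs values a server can return + * from VFIO_USER_DEVICE_GET_INFO before we size anything from them.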
+ */ +#define VFIO_USER_MAX_REGIONS 100 +#define VFIO_USER_MAX_IRQS 50 + +bool vfio_user_get_device_info(VFIOUserProxy *proxy, + struct vfio_device_info *info, Error **errp) +{ + VFIOUserDeviceInfo msg; + uint32_t argsz = sizeof(msg) - sizeof(msg.hdr); + + memset(&msg, 0, sizeof(msg)); + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0); + msg.argsz = argsz; + + if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) { + return false; + } + + if (msg.hdr.flags & VFIO_USER_ERROR) { + error_setg_errno(errp, -msg.hdr.error_reply, + "VFIO_USER_DEVICE_GET_INFO failed"); + return false; + } + + trace_vfio_user_get_info(msg.num_regions, msg.num_irqs); + + memcpy(info, &msg.argsz, argsz); + + /* defend against a malicious server */ + if (info->num_regions > VFIO_USER_MAX_REGIONS || + info->num_irqs > VFIO_USER_MAX_IRQS) { + error_setg_errno(errp, EINVAL, "invalid reply"); + return false; + } + + return true; +} + +void vfio_user_device_reset(VFIOUserProxy *proxy) +{ + Error *local_err = NULL; + VFIOUserHdr hdr; + + vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0); + + if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &local_err)) { + error_prepend(&local_err, "%s: ", __func__); + error_report_err(local_err); + return; + } + + if (hdr.flags & VFIO_USER_ERROR) { + error_printf("reset reply error %d\n", hdr.error_reply); + } +} + +static int vfio_user_get_region_info(VFIOUserProxy *proxy, + struct vfio_region_info *info, + VFIOUserFDs *fds) +{ + g_autofree VFIOUserRegionInfo *msgp = NULL; + Error *local_err = NULL; + uint32_t size; + + /* data returned can be larger than vfio_region_info */ + if (info->argsz < sizeof(*info)) { + error_printf("vfio_user_get_region_info argsz too small\n"); + return -E2BIG; + } + if (fds != NULL && fds->send_fds != 0) { + error_printf("vfio_user_get_region_info can't send FDs\n"); + return -EINVAL; + } + + size = info->argsz + sizeof(VFIOUserHdr); + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO, + sizeof(*msgp), 0); + msgp->argsz = info->argsz; + msgp->index = info->index; + + if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) { + error_prepend(&local_err, "%s: ", __func__); + error_report_err(local_err); + return -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size); + + memcpy(info, &msgp->argsz, info->argsz); + + /* + * If at least one region is directly mapped into the VM, then we can no + * longer rely on the sequential nature of vfio-user request handling to + * ensure that posted writes are completed before a subsequent read. In this + * case, disable posted write support. This is a per-device property, not + * per-region. 
+ */ + if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) { + vfio_user_disable_posted_writes(proxy); + } + + return 0; +} + +static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info, + int *fd) +{ + VFIOUserFDs fds = { 0, 1, fd }; + int ret; + + if (info->index >= vbasedev->num_regions) { + return -EINVAL; + } + + ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds); + if (ret) { + return ret; + } + + /* cap_offset in valid area */ + if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) && + (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) { + return -EINVAL; + } + + return 0; +} + +static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev, + struct vfio_irq_info *info) +{ + VFIOUserProxy *proxy = vbasedev->proxy; + Error *local_err = NULL; + VFIOUserIRQInfo msg; + + memset(&msg, 0, sizeof(msg)); + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO, + sizeof(msg), 0); + msg.argsz = info->argsz; + msg.index = info->index; + + if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, &local_err)) { + error_prepend(&local_err, "%s: ", __func__); + error_report_err(local_err); + return -EFAULT; + } + + if (msg.hdr.flags & VFIO_USER_ERROR) { + return -msg.hdr.error_reply; + } + trace_vfio_user_get_irq_info(msg.index, msg.flags, msg.count); + + memcpy(info, &msg.argsz, sizeof(*info)); + return 0; +} + +static int irq_howmany(int *fdp, uint32_t cur, uint32_t max) +{ + int n = 0; + + if (fdp[cur] != -1) { + do { + n++; + } while (n < max && fdp[cur + n] != -1); + } else { + do { + n++; + } while (n < max && fdp[cur + n] == -1); + } + + return n; +} + +static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev, + struct vfio_irq_set *irq) +{ + VFIOUserProxy *proxy = vbasedev->proxy; + g_autofree VFIOUserIRQSet *msgp = NULL; + uint32_t size, nfds, send_fds, sent_fds, max; + Error *local_err = NULL; + + if (irq->argsz < sizeof(*irq)) { + error_printf("vfio_user_set_irqs argsz too small\n"); + return -EINVAL; + } + + /* + * Handle simple case + */ + if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) { + size = sizeof(VFIOUserHdr) + irq->argsz; + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0); + msgp->argsz = irq->argsz; + msgp->flags = irq->flags; + msgp->index = irq->index; + msgp->start = irq->start; + msgp->count = irq->count; + trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count, + msgp->flags); + + if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) { + error_prepend(&local_err, "%s: ", __func__); + error_report_err(local_err); + return -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + + return 0; + } + + /* + * Calculate the number of FDs to send + * and adjust argsz + */ + nfds = (irq->argsz - sizeof(*irq)) / sizeof(int); + irq->argsz = sizeof(*irq); + msgp = g_malloc0(sizeof(*msgp)); + /* + * Send in chunks if over max_send_fds + */ + for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) { + VFIOUserFDs *arg_fds, loop_fds; + + /* must send all valid FDs or all invalid FDs in single msg */ + max = nfds - sent_fds; + if (max > proxy->max_send_fds) { + max = proxy->max_send_fds; + } + send_fds = irq_howmany((int *)irq->data, sent_fds, max); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, + sizeof(*msgp), 0); + msgp->argsz = irq->argsz; + msgp->flags = irq->flags; + msgp->index = irq->index; + msgp->start = irq->start + sent_fds; + msgp->count = send_fds; + 
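+ /* + * Worked example (values illustrative): with max_send_fds == 8 and ten + * valid eventfds, irq_howmany() yields runs of 8 then 2, so two + * SET_IRQS messages go out, the second starting at irq->start + 8. + */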
trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count, + msgp->flags); + + loop_fds.send_fds = send_fds; + loop_fds.recv_fds = 0; + loop_fds.fds = (int *)irq->data + sent_fds; + arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL; + + if (!vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, &local_err)) { + error_prepend(&local_err, "%s: ", __func__); + error_report_err(local_err); + return -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + } + + return 0; +} + +static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t count, + void *data) +{ + g_autofree VFIOUserRegionRW *msgp = NULL; + VFIOUserProxy *proxy = vbasedev->proxy; + int size = sizeof(*msgp) + count; + Error *local_err = NULL; + + if (count > proxy->max_xfer_size) { + return -EINVAL; + } + + msgp = g_malloc0(size); + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0); + msgp->offset = off; + msgp->region = index; + msgp->count = count; + trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count); + + if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, &local_err)) { + error_prepend(&local_err, "%s: ", __func__); + error_report_err(local_err); + return -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } else if (msgp->count > count) { + return -E2BIG; + } else { + memcpy(data, &msgp->data, msgp->count); + } + + return msgp->count; +} + +/* + * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK + * to send the write to the socket without waiting for the server's reply: + * a subsequent read (of any region) will not pass the posted write, as all + * messages are handled sequentially. + */ +static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index, + off_t off, unsigned count, + void *data, bool post) +{ + VFIOUserRegionRW *msgp = NULL; + VFIOUserProxy *proxy = vbasedev->proxy; + int size = sizeof(*msgp) + count; + Error *local_err = NULL; + bool can_multi; + int flags = 0; + int ret; + + if (count > proxy->max_xfer_size) { + return -EINVAL; + } + + if (proxy->flags & VFIO_PROXY_NO_POST) { + post = false; + } + + if (post) { + flags |= VFIO_USER_NO_REPLY; + } + + /* write eligible to be in a WRITE_MULTI msg ? 
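+ * (i.e. a posted write of at most VFIO_USER_MULTI_DATA bytes while the + * server has negotiated the write_multiple capability)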
*/ + can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post && + count <= VFIO_USER_MULTI_DATA; + + /* + * This should be a rare case, so first check without the lock; + * if we're wrong, vfio_user_send_queued() will flush any posted writes + * we missed here. + */ + if (proxy->wr_multi != NULL || + (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) { + + /* + * re-check with lock + * + * if already building a WRITE_MULTI msg, + * add this one if possible else flush pending before + * sending the current one + * + * else if outgoing queue is over the highwater, + * start a new WRITE_MULTI message + */ + WITH_QEMU_LOCK_GUARD(&proxy->lock) { + if (proxy->wr_multi != NULL) { + if (can_multi) { + vfio_user_add_multi(proxy, index, off, count, data); + return count; + } + vfio_user_flush_multi(proxy); + } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) { + vfio_user_create_multi(proxy); + vfio_user_add_multi(proxy, index, off, count, data); + return count; + } + } + } + + msgp = g_malloc0(size); + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags); + msgp->offset = off; + msgp->region = index; + msgp->count = count; + memcpy(&msgp->data, data, count); + trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count); + + /* async send will free msg after it's sent */ + if (post) { + if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) { + error_prepend(&local_err, "%s: ", __func__); + error_report_err(local_err); + return -EFAULT; + } + + return count; + } + + if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) { + error_prepend(&local_err, "%s: ", __func__); + error_report_err(local_err); + g_free(msgp); + return -EFAULT; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + ret = -msgp->hdr.error_reply; + } else { + ret = count; + } + + g_free(msgp); + return ret; +} + +/* + * Socket-based io_ops + */ +VFIODeviceIOOps vfio_user_device_io_ops_sock = { + .get_region_info = vfio_user_device_io_get_region_info, + .get_irq_info = vfio_user_device_io_get_irq_info, + .set_irqs = vfio_user_device_io_set_irqs, + .region_read = vfio_user_device_io_region_read, + .region_write = vfio_user_device_io_region_write, +}; diff --git a/hw/vfio-user/device.h b/hw/vfio-user/device.h new file mode 100644 index 0000000..d183a39 --- /dev/null +++ b/hw/vfio-user/device.h @@ -0,0 +1,24 @@ +#ifndef VFIO_USER_DEVICE_H +#define VFIO_USER_DEVICE_H + +/* + * vfio protocol over a UNIX socket: device handling. + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "linux/vfio.h" + +#include "hw/vfio-user/proxy.h" + +bool vfio_user_get_device_info(VFIOUserProxy *proxy, + struct vfio_device_info *info, Error **errp); + +void vfio_user_device_reset(VFIOUserProxy *proxy); + +extern VFIODeviceIOOps vfio_user_device_io_ops_sock; + +#endif /* VFIO_USER_DEVICE_H */ diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build new file mode 100644 index 0000000..2ed0ae5 --- /dev/null +++ b/hw/vfio-user/meson.build @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +vfio_user_ss = ss.source_set() +vfio_user_ss.add(files( + 'container.c', + 'device.c', + 'pci.c', + 'proxy.c', +)) + +system_ss.add_all(when: 'CONFIG_VFIO_USER', if_true: vfio_user_ss) diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c new file mode 100644 index 0000000..be71c77 --- /dev/null +++ b/hw/vfio-user/pci.c @@ -0,0 +1,475 @@ +/* + * vfio PCI device over a UNIX socket. 
+ * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <sys/ioctl.h> +#include "qemu/osdep.h" +#include "qapi-visit-sockets.h" +#include "qemu/error-report.h" + +#include "hw/qdev-properties.h" +#include "hw/vfio/pci.h" +#include "hw/vfio-user/device.h" +#include "hw/vfio-user/proxy.h" + +#define TYPE_VFIO_USER_PCI "vfio-user-pci" +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI) + +struct VFIOUserPCIDevice { + VFIOPCIDevice device; + SocketAddress *socket; + bool send_queued; /* all sends are queued */ + uint32_t wait_time; /* timeout for message replies */ + bool no_post; /* all region writes are sync */ +}; + +/* + * The server maintains the device's pending interrupts, + * via its MSIX table and PBA, so we treat these accesses + * like PCI config space and forward them. + */ +static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr, + unsigned size) +{ + VFIOPCIDevice *vdev = opaque; + VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region; + uint64_t data; + + /* server copy is what matters */ + data = vfio_region_read(region, addr + vdev->msix->pba_offset, size); + return data; +} + +static void vfio_user_pba_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + /* dropped */ +} + +static const MemoryRegionOps vfio_user_pba_ops = { + .read = vfio_user_pba_read, + .write = vfio_user_pba_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void vfio_user_msix_setup(VFIOPCIDevice *vdev) +{ + MemoryRegion *vfio_reg, *msix_reg, *pba_reg; + + pba_reg = g_new0(MemoryRegion, 1); + vdev->msix->pba_region = pba_reg; + + vfio_reg = vdev->bars[vdev->msix->pba_bar].mr; + msix_reg = &vdev->pdev.msix_pba_mmio; + memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev, + "VFIO MSIX PBA", int128_get64(msix_reg->size)); + memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset, + pba_reg, 1); +} + +static void vfio_user_msix_teardown(VFIOPCIDevice *vdev) +{ + MemoryRegion *mr, *sub; + + mr = vdev->bars[vdev->msix->pba_bar].mr; + sub = vdev->msix->pba_region; + memory_region_del_subregion(mr, sub); + + g_free(vdev->msix->pba_region); + vdev->msix->pba_region = NULL; +} + +static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) +{ + PCIDevice *pdev = &vdev->pdev; + VFIOUserProxy *proxy = vdev->vbasedev.proxy; + VFIOUserDMARW *res; + MemTxResult r; + size_t size; + + if (msg->hdr.size < sizeof(*msg)) { + vfio_user_send_error(proxy, &msg->hdr, EINVAL); + return; + } + if (msg->count > proxy->max_xfer_size) { + vfio_user_send_error(proxy, &msg->hdr, E2BIG); + return; + } + + /* switch to our own message buffer */ + size = msg->count + sizeof(VFIOUserDMARW); + res = g_malloc0(size); + memcpy(res, msg, sizeof(*res)); + g_free(msg); + + r = pci_dma_read(pdev, res->offset, &res->data, res->count); + + switch (r) { + case MEMTX_OK: + if (res->hdr.flags & VFIO_USER_NO_REPLY) { + g_free(res); + return; + } + vfio_user_send_reply(proxy, &res->hdr, size); + break; + case MEMTX_ERROR: + vfio_user_send_error(proxy, &res->hdr, EFAULT); + break; + case MEMTX_DECODE_ERROR: + vfio_user_send_error(proxy, &res->hdr, ENODEV); + break; + case MEMTX_ACCESS_ERROR: + vfio_user_send_error(proxy, &res->hdr, EPERM); + break; + default: + error_printf("vfio_user_dma_read unknown error %d\n", r); + vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL); + } +} + +static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) +{ + PCIDevice *pdev = 
&vdev->pdev; + VFIOUserProxy *proxy = vdev->vbasedev.proxy; + MemTxResult r; + + if (msg->hdr.size < sizeof(*msg)) { + vfio_user_send_error(proxy, &msg->hdr, EINVAL); + return; + } + /* make sure transfer count isn't larger than the message data */ + if (msg->count > msg->hdr.size - sizeof(*msg)) { + vfio_user_send_error(proxy, &msg->hdr, E2BIG); + return; + } + + r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count); + + switch (r) { + case MEMTX_OK: + if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) { + vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr)); + } else { + g_free(msg); + } + break; + case MEMTX_ERROR: + vfio_user_send_error(proxy, &msg->hdr, EFAULT); + break; + case MEMTX_DECODE_ERROR: + vfio_user_send_error(proxy, &msg->hdr, ENODEV); + break; + case MEMTX_ACCESS_ERROR: + vfio_user_send_error(proxy, &msg->hdr, EPERM); + break; + default: + error_printf("vfio_user_dma_write unknown error %d\n", r); + vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL); + } +} + +/* + * Incoming request message callback. + * + * Runs off main loop, so BQL held. + */ +static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg) +{ + VFIOPCIDevice *vdev = opaque; + VFIOUserHdr *hdr = msg->hdr; + + /* no incoming PCI requests pass FDs */ + if (msg->fds != NULL) { + vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL); + vfio_user_putfds(msg); + return; + } + + switch (hdr->command) { + case VFIO_USER_DMA_READ: + vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr); + break; + case VFIO_USER_DMA_WRITE: + vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr); + break; + default: + error_printf("vfio_user_pci_process_req unknown cmd %d\n", + hdr->command); + vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS); + } +} + +/* + * Emulated devices don't use host hot reset + */ +static void vfio_user_compute_needs_reset(VFIODevice *vbasedev) +{ + vbasedev->needs_reset = false; +} + +static Object *vfio_user_pci_get_object(VFIODevice *vbasedev) +{ + VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice, + device.vbasedev); + + return OBJECT(vdev); +} + +static VFIODeviceOps vfio_user_pci_ops = { + .vfio_compute_needs_reset = vfio_user_compute_needs_reset, + .vfio_eoi = vfio_pci_intx_eoi, + .vfio_get_object = vfio_user_pci_get_object, + /* No live migration support yet. */ + .vfio_save_config = NULL, + .vfio_load_config = NULL, +}; + +static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) +{ + ERRP_GUARD(); + VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; + const char *sock_name; + AddressSpace *as; + SocketAddress addr; + VFIOUserProxy *proxy; + + if (!udev->socket) { + error_setg(errp, "No socket specified"); + error_append_hint(errp, "e.g. 
-device '{" + "\"driver\":\"vfio-user-pci\", " + "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", " + "\"type\": \"unix\"}'" + "}'\n"); + return; + } + + sock_name = udev->socket->u.q_unix.path; + + vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); + + memset(&addr, 0, sizeof(addr)); + addr.type = SOCKET_ADDRESS_TYPE_UNIX; + addr.u.q_unix.path = (char *)sock_name; + proxy = vfio_user_connect_dev(&addr, errp); + if (!proxy) { + return; + } + vbasedev->proxy = proxy; + vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev); + + vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name); + + if (udev->send_queued) { + proxy->flags |= VFIO_PROXY_FORCE_QUEUED; + } + + if (udev->no_post) { + proxy->flags |= VFIO_PROXY_NO_POST; + } + + /* user specified or 5 sec default */ + proxy->wait_time = udev->wait_time; + + if (!vfio_user_validate_version(proxy, errp)) { + goto error; + } + + /* + * Use socket-based device I/O instead of vfio kernel driver. + */ + vbasedev->io_ops = &vfio_user_device_io_ops_sock; + + /* + * vfio-user devices are effectively mdevs (don't use a host iommu). + */ + vbasedev->mdev = true; + + /* + * Enable per-region fds. + */ + vbasedev->use_region_fds = true; + + as = pci_device_iommu_address_space(pdev); + if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER, + vbasedev->name, vbasedev, + as, errp)) { + goto error; + } + + if (!vfio_pci_populate_device(vdev, errp)) { + goto error; + } + + if (!vfio_pci_config_setup(vdev, errp)) { + goto error; + } + + /* + * vfio_pci_config_setup will have registered the device's BARs + * and setup any MSIX BARs, so errors after it succeeds must + * use out_teardown + */ + + if (!vfio_pci_add_capabilities(vdev, errp)) { + goto out_teardown; + } + + if (vdev->msix != NULL) { + vfio_user_msix_setup(vdev); + } + + if (!vfio_pci_interrupt_setup(vdev, errp)) { + goto out_teardown; + } + + vfio_pci_register_err_notifier(vdev); + vfio_pci_register_req_notifier(vdev); + + return; + +out_teardown: + vfio_pci_teardown_msi(vdev); + vfio_pci_bars_exit(vdev); +error: + error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); + vfio_pci_put_device(vdev); +} + +static void vfio_user_instance_init(Object *obj) +{ + PCIDevice *pci_dev = PCI_DEVICE(obj); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); + VFIODevice *vbasedev = &vdev->vbasedev; + + device_add_bootindex_property(obj, &vdev->bootindex, + "bootindex", NULL, + &pci_dev->qdev); + vdev->host.domain = ~0U; + vdev->host.bus = ~0U; + vdev->host.slot = ~0U; + vdev->host.function = ~0U; + + vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops, + DEVICE(vdev), false); + + vdev->nv_gpudirect_clique = 0xFF; + + /* + * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command + * line, therefore, no need to wait to realize like other devices. 
+ */ + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; +} + +static void vfio_user_instance_finalize(Object *obj) +{ + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); + VFIODevice *vbasedev = &vdev->vbasedev; + + if (vdev->msix != NULL) { + vfio_user_msix_teardown(vdev); + } + + vfio_pci_put_device(vdev); + + if (vbasedev->proxy != NULL) { + vfio_user_disconnect(vbasedev->proxy); + } +} + +static void vfio_user_pci_reset(DeviceState *dev) +{ + VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); + VFIODevice *vbasedev = &vdev->vbasedev; + + vfio_pci_pre_reset(vdev); + + if (vbasedev->reset_works) { + vfio_user_device_reset(vbasedev->proxy); + } + + vfio_pci_post_reset(vdev); +} + +static const Property vfio_user_pci_dev_properties[] = { + DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, + vendor_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, + device_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice, + sub_vendor_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, + sub_device_id, PCI_ANY_ID), + DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), + DEFINE_PROP_UINT32("x-msg-timeout", VFIOUserPCIDevice, wait_time, 5000), + DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false), +}; + +static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj); + bool success; + + if (udev->device.vbasedev.proxy) { + error_setg(errp, "Proxy is connected"); + return; + } + + qapi_free_SocketAddress(udev->socket); + + udev->socket = NULL; + + success = visit_type_SocketAddress(v, name, &udev->socket, errp); + + if (!success) { + return; + } + + if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { + error_setg(errp, "Unsupported socket type %s", + SocketAddressType_str(udev->socket->type)); + qapi_free_SocketAddress(udev->socket); + udev->socket = NULL; + return; + } +} + +static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); + + device_class_set_legacy_reset(dc, vfio_user_pci_reset); + device_class_set_props(dc, vfio_user_pci_dev_properties); + + object_class_property_add(klass, "socket", "SocketAddress", NULL, + vfio_user_pci_set_socket, NULL, NULL); + object_class_property_set_description(klass, "socket", + "SocketAddress (UNIX sockets only)"); + + dc->desc = "VFIO over socket PCI device assignment"; + pdc->realize = vfio_user_pci_realize; +} + +static const TypeInfo vfio_user_pci_dev_info = { + .name = TYPE_VFIO_USER_PCI, + .parent = TYPE_VFIO_PCI_BASE, + .instance_size = sizeof(VFIOUserPCIDevice), + .class_init = vfio_user_pci_dev_class_init, + .instance_init = vfio_user_instance_init, + .instance_finalize = vfio_user_instance_finalize, +}; + +static void register_vfio_user_dev_type(void) +{ + type_register_static(&vfio_user_pci_dev_info); +} + + type_init(register_vfio_user_dev_type) diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h new file mode 100644 index 0000000..3249a4a --- /dev/null +++ b/hw/vfio-user/protocol.h @@ -0,0 +1,242 @@ +#ifndef VFIO_USER_PROTOCOL_H +#define VFIO_USER_PROTOCOL_H + +/* + * vfio protocol over a UNIX socket. + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. + * + * Each message has a standard header that describes the command + * being sent, which is almost always a VFIO ioctl(). 
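+ * + * A minimal sketch of a filled header for a 4-byte region write (the field values are illustrative only): + * + * VFIOUserHdr hdr = { + * .id = 1, // pairs the reply with the request + * .command = VFIO_USER_REGION_WRITE, + * .size = sizeof(VFIOUserRegionRW) + 4, + * .flags = VFIO_USER_REQUEST, + * };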
+ * + * The header may be followed by command-specific data, such as the + * region and offset info for read and write commands. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +typedef struct { + uint16_t id; + uint16_t command; + uint32_t size; + uint32_t flags; + uint32_t error_reply; +} VFIOUserHdr; + +/* VFIOUserHdr commands */ +enum vfio_user_command { + VFIO_USER_VERSION = 1, + VFIO_USER_DMA_MAP = 2, + VFIO_USER_DMA_UNMAP = 3, + VFIO_USER_DEVICE_GET_INFO = 4, + VFIO_USER_DEVICE_GET_REGION_INFO = 5, + VFIO_USER_DEVICE_GET_REGION_IO_FDS = 6, + VFIO_USER_DEVICE_GET_IRQ_INFO = 7, + VFIO_USER_DEVICE_SET_IRQS = 8, + VFIO_USER_REGION_READ = 9, + VFIO_USER_REGION_WRITE = 10, + VFIO_USER_DMA_READ = 11, + VFIO_USER_DMA_WRITE = 12, + VFIO_USER_DEVICE_RESET = 13, + VFIO_USER_DIRTY_PAGES = 14, + VFIO_USER_REGION_WRITE_MULTI = 15, + VFIO_USER_MAX, +}; + +/* VFIOUserHdr flags */ +#define VFIO_USER_REQUEST 0x0 +#define VFIO_USER_REPLY 0x1 +#define VFIO_USER_TYPE 0xF + +#define VFIO_USER_NO_REPLY 0x10 +#define VFIO_USER_ERROR 0x20 + + +/* + * VFIO_USER_VERSION + */ +typedef struct { + VFIOUserHdr hdr; + uint16_t major; + uint16_t minor; + char capabilities[]; +} VFIOUserVersion; + +#define VFIO_USER_MAJOR_VER 0 +#define VFIO_USER_MINOR_VER 0 + +#define VFIO_USER_CAP "capabilities" + +/* "capabilities" members */ +#define VFIO_USER_CAP_MAX_FDS "max_msg_fds" +#define VFIO_USER_CAP_MAX_XFER "max_data_xfer_size" +#define VFIO_USER_CAP_PGSIZES "pgsizes" +#define VFIO_USER_CAP_MAP_MAX "max_dma_maps" +#define VFIO_USER_CAP_MIGR "migration" +#define VFIO_USER_CAP_MULTI "write_multiple" + +/* "migration" members */ +#define VFIO_USER_CAP_PGSIZE "pgsize" +#define VFIO_USER_CAP_MAX_BITMAP "max_bitmap_size" + +/* + * Max FDs mainly comes into play when a device supports multiple interrupts + * where each one uses an eventfd to inject it into the guest. + * It is clamped by the number of FDs the qio channel supports in a + * single message. + */ +#define VFIO_USER_DEF_MAX_FDS 8 +#define VFIO_USER_MAX_MAX_FDS 16 + +/* + * Max transfer limits the amount of data in region and DMA messages. + * Region R/W will be very small (limited by how much a single instruction + * can process) so just use a reasonable limit here. + */ +#define VFIO_USER_DEF_MAX_XFER (1024 * 1024) +#define VFIO_USER_MAX_MAX_XFER (64 * 1024 * 1024) + +/* + * The default supported page size is 4k. + */ +#define VFIO_USER_DEF_PGSIZE 4096 + +/* + * Default max number of DMA mappings is stolen from the + * Linux kernel "dma_entry_limit". + */ +#define VFIO_USER_DEF_MAP_MAX 65535 + +/* + * Default max bitmap size is also taken from the Linux kernel, + * where usage of signed ints limits the VA range to 2^31 bytes. 
+ * Dividing that by the number of bits per byte yields 256MB + */ +#define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024) + +/* + * VFIO_USER_DMA_MAP + * imported from struct vfio_iommu_type1_dma_map + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint64_t offset; /* FD offset */ + uint64_t iova; + uint64_t size; +} VFIOUserDMAMap; + +/* + * VFIO_USER_DMA_UNMAP + * imported from struct vfio_iommu_type1_dma_unmap + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint64_t iova; + uint64_t size; +} VFIOUserDMAUnmap; + +/* + * VFIO_USER_DEVICE_GET_INFO + * imported from struct vfio_device_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t num_regions; + uint32_t num_irqs; +} VFIOUserDeviceInfo; + +/* + * VFIO_USER_DEVICE_GET_REGION_INFO + * imported from struct vfio_region_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t cap_offset; + uint64_t size; + uint64_t offset; +} VFIOUserRegionInfo; + +/* + * VFIO_USER_DEVICE_GET_IRQ_INFO + * imported from struct vfio_irq_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t count; +} VFIOUserIRQInfo; + +/* + * VFIO_USER_DEVICE_SET_IRQS + * imported from struct vfio_irq_set + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t start; + uint32_t count; +} VFIOUserIRQSet; + +/* + * VFIO_USER_REGION_READ + * VFIO_USER_REGION_WRITE + */ +typedef struct { + VFIOUserHdr hdr; + uint64_t offset; + uint32_t region; + uint32_t count; + char data[]; +} VFIOUserRegionRW; + +/* + * VFIO_USER_DMA_READ + * VFIO_USER_DMA_WRITE + */ +typedef struct { + VFIOUserHdr hdr; + uint64_t offset; + uint32_t count; + char data[]; +} VFIOUserDMARW; + +/* imported from struct vfio_bitmap */ +typedef struct { + uint64_t pgsize; + uint64_t size; + char data[]; +} VFIOUserBitmap; + +/* + * VFIO_USER_REGION_WRITE_MULTI + */ +#define VFIO_USER_MULTI_DATA 8 +#define VFIO_USER_MULTI_MAX 200 + +typedef struct { + uint64_t offset; + uint32_t region; + uint32_t count; + char data[VFIO_USER_MULTI_DATA]; +} VFIOUserWROne; + +typedef struct { + VFIOUserHdr hdr; + uint64_t wr_cnt; + VFIOUserWROne wrs[VFIO_USER_MULTI_MAX]; +} VFIOUserWRMulti; + +#endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c new file mode 100644 index 0000000..2275d3f --- /dev/null +++ b/hw/vfio-user/proxy.c @@ -0,0 +1,1358 @@ +/* + * vfio protocol over a UNIX socket. + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include <sys/ioctl.h> + +#include "hw/vfio/vfio-device.h" +#include "hw/vfio-user/proxy.h" +#include "hw/vfio-user/trace.h" +#include "qapi/error.h" +#include "qobject/qbool.h" +#include "qobject/qdict.h" +#include "qobject/qjson.h" +#include "qobject/qnum.h" +#include "qemu/error-report.h" +#include "qemu/lockable.h" +#include "qemu/main-loop.h" +#include "qemu/thread.h" +#include "system/iothread.h" + +static IOThread *vfio_user_iothread; + +static void vfio_user_shutdown(VFIOUserProxy *proxy); +static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds); +static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg); + +static void vfio_user_recv(void *opaque); +static void vfio_user_send(void *opaque); + +static void vfio_user_request(void *opaque); + +static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) +{ + hdr->flags |= VFIO_USER_ERROR; + hdr->error_reply = err; +} + +/* + * Functions called by main, CPU, or iothread threads + */ + +static void vfio_user_shutdown(VFIOUserProxy *proxy) +{ + qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL); + qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL, + proxy->ctx, NULL, NULL); +} + +/* + * Same return values as qio_channel_writev_full(): + * + * QIO_CHANNEL_ERR_BLOCK: *errp not set + * -1: *errp will be populated + * otherwise: bytes written + */ +static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg, + Error **errp) +{ + VFIOUserFDs *fds = msg->fds; + struct iovec iov = { + .iov_base = msg->hdr, + .iov_len = msg->hdr->size, + }; + size_t numfds = 0; + int *fdp = NULL; + ssize_t ret; + + if (fds != NULL && fds->send_fds != 0) { + numfds = fds->send_fds; + fdp = fds->fds; + } + + ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp); + + if (ret == -1) { + vfio_user_set_error(msg->hdr, EIO); + vfio_user_shutdown(proxy); + } + trace_vfio_user_send_write(msg->hdr->id, ret); + + return ret; +} + +static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds) +{ + VFIOUserMsg *msg; + + msg = QTAILQ_FIRST(&proxy->free); + if (msg != NULL) { + QTAILQ_REMOVE(&proxy->free, msg, next); + } else { + msg = g_malloc0(sizeof(*msg)); + qemu_cond_init(&msg->cv); + } + + msg->hdr = hdr; + msg->fds = fds; + return msg; +} + +/* + * Recycle a message list entry to the free list. + */ +static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg) +{ + if (msg->type == VFIO_MSG_NONE) { + error_printf("vfio_user_recycle - freeing free msg\n"); + return; + } + + /* free msg buffer if no one is waiting to consume the reply */ + if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) { + g_free(msg->hdr); + if (msg->fds != NULL) { + g_free(msg->fds); + } + } + + msg->type = VFIO_MSG_NONE; + msg->hdr = NULL; + msg->fds = NULL; + msg->complete = false; + msg->pending = false; + QTAILQ_INSERT_HEAD(&proxy->free, msg, next); +} + +VFIOUserFDs *vfio_user_getfds(int numfds) +{ + VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int))); + + fds->fds = (int *)((char *)fds + sizeof(*fds)); + + return fds; +} + +/* + * Functions only called by iothread + */ + +/* + * Process a received message. + */ +static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg, + bool isreply) +{ + /* + * Replies signal a waiter; if none, just check for errors + * and free the message buffer. 
+ * + * Requests get queued for the BH. + */ + if (isreply) { + msg->complete = true; + if (msg->type == VFIO_MSG_WAIT) { + qemu_cond_signal(&msg->cv); + } else { + if (msg->hdr->flags & VFIO_USER_ERROR) { + error_printf("vfio_user_process: error reply on async "); + error_printf("request command %x error %s\n", + msg->hdr->command, + strerror(msg->hdr->error_reply)); + } + /* youngest nowait msg has been ack'd */ + if (proxy->last_nowait == msg) { + proxy->last_nowait = NULL; + } + vfio_user_recycle(proxy, msg); + } + } else { + QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next); + qemu_bh_schedule(proxy->req_bh); + } +} + +/* + * Complete a partial message read + */ +static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp) +{ + VFIOUserMsg *msg = proxy->part_recv; + size_t msgleft = proxy->recv_left; + bool isreply; + char *data; + int ret; + + data = (char *)msg->hdr + (msg->hdr->size - msgleft); + while (msgleft > 0) { + ret = qio_channel_read(proxy->ioc, data, msgleft, errp); + + /* error or would block */ + if (ret <= 0) { + /* try for the rest on the next iteration */ + if (ret == QIO_CHANNEL_ERR_BLOCK) { + proxy->recv_left = msgleft; + } + return ret; + } + trace_vfio_user_recv_read(msg->hdr->id, ret); + + msgleft -= ret; + data += ret; + } + + /* + * Read complete message, process it. + */ + proxy->part_recv = NULL; + proxy->recv_left = 0; + isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY; + vfio_user_process(proxy, msg, isreply); + + /* return positive value */ + return 1; +} + +/* + * Receive and process one incoming message. + * + * For replies, find matching outgoing request and wake any waiters. + * For requests, queue in incoming list and run request BH. + */ +static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp) +{ + VFIOUserMsg *msg = NULL; + g_autofree int *fdp = NULL; + VFIOUserFDs *reqfds; + VFIOUserHdr hdr; + struct iovec iov = { + .iov_base = &hdr, + .iov_len = sizeof(hdr), + }; + bool isreply = false; + int i, ret; + size_t msgleft, numfds = 0; + char *data = NULL; + char *buf = NULL; + + /* + * Complete any partial reads + */ + if (proxy->part_recv != NULL) { + ret = vfio_user_complete(proxy, errp); + + /* still not complete, try later */ + if (ret == QIO_CHANNEL_ERR_BLOCK) { + return ret; + } + + if (ret <= 0) { + goto fatal; + } + /* else fall into reading another msg */ + } + + /* + * Read header + */ + ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0, + errp); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + return ret; + } + + /* read error or other side closed connection */ + if (ret <= 0) { + goto fatal; + } + + if (ret < sizeof(hdr)) { + error_setg(errp, "short read of header"); + goto fatal; + } + + /* + * Validate header + */ + if (hdr.size < sizeof(VFIOUserHdr)) { + error_setg(errp, "bad header size"); + goto fatal; + } + switch (hdr.flags & VFIO_USER_TYPE) { + case VFIO_USER_REQUEST: + isreply = false; + break; + case VFIO_USER_REPLY: + isreply = true; + break; + default: + error_setg(errp, "unknown message type"); + goto fatal; + } + trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size, + hdr.flags); + + /* + * For replies, find the matching pending request. + * For requests, reap incoming FDs. 
+ */ + if (isreply) { + QTAILQ_FOREACH(msg, &proxy->pending, next) { + if (hdr.id == msg->id) { + break; + } + } + if (msg == NULL) { + error_setg(errp, "unexpected reply"); + goto err; + } + QTAILQ_REMOVE(&proxy->pending, msg, next); + + /* + * Process any received FDs + */ + if (numfds != 0) { + if (msg->fds == NULL || msg->fds->recv_fds < numfds) { + error_setg(errp, "unexpected FDs"); + goto err; + } + msg->fds->recv_fds = numfds; + memcpy(msg->fds->fds, fdp, numfds * sizeof(int)); + } + } else { + if (numfds != 0) { + reqfds = vfio_user_getfds(numfds); + memcpy(reqfds->fds, fdp, numfds * sizeof(int)); + } else { + reqfds = NULL; + } + } + + /* + * Put the whole message into a single buffer. + */ + if (isreply) { + if (hdr.size > msg->rsize) { + error_setg(errp, "reply larger than recv buffer"); + goto err; + } + *msg->hdr = hdr; + data = (char *)msg->hdr + sizeof(hdr); + } else { + if (hdr.size > proxy->max_xfer_size + sizeof(VFIOUserDMARW)) { + error_setg(errp, "vfio_user_recv request larger than max"); + goto err; + } + buf = g_malloc0(hdr.size); + memcpy(buf, &hdr, sizeof(hdr)); + data = buf + sizeof(hdr); + msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds); + msg->type = VFIO_MSG_REQ; + } + + /* + * Read rest of message. + */ + msgleft = hdr.size - sizeof(hdr); + while (msgleft > 0) { + ret = qio_channel_read(proxy->ioc, data, msgleft, errp); + + /* prepare to complete the read on the next iteration */ + if (ret == QIO_CHANNEL_ERR_BLOCK) { + proxy->part_recv = msg; + proxy->recv_left = msgleft; + return ret; + } + + if (ret <= 0) { + goto fatal; + } + trace_vfio_user_recv_read(hdr.id, ret); + + msgleft -= ret; + data += ret; + } + + vfio_user_process(proxy, msg, isreply); + return 0; + + /* + * fatal means the other side closed or we don't trust the stream + * err means this message is corrupt + */ +fatal: + vfio_user_shutdown(proxy); + proxy->state = VFIO_PROXY_ERROR; + + /* set error if server side closed */ + if (ret == 0) { + error_setg(errp, "server closed socket"); + } + +err: + for (i = 0; i < numfds; i++) { + close(fdp[i]); + } + if (isreply && msg != NULL) { + /* force an error to keep sending thread from hanging */ + vfio_user_set_error(msg->hdr, EINVAL); + msg->complete = true; + qemu_cond_signal(&msg->cv); + } + return -1; +} + +static void vfio_user_recv(void *opaque) +{ + VFIOUserProxy *proxy = opaque; + + QEMU_LOCK_GUARD(&proxy->lock); + + if (proxy->state == VFIO_PROXY_CONNECTED) { + Error *local_err = NULL; + + while (vfio_user_recv_one(proxy, &local_err) == 0) { + ; + } + + if (local_err != NULL) { + error_report_err(local_err); + } + } +} + +/* + * Send a single message, same return semantics as vfio_user_send_qio(). + * + * Sent async messages are freed, others are moved to pending queue. + */ +static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp) +{ + VFIOUserMsg *msg; + ssize_t ret; + + msg = QTAILQ_FIRST(&proxy->outgoing); + ret = vfio_user_send_qio(proxy, msg, errp); + if (ret < 0) { + return ret; + } + + QTAILQ_REMOVE(&proxy->outgoing, msg, next); + proxy->num_outgoing--; + if (msg->type == VFIO_MSG_ASYNC) { + vfio_user_recycle(proxy, msg); + } else { + QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); + msg->pending = true; + } + + return ret; +} + +/* + * Send messages from outgoing queue when the socket buffer has space. + * If we deplete 'outgoing', remove ourselves from the poll list. 
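+ * (vfio_user_send_queued() re-registers this handler whenever it must + * queue a message because the socket would block.)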
+ */ +static void vfio_user_send(void *opaque) +{ + VFIOUserProxy *proxy = opaque; + + QEMU_LOCK_GUARD(&proxy->lock); + + if (proxy->state == VFIO_PROXY_CONNECTED) { + while (!QTAILQ_EMPTY(&proxy->outgoing)) { + Error *local_err = NULL; + int ret; + + ret = vfio_user_send_one(proxy, &local_err); + + if (ret == QIO_CHANNEL_ERR_BLOCK) { + return; + } else if (ret == -1) { + error_report_err(local_err); + return; + } + } + qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, + vfio_user_recv, NULL, NULL, proxy); + + /* queue empty - send any pending multi write msgs */ + if (proxy->wr_multi != NULL) { + vfio_user_flush_multi(proxy); + } + } +} + +static void vfio_user_close_cb(void *opaque) +{ + VFIOUserProxy *proxy = opaque; + + QEMU_LOCK_GUARD(&proxy->lock); + + proxy->state = VFIO_PROXY_CLOSED; + qemu_cond_signal(&proxy->close_cv); +} + + +/* + * Functions called by main or CPU threads + */ + +/* + * Process incoming requests. + * + * The bus-specific callback has the form: + * request(opaque, msg) + * where 'opaque' was specified in vfio_user_set_handler + * and 'msg' is the inbound message. + * + * The callback is responsible for disposing of the message buffer, + * usually by re-using it when calling vfio_user_send_reply or + * vfio_user_send_error, both of which free their message buffer when + * the reply is sent. + * + * If the callback uses a new buffer, it needs to free the old one. + */ +static void vfio_user_request(void *opaque) +{ + VFIOUserProxy *proxy = opaque; + VFIOUserMsgQ new, free; + VFIOUserMsg *msg, *m1; + + /* reap all incoming */ + QTAILQ_INIT(&new); + WITH_QEMU_LOCK_GUARD(&proxy->lock) { + QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) { + QTAILQ_REMOVE(&proxy->incoming, msg, next); + QTAILQ_INSERT_TAIL(&new, msg, next); + } + } + + /* process list */ + QTAILQ_INIT(&free); + QTAILQ_FOREACH_SAFE(msg, &new, next, m1) { + QTAILQ_REMOVE(&new, msg, next); + trace_vfio_user_recv_request(msg->hdr->command); + proxy->request(proxy->req_arg, msg); + QTAILQ_INSERT_HEAD(&free, msg, next); + } + + /* free list */ + WITH_QEMU_LOCK_GUARD(&proxy->lock) { + QTAILQ_FOREACH_SAFE(msg, &free, next, m1) { + vfio_user_recycle(proxy, msg); + } + } +} + +/* + * Messages are queued onto the proxy's outgoing list. + * + * It handles 3 types of messages: + * + * async messages - replies and posted writes + * + * There will be no reply from the server, so message + * buffers are freed after they're sent. + * + * nowait messages - map/unmap during address space transactions + * + * These are also sent async, but a reply is expected so that + * vfio_user_wait_reqs() can wait for the youngest nowait request. + * They transition from the outgoing list to the pending list + * when sent, and are freed when the reply is received. + * + * wait messages - all other requests + * + * The reply to these messages is waited for by their caller. + * They also transition from outgoing to pending when sent, but + * the message buffer is returned to the caller with the reply + * contents. The caller is responsible for freeing these messages. + * + * As an optimization, if the outgoing list and the socket send + * buffer are empty, the message is sent inline instead of being + * added to the outgoing list. The rest of the transitions are + * unchanged. 
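+ * + * A minimal wait-case sketch (error handling elided; this mirrors + * vfio_user_device_reset()): + * + * VFIOUserHdr hdr; + * Error *err = NULL; + * + * vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0); + * if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &err)) { + * error_report_err(err); + * }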
+ */ +static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg, + Error **errp) +{ + int ret; + + /* older coalesced writes go first */ + if (proxy->wr_multi != NULL && + ((msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REQUEST)) { + vfio_user_flush_multi(proxy); + } + + /* + * Unsent outgoing msgs - add to tail + */ + if (!QTAILQ_EMPTY(&proxy->outgoing)) { + QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next); + proxy->num_outgoing++; + return true; + } + + /* + * Try inline - if blocked, queue it and kick send poller + */ + if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) { + ret = QIO_CHANNEL_ERR_BLOCK; + } else { + ret = vfio_user_send_qio(proxy, msg, errp); + } + + if (ret == QIO_CHANNEL_ERR_BLOCK) { + QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next); + proxy->num_outgoing = 1; + qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, + vfio_user_recv, proxy->ctx, + vfio_user_send, proxy); + return true; + } + if (ret == -1) { + return false; + } + + /* + * Sent - free async, add others to pending + */ + if (msg->type == VFIO_MSG_ASYNC) { + vfio_user_recycle(proxy, msg); + } else { + QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); + msg->pending = true; + } + + return true; +} + +/* + * nowait send - vfio_user_wait_reqs() can wait for it later + * + * Returns false if we did not successfully send the message, in which + * case @errp will be populated. + * + * In either case, ownership of @hdr and @fds is taken, and the caller must + * *not* free them itself. + */ +bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds, int rsize, Error **errp) +{ + VFIOUserMsg *msg; + + QEMU_LOCK_GUARD(&proxy->lock); + + msg = vfio_user_getmsg(proxy, hdr, fds); + msg->id = hdr->id; + msg->rsize = rsize ? rsize : hdr->size; + msg->type = VFIO_MSG_NOWAIT; + + if (hdr->flags & VFIO_USER_NO_REPLY) { + error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__); + vfio_user_recycle(proxy, msg); + return false; + } + + if (!vfio_user_send_queued(proxy, msg, errp)) { + vfio_user_recycle(proxy, msg); + return false; + } + + proxy->last_nowait = msg; + + return true; +} + +/* + * Returns false if we did not successfully receive a reply message, in which + * case @errp will be populated. + * + * In either case, the caller must free @hdr and @fds if needed. + */ +bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds, int rsize, Error **errp) +{ + VFIOUserMsg *msg; + bool ok = false; + + if (hdr->flags & VFIO_USER_NO_REPLY) { + error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__); + return false; + } + + qemu_mutex_lock(&proxy->lock); + + msg = vfio_user_getmsg(proxy, hdr, fds); + msg->id = hdr->id; + msg->rsize = rsize ? rsize : hdr->size; + msg->type = VFIO_MSG_WAIT; + + ok = vfio_user_send_queued(proxy, msg, errp); + + if (ok) { + while (!msg->complete) { + if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, + proxy->wait_time)) { + VFIOUserMsgQ *list; + + list = msg->pending ? &proxy->pending : &proxy->outgoing; + QTAILQ_REMOVE(list, msg, next); + error_setg_errno(errp, ETIMEDOUT, + "timed out waiting for reply"); + ok = false; + break; + } + } + } + + vfio_user_recycle(proxy, msg); + + qemu_mutex_unlock(&proxy->lock); + + return ok; +} + +/* + * async send - msg can be queued, but will be freed when sent + * + * Returns false on failure, in which case @errp will be populated. + * + * In either case, ownership of @hdr and @fds is taken, and the caller must + * *not* free them itself. 
+ */
+bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+                          VFIOUserFDs *fds, Error **errp)
+{
+    VFIOUserMsg *msg;
+
+    QEMU_LOCK_GUARD(&proxy->lock);
+
+    msg = vfio_user_getmsg(proxy, hdr, fds);
+    msg->id = hdr->id;
+    msg->rsize = 0;
+    msg->type = VFIO_MSG_ASYNC;
+
+    if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
+        error_setg_errno(errp, EINVAL, "%s on sync message", __func__);
+        vfio_user_recycle(proxy, msg);
+        return false;
+    }
+
+    if (!vfio_user_send_queued(proxy, msg, errp)) {
+        vfio_user_recycle(proxy, msg);
+        return false;
+    }
+
+    return true;
+}
+
+void vfio_user_wait_reqs(VFIOUserProxy *proxy)
+{
+    VFIOUserMsg *msg;
+
+    /*
+     * Any DMA map/unmap requests sent in the middle
+     * of a memory region transaction were sent nowait.
+     * Wait for them here.
+     */
+    qemu_mutex_lock(&proxy->lock);
+    if (proxy->last_nowait != NULL) {
+        /*
+         * Change type to WAIT to wait for reply
+         */
+        msg = proxy->last_nowait;
+        msg->type = VFIO_MSG_WAIT;
+        proxy->last_nowait = NULL;
+        while (!msg->complete) {
+            if (!qemu_cond_timedwait(&msg->cv, &proxy->lock,
+                                     proxy->wait_time)) {
+                VFIOUserMsgQ *list;
+
+                list = msg->pending ? &proxy->pending : &proxy->outgoing;
+                QTAILQ_REMOVE(list, msg, next);
+                error_printf("vfio_user_wait_reqs - timed out\n");
+                break;
+            }
+        }
+
+        if (msg->hdr->flags & VFIO_USER_ERROR) {
+            error_printf("vfio_user_wait_reqs - error reply on async ");
+            error_printf("request: command %x error %s\n", msg->hdr->command,
+                         strerror(msg->hdr->error_reply));
+        }
+
+        /*
+         * Change type back to NOWAIT to free
+         */
+        msg->type = VFIO_MSG_NOWAIT;
+        vfio_user_recycle(proxy, msg);
+    }
+
+    qemu_mutex_unlock(&proxy->lock);
+}
+
+/*
+ * Reply to an incoming request.
+ */
+void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size)
+{
+    Error *local_err = NULL;
+
+    if (size < sizeof(VFIOUserHdr)) {
+        error_printf("%s: size too small\n", __func__);
+        g_free(hdr);
+        return;
+    }
+
+    /*
+     * convert header to associated reply
+     */
+    hdr->flags = VFIO_USER_REPLY;
+    hdr->size = size;
+
+    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
+        error_report_err(local_err);
+    }
+}
+
+/*
+ * Send an error reply to an incoming request.
+ */
+void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error)
+{
+    Error *local_err = NULL;
+
+    /*
+     * convert header to associated reply
+     */
+    hdr->flags = VFIO_USER_REPLY;
+    hdr->flags |= VFIO_USER_ERROR;
+    hdr->error_reply = error;
+    hdr->size = sizeof(*hdr);
+
+    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
+        error_report_err(local_err);
+    }
+}
+
+/*
+ * Close FDs erroneously received in an incoming request.
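+ *
+ * For example (sketch), a request handler that expects no descriptors
+ * could reject them with:
+ *
+ *   if (msg->fds != NULL) {
+ *       vfio_user_putfds(msg);
+ *       vfio_user_send_error(proxy, msg->hdr, EINVAL);
+ *       return;
+ *   }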
+ */ +void vfio_user_putfds(VFIOUserMsg *msg) +{ + VFIOUserFDs *fds = msg->fds; + int i; + + for (i = 0; i < fds->recv_fds; i++) { + close(fds->fds[i]); + } + g_free(fds); + msg->fds = NULL; +} + +void +vfio_user_disable_posted_writes(VFIOUserProxy *proxy) +{ + WITH_QEMU_LOCK_GUARD(&proxy->lock) { + proxy->flags |= VFIO_PROXY_NO_POST; + } +} + +static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets = + QLIST_HEAD_INITIALIZER(vfio_user_sockets); + +VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp) +{ + VFIOUserProxy *proxy; + QIOChannelSocket *sioc; + QIOChannel *ioc; + char *sockname; + + if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) { + error_setg(errp, "vfio_user_connect - bad address family"); + return NULL; + } + sockname = addr->u.q_unix.path; + + sioc = qio_channel_socket_new(); + ioc = QIO_CHANNEL(sioc); + if (qio_channel_socket_connect_sync(sioc, addr, errp)) { + object_unref(OBJECT(ioc)); + return NULL; + } + qio_channel_set_blocking(ioc, false, NULL); + + proxy = g_malloc0(sizeof(VFIOUserProxy)); + proxy->sockname = g_strdup_printf("unix:%s", sockname); + proxy->ioc = ioc; + + /* init defaults */ + proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER; + proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS; + proxy->max_dma = VFIO_USER_DEF_MAP_MAX; + proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE; + proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP; + proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE; + + proxy->flags = VFIO_PROXY_CLIENT; + proxy->state = VFIO_PROXY_CONNECTED; + + qemu_mutex_init(&proxy->lock); + qemu_cond_init(&proxy->close_cv); + + if (vfio_user_iothread == NULL) { + vfio_user_iothread = iothread_create("VFIO user", errp); + } + + proxy->ctx = iothread_get_aio_context(vfio_user_iothread); + proxy->req_bh = qemu_bh_new(vfio_user_request, proxy); + + QTAILQ_INIT(&proxy->outgoing); + QTAILQ_INIT(&proxy->incoming); + QTAILQ_INIT(&proxy->free); + QTAILQ_INIT(&proxy->pending); + QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next); + + return proxy; +} + +void vfio_user_set_handler(VFIODevice *vbasedev, + void (*handler)(void *opaque, VFIOUserMsg *msg), + void *req_arg) +{ + VFIOUserProxy *proxy = vbasedev->proxy; + + proxy->request = handler; + proxy->req_arg = req_arg; + qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, + vfio_user_recv, NULL, NULL, proxy); +} + +void vfio_user_disconnect(VFIOUserProxy *proxy) +{ + VFIOUserMsg *r1, *r2; + + qemu_mutex_lock(&proxy->lock); + + /* our side is quitting */ + if (proxy->state == VFIO_PROXY_CONNECTED) { + vfio_user_shutdown(proxy); + if (!QTAILQ_EMPTY(&proxy->pending)) { + error_printf("vfio_user_disconnect: outstanding requests\n"); + } + } + object_unref(OBJECT(proxy->ioc)); + proxy->ioc = NULL; + qemu_bh_delete(proxy->req_bh); + proxy->req_bh = NULL; + + proxy->state = VFIO_PROXY_CLOSING; + QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) { + qemu_cond_destroy(&r1->cv); + QTAILQ_REMOVE(&proxy->outgoing, r1, next); + g_free(r1); + } + QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) { + qemu_cond_destroy(&r1->cv); + QTAILQ_REMOVE(&proxy->incoming, r1, next); + g_free(r1); + } + QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) { + qemu_cond_destroy(&r1->cv); + QTAILQ_REMOVE(&proxy->pending, r1, next); + g_free(r1); + } + QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) { + qemu_cond_destroy(&r1->cv); + QTAILQ_REMOVE(&proxy->free, r1, next); + g_free(r1); + } + + /* + * Make sure the iothread isn't blocking anywhere + * with a ref to this proxy by waiting for a BH + * handler to run after the proxy fd handlers were + * deleted 
above. + */ + aio_bh_schedule_oneshot(proxy->ctx, vfio_user_close_cb, proxy); + + while (proxy->state != VFIO_PROXY_CLOSED) { + qemu_cond_wait(&proxy->close_cv, &proxy->lock); + } + + /* we now hold the only ref to proxy */ + qemu_mutex_unlock(&proxy->lock); + qemu_cond_destroy(&proxy->close_cv); + qemu_mutex_destroy(&proxy->lock); + + QLIST_REMOVE(proxy, next); + if (QLIST_EMPTY(&vfio_user_sockets)) { + iothread_destroy(vfio_user_iothread); + vfio_user_iothread = NULL; + } + + g_free(proxy->sockname); + g_free(proxy); +} + +void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, + uint32_t size, uint32_t flags) +{ + static uint16_t next_id; + + hdr->id = qatomic_fetch_inc(&next_id); + hdr->command = cmd; + hdr->size = size; + hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST; + hdr->error_reply = 0; +} + +struct cap_entry { + const char *name; + bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp); +}; + +static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict, + struct cap_entry caps[], Error **errp) +{ + QObject *qobj; + struct cap_entry *p; + + for (p = caps; p->name != NULL; p++) { + qobj = qdict_get(qdict, p->name); + if (qobj != NULL) { + if (!p->check(proxy, qobj, errp)) { + return false; + } + qdict_del(qdict, p->name); + } + } + + /* warning, for now */ + if (qdict_size(qdict) != 0) { + warn_report("spurious capabilities"); + } + return true; +} + +static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t pgsize; + + if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE); + return false; + } + + /* must be larger than default */ + if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) { + error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize); + return false; + } + + proxy->migr_pgsize = pgsize; + return true; +} + +static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t bitmap_size; + + if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP); + return false; + } + + /* can only lower it */ + if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) { + error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP); + return false; + } + + proxy->max_bitmap = bitmap_size; + return true; +} + +static struct cap_entry caps_migr[] = { + { VFIO_USER_CAP_PGSIZE, check_migr_pgsize }, + { VFIO_USER_CAP_MAX_BITMAP, check_bitmap }, + { NULL } +}; + +static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t max_send_fds; + + if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) || + max_send_fds > VFIO_USER_MAX_MAX_FDS) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); + return false; + } + proxy->max_send_fds = max_send_fds; + return true; +} + +static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t max_xfer_size; + + if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) || + max_xfer_size > VFIO_USER_MAX_MAX_XFER) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER); + return false; + } + proxy->max_xfer_size = max_xfer_size; + return true; +} + +static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t pgsizes; + + if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) { + error_setg(errp, "malformed 
%s", VFIO_USER_CAP_PGSIZES); + return false; + } + + /* must be larger than default */ + if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) { + error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes); + return false; + } + + proxy->dma_pgsizes = pgsizes; + return true; +} + +static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t max_dma; + + if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX); + return false; + } + + /* can only lower it */ + if (max_dma > VFIO_USER_DEF_MAP_MAX) { + error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX); + return false; + } + + proxy->max_dma = max_dma; + return true; +} + +static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QDict *qdict = qobject_to(QDict, qobj); + + if (qdict == NULL) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); + return true; + } + return caps_parse(proxy, qdict, caps_migr, errp); +} + +static bool check_multi(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QBool *qb = qobject_to(QBool, qobj); + + if (qb == NULL) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MULTI); + return false; + } + if (qbool_get_bool(qb)) { + proxy->flags |= VFIO_PROXY_USE_MULTI; + } + return true; +} + +static struct cap_entry caps_cap[] = { + { VFIO_USER_CAP_MAX_FDS, check_max_fds }, + { VFIO_USER_CAP_MAX_XFER, check_max_xfer }, + { VFIO_USER_CAP_PGSIZES, check_pgsizes }, + { VFIO_USER_CAP_MAP_MAX, check_max_dma }, + { VFIO_USER_CAP_MIGR, check_migr }, + { VFIO_USER_CAP_MULTI, check_multi }, + { NULL } +}; + +static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QDict *qdict = qobject_to(QDict, qobj); + + if (qdict == NULL) { + error_setg(errp, "malformed %s", VFIO_USER_CAP); + return false; + } + return caps_parse(proxy, qdict, caps_cap, errp); +} + +static struct cap_entry ver_0_0[] = { + { VFIO_USER_CAP, check_cap }, + { NULL } +}; + +static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps, + Error **errp) +{ + QObject *qobj; + QDict *qdict; + bool ret; + + qobj = qobject_from_json(caps, NULL); + if (qobj == NULL) { + error_setg(errp, "malformed capabilities %s", caps); + return false; + } + qdict = qobject_to(QDict, qobj); + if (qdict == NULL) { + error_setg(errp, "capabilities %s not an object", caps); + qobject_unref(qobj); + return false; + } + ret = caps_parse(proxy, qdict, ver_0_0, errp); + + qobject_unref(qobj); + return ret; +} + +static GString *caps_json(void) +{ + QDict *dict = qdict_new(); + QDict *capdict = qdict_new(); + QDict *migdict = qdict_new(); + GString *str; + + qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE); + qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP); + qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict)); + + qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS); + qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER); + qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE); + qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX); + qdict_put_bool(capdict, VFIO_USER_CAP_MULTI, true); + + qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict)); + + str = qobject_to_json(QOBJECT(dict)); + qobject_unref(dict); + return str; +} + +bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp) +{ + g_autofree VFIOUserVersion *msgp = NULL; + GString *caps; + char *reply; + int size, caplen; + + caps 
= caps_json(); + caplen = caps->len + 1; + size = sizeof(*msgp) + caplen; + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0); + msgp->major = VFIO_USER_MAJOR_VER; + msgp->minor = VFIO_USER_MINOR_VER; + memcpy(&msgp->capabilities, caps->str, caplen); + g_string_free(caps, true); + trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities); + + if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) { + return false; + } + + if (msgp->hdr.flags & VFIO_USER_ERROR) { + error_setg_errno(errp, msgp->hdr.error_reply, "version reply"); + return false; + } + + if (msgp->major != VFIO_USER_MAJOR_VER || + msgp->minor > VFIO_USER_MINOR_VER) { + error_setg(errp, "incompatible server version"); + return false; + } + + reply = msgp->capabilities; + if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') { + error_setg(errp, "corrupt version reply"); + return false; + } + + if (!caps_check(proxy, msgp->minor, reply, errp)) { + return false; + } + + trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities); + return true; +} + +void vfio_user_flush_multi(VFIOUserProxy *proxy) +{ + VFIOUserMsg *msg; + VFIOUserWRMulti *wm = proxy->wr_multi; + Error *local_err = NULL; + + proxy->wr_multi = NULL; + + /* adjust size for actual # of writes */ + wm->hdr.size -= (VFIO_USER_MULTI_MAX - wm->wr_cnt) * sizeof(VFIOUserWROne); + + msg = vfio_user_getmsg(proxy, &wm->hdr, NULL); + msg->id = wm->hdr.id; + msg->rsize = 0; + msg->type = VFIO_MSG_ASYNC; + trace_vfio_user_wrmulti("flush", wm->wr_cnt); + + if (!vfio_user_send_queued(proxy, msg, &local_err)) { + error_report_err(local_err); + vfio_user_recycle(proxy, msg); + } +} + +void vfio_user_create_multi(VFIOUserProxy *proxy) +{ + VFIOUserWRMulti *wm; + + wm = g_malloc0(sizeof(*wm)); + vfio_user_request_msg(&wm->hdr, VFIO_USER_REGION_WRITE_MULTI, + sizeof(*wm), VFIO_USER_NO_REPLY); + proxy->wr_multi = wm; +} + +void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index, + off_t offset, uint32_t count, void *data) +{ + VFIOUserWRMulti *wm = proxy->wr_multi; + VFIOUserWROne *w1 = &wm->wrs[wm->wr_cnt]; + + w1->offset = offset; + w1->region = index; + w1->count = count; + memcpy(&w1->data, data, count); + + wm->wr_cnt++; + trace_vfio_user_wrmulti("add", wm->wr_cnt); + if (wm->wr_cnt == VFIO_USER_MULTI_MAX || + proxy->num_outgoing < VFIO_USER_OUT_LOW) { + vfio_user_flush_multi(proxy); + } +} diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h new file mode 100644 index 0000000..61e64a0 --- /dev/null +++ b/hw/vfio-user/proxy.h @@ -0,0 +1,135 @@ +#ifndef VFIO_USER_PROXY_H +#define VFIO_USER_PROXY_H + +/* + * vfio protocol over a UNIX socket. + * + * Copyright © 2018, 2021 Oracle and/or its affiliates. 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "io/channel.h"
+#include "io/channel-socket.h"
+
+#include "qemu/queue.h"
+#include "qemu/sockets.h"
+#include "qemu/thread.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio-user/protocol.h"
+
+typedef struct {
+    int send_fds;
+    int recv_fds;
+    int *fds;
+} VFIOUserFDs;
+
+enum msg_type {
+    VFIO_MSG_NONE,
+    VFIO_MSG_ASYNC,
+    VFIO_MSG_WAIT,
+    VFIO_MSG_NOWAIT,
+    VFIO_MSG_REQ,
+};
+
+typedef struct VFIOUserMsg {
+    QTAILQ_ENTRY(VFIOUserMsg) next;
+    VFIOUserHdr *hdr;
+    VFIOUserFDs *fds;
+    uint32_t rsize;
+    uint32_t id;
+    QemuCond cv;
+    bool complete;
+    bool pending;
+    enum msg_type type;
+} VFIOUserMsg;
+
+
+enum proxy_state {
+    VFIO_PROXY_CONNECTED = 1,
+    VFIO_PROXY_ERROR = 2,
+    VFIO_PROXY_CLOSING = 3,
+    VFIO_PROXY_CLOSED = 4,
+};
+
+typedef QTAILQ_HEAD(VFIOUserMsgQ, VFIOUserMsg) VFIOUserMsgQ;
+
+typedef struct VFIOUserProxy {
+    QLIST_ENTRY(VFIOUserProxy) next;
+    char *sockname;
+    struct QIOChannel *ioc;
+    void (*request)(void *opaque, VFIOUserMsg *msg);
+    void *req_arg;
+    uint64_t max_xfer_size;
+    uint64_t max_send_fds;
+    uint64_t max_dma;
+    uint64_t dma_pgsizes;
+    uint64_t max_bitmap;
+    uint64_t migr_pgsize;
+    int flags;
+    uint32_t wait_time;
+    QemuCond close_cv;
+    AioContext *ctx;
+    QEMUBH *req_bh;
+    bool async_ops;
+
+    /*
+     * above only changed when BQL is held
+     * below are protected by per-proxy lock
+     */
+    QemuMutex lock;
+    VFIOUserMsgQ free;
+    VFIOUserMsgQ pending;
+    VFIOUserMsgQ incoming;
+    VFIOUserMsgQ outgoing;
+    VFIOUserMsg *last_nowait;
+    VFIOUserMsg *part_recv;
+    size_t recv_left;
+    VFIOUserWRMulti *wr_multi;
+    int num_outgoing;
+    enum proxy_state state;
+} VFIOUserProxy;
+
+/* VFIOUserProxy flags */
+#define VFIO_PROXY_CLIENT        0x1
+#define VFIO_PROXY_FORCE_QUEUED  0x4
+#define VFIO_PROXY_NO_POST       0x8
+#define VFIO_PROXY_USE_MULTI     0x10
+
+/* coalescing high and low water marks for VFIOUserProxy num_outgoing */
+#define VFIO_USER_OUT_HIGH       1024
+#define VFIO_USER_OUT_LOW        128
+
+typedef struct VFIODevice VFIODevice;
+
+VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
+void vfio_user_disconnect(VFIOUserProxy *proxy);
+void vfio_user_set_handler(VFIODevice *vbasedev,
+                           void (*handler)(void *opaque, VFIOUserMsg *msg),
+                           void *reqarg);
+bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp);
+
+VFIOUserFDs *vfio_user_getfds(int numfds);
+void vfio_user_putfds(VFIOUserMsg *msg);
+
+void vfio_user_disable_posted_writes(VFIOUserProxy *proxy);
+
+void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
+                           uint32_t size, uint32_t flags);
+void vfio_user_wait_reqs(VFIOUserProxy *proxy);
+bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+                         VFIOUserFDs *fds, int rsize, Error **errp);
+bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+                           VFIOUserFDs *fds, int rsize, Error **errp);
+bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+                          VFIOUserFDs *fds, Error **errp);
+
+void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size);
+void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error);
+
+void vfio_user_flush_multi(VFIOUserProxy *proxy);
+void vfio_user_create_multi(VFIOUserProxy *proxy);
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+                         off_t offset, uint32_t count, void *data);
+
+#endif /* VFIO_USER_PROXY_H */
diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events
new file mode 100644
index 0000000..abb67f4
--- /dev/null
+++ b/hw/vfio-user/trace-events
@@ -0,0 +1,20 @@
+# See 
docs/devel/tracing.rst for syntax documentation. +# +# SPDX-License-Identifier: GPL-2.0-or-later + +# common.c +vfio_user_recv_hdr(const char *name, uint16_t id, uint16_t cmd, uint32_t size, uint32_t flags) " (%s) id 0x%x cmd 0x%x size 0x%x flags 0x%x" +vfio_user_recv_read(uint16_t id, int read) " id 0x%x read 0x%x" +vfio_user_recv_request(uint16_t cmd) " command 0x%x" +vfio_user_send_write(uint16_t id, int wrote) " id 0x%x wrote 0x%x" +vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " major %d minor %d caps: %s" +vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs %d" +vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index %d flags 0x%x size 0x%"PRIx64 +vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d offset 0x%"PRIx64" count %d" +vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index %d flags 0x%x count %d" +vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t flags) " index %d start %d count %d flags 0x%x" +vfio_user_wrmulti(const char *s, uint64_t wr_cnt) " %s count 0x%"PRIx64 + +# container.c +vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d" +vfio_user_dma_unmap(uint64_t iova, uint64_t size, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" flags 0x%x async_ops %d" diff --git a/hw/vfio-user/trace.h b/hw/vfio-user/trace.h new file mode 100644 index 0000000..9cf02d9 --- /dev/null +++ b/hw/vfio-user/trace.h @@ -0,0 +1,4 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "trace/trace-hw_vfio_user.h" diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig index 7cdba05..91d9023 100644 --- a/hw/vfio/Kconfig +++ b/hw/vfio/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + config VFIO bool depends on LINUX diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 1207c08..7719f24 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -10,6 +10,7 @@ * directory. 
*/ +#include <stdbool.h> #include "qemu/osdep.h" #include CONFIG_DEVICES /* CONFIG_IOMMUFD */ #include <linux/vfio.h> @@ -18,8 +19,10 @@ #include "hw/vfio/vfio-device.h" #include "system/iommufd.h" #include "hw/s390x/ap-device.h" +#include "hw/s390x/css.h" #include "qemu/error-report.h" #include "qemu/event_notifier.h" +#include "qemu/lockable.h" #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/option.h" @@ -37,8 +40,23 @@ struct VFIOAPDevice { APDevice apdev; VFIODevice vdev; EventNotifier req_notifier; + EventNotifier cfg_notifier; }; +typedef struct APConfigChgEvent { + QTAILQ_ENTRY(APConfigChgEvent) next; +} APConfigChgEvent; + +static QTAILQ_HEAD(, APConfigChgEvent) cfg_chg_events = + QTAILQ_HEAD_INITIALIZER(cfg_chg_events); + +static QemuMutex cfg_chg_events_lock; + +static void __attribute__((constructor)) vfio_ap_global_init(void) +{ + qemu_mutex_init(&cfg_chg_events_lock); +} + OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) static void vfio_ap_compute_needs_reset(VFIODevice *vdev) @@ -70,14 +88,65 @@ static void vfio_ap_req_notifier_handler(void *opaque) } } +static void vfio_ap_cfg_chg_notifier_handler(void *opaque) +{ + APConfigChgEvent *cfg_chg_event; + VFIOAPDevice *vapdev = opaque; + + if (!event_notifier_test_and_clear(&vapdev->cfg_notifier)) { + return; + } + + cfg_chg_event = g_new0(APConfigChgEvent, 1); + + WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) { + QTAILQ_INSERT_TAIL(&cfg_chg_events, cfg_chg_event, next); + } + + css_generate_css_crws(0); + +} + +int ap_chsc_sei_nt0_get_event(void *res) +{ + ChscSeiNt0Res *nt0_res = (ChscSeiNt0Res *)res; + APConfigChgEvent *cfg_chg_event; + + WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) { + if (QTAILQ_EMPTY(&cfg_chg_events)) { + return EVENT_INFORMATION_NOT_STORED; + } + + cfg_chg_event = QTAILQ_FIRST(&cfg_chg_events); + QTAILQ_REMOVE(&cfg_chg_events, cfg_chg_event, next); + } + + memset(nt0_res, 0, sizeof(*nt0_res)); + g_free(cfg_chg_event); + nt0_res->flags |= PENDING_EVENT_INFO_BITMASK; + nt0_res->length = sizeof(ChscSeiNt0Res); + nt0_res->code = NT0_RES_RESPONSE_CODE; + nt0_res->nt = NT0_RES_NT_DEFAULT; + nt0_res->rs = NT0_RES_RS_AP_CHANGE; + nt0_res->cc = NT0_RES_CC_AP_CHANGE; + + return EVENT_INFORMATION_STORED; +} + +bool ap_chsc_sei_nt0_have_event(void) +{ + QEMU_LOCK_GUARD(&cfg_chg_events_lock); + return !QTAILQ_EMPTY(&cfg_chg_events); +} + static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, unsigned int irq, Error **errp) { int fd; - size_t argsz; + int ret; IOHandler *fd_read; EventNotifier *notifier; - g_autofree struct vfio_irq_info *irq_info = NULL; + struct vfio_irq_info irq_info; VFIODevice *vdev = &vapdev->vdev; switch (irq) { @@ -85,6 +154,10 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, notifier = &vapdev->req_notifier; fd_read = vfio_ap_req_notifier_handler; break; + case VFIO_AP_CFG_CHG_IRQ_INDEX: + notifier = &vapdev->cfg_notifier; + fd_read = vfio_ap_cfg_chg_notifier_handler; + break; default: error_setg(errp, "vfio: Unsupported device irq(%d)", irq); return false; @@ -96,14 +169,15 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, return false; } - argsz = sizeof(*irq_info); - irq_info = g_malloc0(argsz); - irq_info->index = irq; - irq_info->argsz = argsz; + ret = vfio_device_get_irq_info(vdev, irq, &irq_info); - if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, - irq_info) < 0 || irq_info->count < 1) { - error_setg_errno(errp, errno, "vfio: Error getting irq info"); + if (ret < 0) { + error_setg_errno(errp, -ret, "vfio: Error getting irq 
info"); + return false; + } + + if (irq_info.count < 1) { + error_setg(errp, "vfio: Error getting irq info, count=0"); return false; } @@ -136,6 +210,9 @@ static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, case VFIO_AP_REQ_IRQ_INDEX: notifier = &vapdev->req_notifier; break; + case VFIO_AP_CFG_CHG_IRQ_INDEX: + notifier = &vapdev->cfg_notifier; + break; default: error_report("vfio: Unsupported device irq(%d)", irq); return; @@ -175,11 +252,20 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) warn_report_err(err); } + if (!vfio_ap_register_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX, &err)) + { + /* + * Report this error, but do not make it a failing condition. + * Lack of this IRQ in the host does not prevent normal operation. + */ + warn_report_err(err); + } + return; error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); } static void vfio_ap_unrealize(DeviceState *dev) @@ -187,8 +273,9 @@ static void vfio_ap_unrealize(DeviceState *dev) VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); + vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX); vfio_device_detach(&vapdev->vdev); - g_free(vapdev->vdev.name); + vfio_device_free_name(&vapdev->vdev); } static const Property vfio_ap_properties[] = { diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index fde0c3f..9560b8d 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -376,8 +376,8 @@ static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, Error **errp) { VFIODevice *vdev = &vcdev->vdev; - g_autofree struct vfio_irq_info *irq_info = NULL; - size_t argsz; + struct vfio_irq_info irq_info; + int ret; int fd; EventNotifier *notifier; IOHandler *fd_read; @@ -406,13 +406,15 @@ static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, return false; } - argsz = sizeof(*irq_info); - irq_info = g_malloc0(argsz); - irq_info->index = irq; - irq_info->argsz = argsz; - if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, - irq_info) < 0 || irq_info->count < 1) { - error_setg_errno(errp, errno, "vfio: Error getting irq info"); + ret = vfio_device_get_irq_info(vdev, irq, &irq_info); + + if (ret < 0) { + error_setg_errno(errp, -ret, "vfio: Error getting irq info"); + return false; + } + + if (irq_info.count < 1) { + error_setg(errp, "vfio: Error getting irq info, count=0"); return false; } @@ -502,7 +504,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) vcdev->io_region_offset = info->offset; vcdev->io_region = g_malloc0(info->size); - g_free(info); /* check for the optional async command region */ ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -515,7 +516,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->async_cmd_region_offset = info->offset; vcdev->async_cmd_region = g_malloc0(info->size); - g_free(info); } ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -528,7 +528,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->schib_region_offset = info->offset; vcdev->schib_region = g_malloc(info->size); - g_free(info); } ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW, @@ -542,7 +541,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) } vcdev->crw_region_offset = info->offset; vcdev->crw_region = g_malloc(info->size); - g_free(info); } return true; @@ -552,7 +550,6 @@ out_err: g_free(vcdev->schib_region); 
g_free(vcdev->async_cmd_region); g_free(vcdev->io_region); - g_free(info); return false; } @@ -622,7 +619,7 @@ out_io_notifier_err: out_region_err: vfio_device_detach(vbasedev); out_attach_dev_err: - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); out_unrealize: if (cdc->unrealize) { cdc->unrealize(cdev); @@ -640,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev) vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); vfio_ccw_put_region(vcdev); vfio_device_detach(&vcdev->vdev); - g_free(vcdev->vdev.name); + vfio_device_free_name(&vcdev->vdev); if (cdc->unrealize) { cdc->unrealize(cdev); diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 09340fd..5630497 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -75,22 +75,31 @@ void vfio_address_space_insert(VFIOAddressSpace *space, int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly) + void *vaddr, bool readonly, MemoryRegion *mr) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + RAMBlock *rb = mr->ram_block; + int mfd = rb ? qemu_ram_get_fd(rb) : -1; + if (mfd >= 0 && vioc->dma_map_file) { + unsigned long start = vaddr - qemu_ram_get_host_addr(rb); + unsigned long offset = qemu_ram_get_fd_offset(rb); + + return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset, + readonly); + } g_assert(vioc->dma_map); - return vioc->dma_map(bcontainer, iova, size, vaddr, readonly); + return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr); } int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) + IOMMUTLBEntry *iotlb, bool unmap_all) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); g_assert(vioc->dma_unmap); - return vioc->dma_unmap(bcontainer, iova, size, iotlb); + return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all); } bool vfio_container_add_section_window(VFIOContainerBase *bcontainer, @@ -198,11 +207,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT; - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { - return -errno; - } - - return 0; + return vbasedev->io_ops->device_feature(vbasedev, feature); } static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer, diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 77ff56b..3e13fea 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -31,10 +31,11 @@ #include "system/reset.h" #include "trace.h" #include "qapi/error.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "pci.h" #include "hw/vfio/vfio-container.h" #include "vfio-helpers.h" -#include "vfio-cpr.h" #include "vfio-listener.h" #define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" @@ -119,12 +120,9 @@ unmap_exit: return ret; } -/* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ -static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) +static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) { const VFIOContainer *container = container_of(bcontainer, VFIOContainer, bcontainer); @@ -138,6 +136,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, int ret; Error *local_err = NULL; + 
g_assert(!cpr_is_incoming()); + if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) { if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) && bcontainer->dirty_pages_supported) { @@ -181,8 +181,37 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, return 0; } +/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb, bool unmap_all) +{ + int ret; + + if (unmap_all) { + /* The unmap ioctl doesn't accept a full 64-bit span. */ + Int128 llsize = int128_rshift(int128_2_64(), 1); + + ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize), + iotlb); + + if (ret == 0) { + ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize), + int128_get64(llsize), iotlb); + } + + } else { + ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb); + } + + return ret; +} + static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) { const VFIOContainer *container = container_of(bcontainer, VFIOContainer, bcontainer); @@ -205,7 +234,7 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, */ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || (errno == EBUSY && - vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && + vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 && ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { return 0; } @@ -400,7 +429,12 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, return NULL; } - if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { + /* + * During CPR, just set the container type and skip the ioctls, as the + * container and group are already configured in the kernel. + */ + if (!cpr_is_incoming() && + !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { return NULL; } @@ -511,16 +545,10 @@ static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) return true; } -static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, - Error **errp) +static bool vfio_container_attach_discard_disable(VFIOContainer *container, + VFIOGroup *group, Error **errp) { - VFIOContainer *container; - VFIOContainerBase *bcontainer; - int ret, fd; - VFIOAddressSpace *space; - VFIOIOMMUClass *vioc; - - space = vfio_address_space_get(as); + int ret; /* * VFIO is currently incompatible with discarding of RAM insofar as the @@ -553,97 +581,152 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, * details once we know which type of IOMMU we are using. 
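 *
 * As a concrete illustration: a virtio-balloon inflate discards guest
 * pages with madvise(), but pages that are already pinned for device
 * DMA stay mapped in the IOMMU, so device and guest views of that
 * memory would silently diverge.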
*/ - QLIST_FOREACH(bcontainer, &space->containers, next) { - container = container_of(bcontainer, VFIOContainer, bcontainer); - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, - "Cannot set discarding of RAM broken"); - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, - &container->fd)) { - error_report("vfio: error disconnecting group %d from" - " container", group->groupid); - } - return false; - } - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - vfio_group_add_kvm_device(group); - return true; + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { + error_report("vfio: error disconnecting group %d from" + " container", group->groupid); } } + return !ret; +} - fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); - if (fd < 0) { - goto put_space_exit; +static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group, + Error **errp) +{ + if (!vfio_container_attach_discard_disable(container, group, errp)) { + return false; + } + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + vfio_group_add_kvm_device(group); + /* + * Remember the container fd for each group, so we can attach to the same + * container after CPR. + */ + cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd); + return true; +} + +static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group) +{ + QLIST_REMOVE(group, container_next); + group->container = NULL; + vfio_group_del_kvm_device(group); + vfio_ram_block_discard_disable(container, false); + cpr_delete_fd("vfio_container_for_group", group->groupid); +} + +static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, + Error **errp) +{ + VFIOContainer *container; + VFIOContainerBase *bcontainer; + int ret, fd = -1; + VFIOAddressSpace *space; + VFIOIOMMUClass *vioc = NULL; + bool new_container = false; + bool group_was_added = false; + + space = vfio_address_space_get(as); + fd = cpr_find_fd("vfio_container_for_group", group->groupid); + + if (!cpr_is_incoming()) { + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOContainer, bcontainer); + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + return vfio_container_group_add(container, group, errp); + } + } + + fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); + if (fd < 0) { + goto fail; + } + } else { + /* + * For incoming CPR, the group is already attached in the kernel. + * If a container with matching fd is found, then update the + * userland group list and return. If not, then after the loop, + * create the container struct and group list. 
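+         *
+         * Rough flow of the two paths (illustrative):
+         *
+         *   normal start: open /dev/vfio/vfio -> VFIO_GROUP_SET_CONTAINER
+         *                 -> vfio_set_iommu()
+         *   incoming CPR: cpr_find_fd("vfio_container_for_group", groupid)
+         *                 -> match an existing container -> reuse kernel state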
+ */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOContainer, bcontainer); + + if (vfio_cpr_container_match(container, group, fd)) { + return vfio_container_group_add(container, group, errp); + } + } } ret = ioctl(fd, VFIO_GET_API_VERSION); if (ret != VFIO_API_VERSION) { error_setg(errp, "supported vfio version: %d, " "reported version: %d", VFIO_API_VERSION, ret); - goto close_fd_exit; + goto fail; } container = vfio_create_container(fd, group, errp); if (!container) { - goto close_fd_exit; + goto fail; } + new_container = true; bcontainer = &container->bcontainer; - if (!vfio_cpr_register_container(bcontainer, errp)) { - goto free_container_exit; - } - - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto unregister_container_exit; + if (!vfio_legacy_cpr_register_container(container, errp)) { + goto fail; } vioc = VFIO_IOMMU_GET_CLASS(bcontainer); assert(vioc->setup); if (!vioc->setup(bcontainer, errp)) { - goto enable_discards_exit; + goto fail; } - vfio_group_add_kvm_device(group); - vfio_address_space_insert(space, bcontainer); - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); + if (!vfio_container_group_add(container, group, errp)) { + goto fail; + } + group_was_added = true; - if (!vfio_listener_register(bcontainer, errp)) { - goto listener_release_exit; + /* + * If CPR, register the listener later, after all state that may + * affect regions and mapping boundaries has been cpr load'ed. Later, + * the listener will invoke its callback on each flat section and call + * dma_map to supply the new vaddr, and the calls will match the mappings + * remembered by the kernel. 
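+     *
+     * Sketch of that deferred registration (see vfio_container_post_load()
+     * in cpr-legacy.c):
+     *
+     *   vioc->dma_map = vfio_legacy_cpr_dma_map;    vaddr update only
+     *   vfio_listener_register(bcontainer, ...);    replays each section
+     *   vioc->dma_map = saved_dma_map;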
+ */ + if (!cpr_is_incoming()) { + if (!vfio_listener_register(bcontainer, errp)) { + goto fail; + } } bcontainer->initialized = true; return true; -listener_release_exit: - QLIST_REMOVE(group, container_next); - vfio_group_del_kvm_device(group); - vfio_listener_unregister(bcontainer); - if (vioc->release) { - vioc->release(bcontainer); - } - -enable_discards_exit: - vfio_ram_block_discard_disable(container, false); - -unregister_container_exit: - vfio_cpr_unregister_container(bcontainer); -free_container_exit: - object_unref(container); - -close_fd_exit: - close(fd); +fail: + if (new_container) { + vfio_listener_unregister(bcontainer); + } -put_space_exit: + if (group_was_added) { + vfio_container_group_del(container, group); + } + if (vioc && vioc->release) { + vioc->release(bcontainer); + } + if (new_container) { + vfio_legacy_cpr_unregister_container(container); + object_unref(container); + } + if (fd >= 0) { + close(fd); + } vfio_address_space_put(space); return false; @@ -657,6 +740,7 @@ static void vfio_container_disconnect(VFIOGroup *group) QLIST_REMOVE(group, container_next); group->container = NULL; + cpr_delete_fd("vfio_container_for_group", group->groupid); /* * Explicitly release the listener first before unset container, @@ -679,7 +763,7 @@ static void vfio_container_disconnect(VFIOGroup *group) VFIOAddressSpace *space = bcontainer->space; trace_vfio_container_disconnect(container->fd); - vfio_cpr_unregister_container(bcontainer); + vfio_legacy_cpr_unregister_container(container); close(container->fd); object_unref(container); @@ -710,7 +794,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) group = g_malloc0(sizeof(*group)); snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); - group->fd = qemu_open(path, O_RDWR, errp); + group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp); if (group->fd < 0) { goto free_group_exit; } @@ -742,6 +826,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) return group; close_fd_exit: + cpr_delete_fd("vfio_group", groupid); close(group->fd); free_group_exit: @@ -763,6 +848,7 @@ static void vfio_group_put(VFIOGroup *group) vfio_container_disconnect(group); QLIST_REMOVE(group, next); trace_vfio_group_put(group->fd); + cpr_delete_fd("vfio_group", group->groupid); close(group->fd); g_free(group); } @@ -773,7 +859,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, g_autofree struct vfio_device_info *info = NULL; int fd; - fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + fd = vfio_cpr_group_get_device_fd(group->fd, name); if (fd < 0) { error_setg_errno(errp, errno, "error getting device from group %d", group->groupid); @@ -786,8 +872,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, info = vfio_get_device_info(fd); if (!info) { error_setg_errno(errp, errno, "error getting device info"); - close(fd); - return false; + goto fail; } /* @@ -801,8 +886,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, if (!QLIST_EMPTY(&group->device_list)) { error_setg(errp, "Inconsistent setting of support for discarding " "RAM (e.g., balloon) within group"); - close(fd); - return false; + goto fail; } if (!group->ram_block_discard_allowed) { @@ -811,19 +895,20 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, } } + vfio_device_prepare(vbasedev, &group->container->bcontainer, info); + vbasedev->fd = fd; vbasedev->group = group; QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - vbasedev->num_irqs = 
info->num_irqs; - vbasedev->num_regions = info->num_regions; - vbasedev->flags = info->flags; - trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs); - vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); - return true; + +fail: + close(fd); + cpr_delete_fd(name, 0); + return false; } static void vfio_device_put(VFIODevice *vbasedev) @@ -834,6 +919,7 @@ static void vfio_device_put(VFIODevice *vbasedev) QLIST_REMOVE(vbasedev, next); vbasedev->group = NULL; trace_vfio_device_put(vbasedev->fd); + cpr_delete_fd(vbasedev->name, 0); close(vbasedev->fd); } @@ -875,7 +961,6 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, int groupid = vfio_device_get_groupid(vbasedev, errp); VFIODevice *vbasedev_iter; VFIOGroup *group; - VFIOContainerBase *bcontainer; if (groupid < 0) { return false; @@ -904,13 +989,19 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, goto device_put_exit; } - bcontainer = &group->container->bcontainer; - vbasedev->bcontainer = bcontainer; - QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + if (vbasedev->mdev) { + error_setg(&vbasedev->cpr.mdev_blocker, + "CPR does not support vfio mdev %s", vbasedev->name); + if (migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) < 0) { + goto hiod_unref_exit; + } + } return true; +hiod_unref_exit: + object_unref(vbasedev->hiod); device_put_exit: vfio_device_put(vbasedev); group_put_exit: @@ -922,10 +1013,11 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) { VFIOGroup *group = vbasedev->group; - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; trace_vfio_device_detach(vbasedev->name, group->groupid); + + vfio_device_unprepare(vbasedev); + + migrate_del_blocker(&vbasedev->cpr.mdev_blocker); object_unref(vbasedev->hiod); vfio_device_put(vbasedev); vfio_group_put(group); diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c new file mode 100644 index 0000000..148a06d --- /dev/null +++ b/hw/vfio/cpr-iommufd.c @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2024-2025 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/vfio/vfio-cpr.h" +#include "hw/vfio/vfio-device.h" +#include "migration/blocker.h" +#include "migration/cpr.h" +#include "migration/migration.h" +#include "migration/vmstate.h" +#include "system/iommufd.h" +#include "vfio-iommufd.h" +#include "trace.h" + +typedef struct CprVFIODevice { + char *name; + unsigned int namelen; + uint32_t ioas_id; + int devid; + uint32_t hwpt_id; + QLIST_ENTRY(CprVFIODevice) next; +} CprVFIODevice; + +static const VMStateDescription vmstate_cpr_vfio_device = { + .name = "cpr vfio device", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(namelen, CprVFIODevice), + VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen), + VMSTATE_INT32(devid, CprVFIODevice), + VMSTATE_UINT32(ioas_id, CprVFIODevice), + VMSTATE_UINT32(hwpt_id, CprVFIODevice), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_cpr_vfio_devices = { + .name = CPR_STATE "/vfio devices", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]){ + VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device, + CprVFIODevice, next), + VMSTATE_END_OF_LIST() + } +}; + +static void vfio_cpr_save_device(VFIODevice *vbasedev) +{ + CprVFIODevice *elem = g_new0(CprVFIODevice, 1); + + elem->name = g_strdup(vbasedev->name); + elem->namelen = strlen(vbasedev->name) + 1; + elem->ioas_id = vbasedev->cpr.ioas_id; + elem->devid = vbasedev->devid; + elem->hwpt_id = vbasedev->cpr.hwpt_id; + QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next); +} + +static CprVFIODevice *find_device(const char *name) +{ + CprVFIODeviceList *head = &cpr_state.vfio_devices; + CprVFIODevice *elem; + + QLIST_FOREACH(elem, head, next) { + if (!strcmp(elem->name, name)) { + return elem; + } + } + return NULL; +} + +static void vfio_cpr_delete_device(const char *name) +{ + CprVFIODevice *elem = find_device(name); + + if (elem) { + QLIST_REMOVE(elem, next); + g_free(elem->name); + g_free(elem); + } +} + +static bool vfio_cpr_find_device(VFIODevice *vbasedev) +{ + CprVFIODevice *elem = find_device(vbasedev->name); + + if (elem) { + vbasedev->cpr.ioas_id = elem->ioas_id; + vbasedev->devid = elem->devid; + vbasedev->cpr.hwpt_id = elem->hwpt_id; + trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id); + return true; + } + return false; +} + +static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp) +{ + if (!iommufd_change_process_capable(be)) { + if (errp) { + error_setg(errp, "vfio iommufd backend does not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + } + return false; + } + return true; +} + +static int iommufd_cpr_pre_save(void *opaque) +{ + IOMMUFDBackend *be = opaque; + + /* + * The process has not changed yet, but proactively try the ioctl, + * and it will fail if any DMA mappings are not supported. 
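+     *
+     * Failing here, on the source side, merely aborts the migration while
+     * the old process is intact; discovering the problem only in
+     * post_load, in the new process, would not be recoverable.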
+ */ + if (!iommufd_change_process_capable(be)) { + error_report("some memory regions do not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + return -1; + } + return 0; +} + +static int iommufd_cpr_post_load(void *opaque, int version_id) +{ + IOMMUFDBackend *be = opaque; + Error *local_err = NULL; + + if (!iommufd_change_process(be, &local_err)) { + error_report_err(local_err); + return -1; + } + return 0; +} + +static const VMStateDescription iommufd_cpr_vmstate = { + .name = "iommufd", + .version_id = 0, + .minimum_version_id = 0, + .pre_save = iommufd_cpr_pre_save, + .post_load = iommufd_cpr_post_load, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp) +{ + Error **cpr_blocker = &be->cpr_blocker; + + if (!vfio_cpr_supported(be, cpr_blocker)) { + return migrate_add_blocker_modes(cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) == 0; + } + + vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be); + + return true; +} + +void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be) +{ + vmstate_unregister(NULL, &iommufd_cpr_vmstate, be); + migrate_del_blocker(&be->cpr_blocker); +} + +bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container, + Error **errp) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, + vfio_cpr_reboot_notifier, + MIG_MODE_CPR_REBOOT); + + vfio_cpr_add_kvm_notifier(); + + return true; +} + +void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_remove_notifier(&bcontainer->cpr_reboot_notifier); +} + +void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev) +{ + if (!cpr_is_incoming()) { + /* + * Beware fd may have already been saved by vfio_device_set_fd, + * so call resave to avoid a duplicate entry. + */ + cpr_resave_fd(vbasedev->name, 0, vbasedev->fd); + vfio_cpr_save_device(vbasedev); + } +} + +void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev) +{ + cpr_delete_fd(vbasedev->name, 0); + vfio_cpr_delete_device(vbasedev->name); +} + +void vfio_cpr_load_device(VFIODevice *vbasedev) +{ + if (cpr_is_incoming()) { + bool ret = vfio_cpr_find_device(vbasedev); + g_assert(ret); + + if (vbasedev->fd < 0) { + vbasedev->fd = cpr_find_fd(vbasedev->name, 0); + } + } +} diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c new file mode 100644 index 0000000..553b203 --- /dev/null +++ b/hw/vfio/cpr-legacy.c @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2021-2025 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <sys/ioctl.h> +#include <linux/vfio.h> +#include "qemu/osdep.h" +#include "hw/vfio/vfio-container.h" +#include "hw/vfio/vfio-device.h" +#include "hw/vfio/vfio-listener.h" +#include "migration/blocker.h" +#include "migration/cpr.h" +#include "migration/migration.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp) +{ + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL, + .iova = 0, + .size = 0, + }; + if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all"); + return false; + } + container->cpr.vaddr_unmapped = true; + return true; +} + +/* + * Set the new @vaddr for any mappings registered during cpr load. + * The incoming state is cleared thereafter. + */ +static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, void *vaddr, + bool readonly, MemoryRegion *mr) +{ + const VFIOContainer *container = container_of(bcontainer, VFIOContainer, + bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_VADDR, + .vaddr = (__u64)(uintptr_t)vaddr, + .iova = iova, + .size = size, + }; + + g_assert(cpr_is_incoming()); + + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) { + return -errno; + } + + return 0; +} + +static void vfio_region_remap(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + cpr.remap_listener); + vfio_container_region_add(&container->bcontainer, section, true); +} + +static bool vfio_cpr_supported(VFIOContainer *container, Error **errp) +{ + if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) { + error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR"); + return false; + + } else if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) { + error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL"); + return false; + + } else { + return true; + } +} + +static int vfio_container_pre_save(void *opaque) +{ + VFIOContainer *container = opaque; + Error *local_err = NULL; + + if (!vfio_dma_unmap_vaddr_all(container, &local_err)) { + error_report_err(local_err); + return -1; + } + return 0; +} + +static int vfio_container_post_load(void *opaque, int version_id) +{ + VFIOContainer *container = opaque; + VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + dma_map_fn saved_dma_map = vioc->dma_map; + Error *local_err = NULL; + + /* During incoming CPR, divert calls to dma_map. 
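+     * The kernel still holds the old IOVA mappings; only the user
+     * virtual address must be refreshed, which is why the diverted
+     * function maps with VFIO_DMA_MAP_FLAG_VADDR rather than creating
+     * new mappings.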
*/ + vioc->dma_map = vfio_legacy_cpr_dma_map; + + if (!vfio_listener_register(bcontainer, &local_err)) { + error_report_err(local_err); + return -1; + } + + /* Restore original dma_map function */ + vioc->dma_map = saved_dma_map; + + return 0; +} + +static const VMStateDescription vfio_container_vmstate = { + .name = "vfio-container", + .version_id = 0, + .minimum_version_id = 0, + .priority = MIG_PRI_LOW, /* Must happen after devices and groups */ + .pre_save = vfio_container_pre_save, + .post_load = vfio_container_post_load, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, Error **errp) +{ + VFIOContainer *container = + container_of(notifier, VFIOContainer, cpr.transfer_notifier); + VFIOContainerBase *bcontainer = &container->bcontainer; + + if (e->type != MIG_EVENT_PRECOPY_FAILED) { + return 0; + } + + if (container->cpr.vaddr_unmapped) { + /* + * Force a call to vfio_region_remap for each mapped section by + * temporarily registering a listener, and temporarily diverting + * dma_map to vfio_legacy_cpr_dma_map. The latter restores vaddr. + */ + + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + dma_map_fn saved_dma_map = vioc->dma_map; + vioc->dma_map = vfio_legacy_cpr_dma_map; + + container->cpr.remap_listener = (MemoryListener) { + .name = "vfio cpr recover", + .region_add = vfio_region_remap + }; + memory_listener_register(&container->cpr.remap_listener, + bcontainer->space->as); + memory_listener_unregister(&container->cpr.remap_listener); + container->cpr.vaddr_unmapped = false; + vioc->dma_map = saved_dma_map; + } + return 0; +} + +bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + Error **cpr_blocker = &container->cpr.blocker; + + migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, + vfio_cpr_reboot_notifier, + MIG_MODE_CPR_REBOOT); + + if (!vfio_cpr_supported(container, cpr_blocker)) { + return migrate_add_blocker_modes(cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) == 0; + } + + vfio_cpr_add_kvm_notifier(); + + vmstate_register(NULL, -1, &vfio_container_vmstate, container); + + migration_add_notifier_mode(&container->cpr.transfer_notifier, + vfio_cpr_fail_notifier, + MIG_MODE_CPR_TRANSFER); + return true; +} + +void vfio_legacy_cpr_unregister_container(VFIOContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_remove_notifier(&bcontainer->cpr_reboot_notifier); + migrate_del_blocker(&container->cpr.blocker); + vmstate_unregister(NULL, &vfio_container_vmstate, container); + migration_remove_notifier(&container->cpr.transfer_notifier); +} + +/* + * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after + * succeeding for others, so the latter have lost their vaddr. Call this + * to restore vaddr for a section with a giommu. + * + * The giommu already exists. Find it and replay it, which calls + * vfio_legacy_cpr_dma_map further down the stack. 
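+ *
+ * Roughly (illustrative; intermediate steps simplified):
+ *
+ *   vfio_cpr_giommu_remap()
+ *     -> memory_region_iommu_replay(giommu->iommu_mr, &giommu->n)
+ *          -> giommu->n notifier
+ *               -> vfio_container_dma_map() -> vfio_legacy_cpr_dma_map()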
+ */ +void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) +{ + VFIOGuestIOMMU *giommu = NULL; + hwaddr as_offset = section->offset_within_address_space; + hwaddr iommu_offset = as_offset - section->offset_within_region; + + QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) && + giommu->iommu_offset == iommu_offset) { + break; + } + } + g_assert(giommu); + memory_region_iommu_replay(giommu->iommu_mr, &giommu->n); +} + +/* + * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mappings after + * succeeding for others, so the latter have lost their vaddr. Call this + * to restore vaddr for a section with a RamDiscardManager. + * + * The ram discard listener already exists. Call its populate function + * directly, which calls vfio_legacy_cpr_dma_map. + */ +bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = + vfio_find_ram_discard_listener(bcontainer, section); + + g_assert(vrdl); + return vrdl->listener.notify_populate(&vrdl->listener, section) == 0; +} + +int vfio_cpr_group_get_device_fd(int d, const char *name) +{ + const int id = 0; + int fd = cpr_find_fd(name, id); + + if (fd < 0) { + fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name); + if (fd >= 0) { + cpr_save_fd(name, id, fd); + } + } + return fd; +} + +static bool same_device(int fd1, int fd2) +{ + struct stat st1, st2; + + return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev; +} + +bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group, + int fd) +{ + if (container->fd == fd) { + return true; + } + if (!same_device(container->fd, fd)) { + return false; + } + /* + * Same device, different fd. This occurs when the container fd is + * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS + * produces duplicates. De-dup it. 
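vfio_cpr_group_get_device_fd() above is one instance of a general find-or-create pattern for descriptors that must survive the CPR handover. A sketch of the same shape for an arbitrary file, assuming the cpr_find_fd()/cpr_save_fd() semantics used throughout this series (the helper name is illustrative):

#include "qemu/osdep.h"
#include "migration/cpr.h"
#include <fcntl.h>

static int cpr_get_or_open(const char *name, int id, const char *path)
{
    int fd = cpr_find_fd(name, id);    /* >= 0 when restored after handover */

    if (fd < 0) {
        fd = open(path, O_RDWR);       /* first boot: really open it */
        if (fd >= 0) {
            cpr_save_fd(name, id, fd); /* stash it for the next QEMU */
        }
    }
    return fd;
}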
+ */ + cpr_delete_fd("vfio_container_for_group", group->groupid); + close(fd); + cpr_save_fd("vfio_container_for_group", group->groupid, container->fd); + return true; +} diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c index 3214184..384b56c 100644 --- a/hw/vfio/cpr.c +++ b/hw/vfio/cpr.c @@ -7,13 +7,16 @@ #include "qemu/osdep.h" #include "hw/vfio/vfio-device.h" -#include "migration/misc.h" +#include "hw/vfio/vfio-cpr.h" +#include "hw/vfio/pci.h" +#include "hw/pci/msix.h" +#include "hw/pci/msi.h" +#include "migration/cpr.h" #include "qapi/error.h" #include "system/runstate.h" -#include "vfio-cpr.h" -static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, - MigrationEvent *e, Error **errp) +int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, Error **errp) { if (e->type == MIG_EVENT_PRECOPY_SETUP && !runstate_check(RUN_STATE_SUSPENDED) && !vm_get_suspended()) { @@ -26,15 +29,174 @@ static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, return 0; } -bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp) +#define STRDUP_VECTOR_FD_NAME(vdev, name) \ + g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name)) + +void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr, + int fd) +{ + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + cpr_save_fd(fdname, nr, fd); +} + +int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr) +{ + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + return cpr_find_fd(fdname, nr); +} + +void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr) +{ + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + cpr_delete_fd(fdname, nr); +} + +static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors, + bool msix) +{ + int i, fd; + bool pending = false; + PCIDevice *pdev = &vdev->pdev; + + vdev->nr_vectors = nr_vectors; + vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors); + vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI; + + vfio_pci_prepare_kvm_msi_virq_batch(vdev); + + for (i = 0; i < nr_vectors; i++) { + VFIOMSIVector *vector = &vdev->msi_vectors[i]; + + fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i); + if (fd >= 0) { + vfio_pci_vector_init(vdev, i); + vfio_pci_msi_set_handler(vdev, i); + } + + if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) { + vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix); + } else { + vdev->msi_vectors[i].virq = -1; + } + + if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) { + set_bit(i, vdev->msix->pending); + pending = true; + } + } + + vfio_pci_commit_kvm_msi_virq_batch(vdev); + + if (msix) { + memory_region_set_enabled(&pdev->msix_pba_mmio, pending); + } +} + +/* + * The kernel may change non-emulated config bits. Exclude them from the + * changed-bits check in get_pci_config_device. 
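The hook below implements the comment above. For reference, this is the shape of the comparison it narrows, modeled loosely on get_pci_config_device(); a simplified sketch, since the real code also honors wmask and w1cmask:

static bool config_byte_matches(uint8_t incoming, uint8_t current,
                                uint8_t cmask, uint8_t emulated)
{
    /* after the pre_load hook, only emulated bits remain in cmask */
    uint8_t checked = cmask & emulated;

    return ((incoming ^ current) & checked) == 0;
}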
+ */ +static int vfio_cpr_pci_pre_load(void *opaque) +{ + VFIOPCIDevice *vdev = opaque; + PCIDevice *pdev = &vdev->pdev; + int size = MIN(pci_config_size(pdev), vdev->config_size); + int i; + + for (i = 0; i < size; i++) { + pdev->cmask[i] &= vdev->emulated_config_bits[i]; + } + + return 0; +} + +static int vfio_cpr_pci_post_load(void *opaque, int version_id) +{ + VFIOPCIDevice *vdev = opaque; + PCIDevice *pdev = &vdev->pdev; + int nr_vectors; + + vfio_sub_page_bar_update_mappings(vdev); + + if (msix_enabled(pdev)) { + vfio_pci_msix_set_notifiers(vdev); + nr_vectors = vdev->msix->entries; + vfio_cpr_claim_vectors(vdev, nr_vectors, true); + + } else if (msi_enabled(pdev)) { + nr_vectors = msi_nr_vectors_allocated(pdev); + vfio_cpr_claim_vectors(vdev, nr_vectors, false); + + } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) { + Error *local_err = NULL; + if (!vfio_pci_intx_enable(vdev, &local_err)) { + error_report_err(local_err); + return -1; + } + } + + return 0; +} + +static bool pci_msix_present(void *opaque, int version_id) { - migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, - vfio_cpr_reboot_notifier, - MIG_MODE_CPR_REBOOT); - return true; + PCIDevice *pdev = opaque; + + return msix_present(pdev); +} + +static const VMStateDescription vfio_intx_vmstate = { + .name = "vfio-cpr-intx", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_BOOL(pending, VFIOINTx), + VMSTATE_UINT32(route.mode, VFIOINTx), + VMSTATE_INT32(route.irq, VFIOINTx), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_VFIO_INTX(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(VFIOINTx), \ + .vmsd = &vfio_intx_vmstate, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, VFIOINTx), \ +} + +const VMStateDescription vfio_cpr_pci_vmstate = { + .name = "vfio-cpr-pci", + .version_id = 0, + .minimum_version_id = 0, + .pre_load = vfio_cpr_pci_pre_load, + .post_load = vfio_cpr_pci_post_load, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), + VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present), + VMSTATE_VFIO_INTX(intx, VFIOPCIDevice), + VMSTATE_END_OF_LIST() + } +}; + +static NotifierWithReturn kvm_close_notifier; + +static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, + Error **errp) +{ + if (e->type == MIG_EVENT_PRECOPY_DONE) { + vfio_kvm_device_close(); + } + return 0; } -void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer) +void vfio_cpr_add_kvm_notifier(void) { - migration_remove_notifier(&bcontainer->cpr_reboot_notifier); + if (!kvm_close_notifier.notify) { + migration_add_notifier_mode(&kvm_close_notifier, + vfio_cpr_kvm_close_notifier, + MIG_MODE_CPR_TRANSFER); + } } diff --git a/hw/vfio/device.c b/hw/vfio/device.c index d625a7c..52a1996 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -28,6 +28,8 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/units.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "monitor/monitor.h" #include "vfio-helpers.h" @@ -82,7 +84,7 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index) .count = 0, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } void vfio_device_irq_unmask(VFIODevice *vbasedev, int index) @@ -95,7 +97,7 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + 
vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } void vfio_device_irq_mask(VFIODevice *vbasedev, int index) @@ -108,7 +110,7 @@ void vfio_device_irq_mask(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } static inline const char *action_to_str(int action) @@ -167,7 +169,7 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex pfd = (int32_t *)&irq_set->data; *pfd = fd; - if (!ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + if (!vbasedev->io_ops->set_irqs(vbasedev, irq_set)) { return true; } @@ -185,10 +187,29 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex return false; } +int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, + struct vfio_irq_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->argsz = sizeof(*info); + info->index = index; + + return vbasedev->io_ops->get_irq_info(vbasedev, info); +} + int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); + int fd = -1; + int ret; + + /* check cache */ + if (vbasedev->reginfo[index] != NULL) { + *info = vbasedev->reginfo[index]; + return 0; + } *info = g_malloc0(argsz); @@ -196,22 +217,41 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd); + if (ret != 0) { g_free(*info); *info = NULL; - return -errno; + return ret; } if ((*info)->argsz > argsz) { argsz = (*info)->argsz; *info = g_realloc(*info, argsz); + if (fd != -1) { + close(fd); + fd = -1; + } + goto retry; } + /* fill cache */ + vbasedev->reginfo[index] = *info; + if (vbasedev->region_fds != NULL) { + vbasedev->region_fds[index] = fd; + } + return 0; } +int vfio_device_get_region_fd(VFIODevice *vbasedev, int index) +{ + return vbasedev->region_fds ? + vbasedev->region_fds[index] : + vbasedev->fd; +} + int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, uint32_t subtype, struct vfio_region_info **info) { @@ -227,7 +267,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE); if (!hdr) { - g_free(*info); continue; } @@ -239,8 +278,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, if (cap_type->type == type && cap_type->subtype == subtype) { return 0; } - - g_free(*info); } *info = NULL; @@ -249,7 +286,7 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) { - g_autofree struct vfio_region_info *info = NULL; + struct vfio_region_info *info = NULL; bool ret = false; if (!vfio_device_get_region_info(vbasedev, region, &info)) { @@ -281,37 +318,53 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) error_setg(errp, "Use FD passing only with iommufd backend"); return false; } - /* - * Give a name with fd so any function printing out vbasedev->name - * will not break. - */ if (!vbasedev->name) { - vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + + if (vbasedev->dev->id) { + vbasedev->name = g_strdup(vbasedev->dev->id); + return true; + } else { + /* + * Assign a name so any function printing it will not break. 
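The retry loop in vfio_device_get_region_info() above follows the standard VFIO "argsz" protocol for variable-size ioctls: the kernel reports how many bytes it actually wanted (header plus capability chain), and the caller grows the buffer and repeats. The same loop minus the new caching and region-fd handling, for reference:

static struct vfio_region_info *region_info_demo(int device_fd, uint32_t index)
{
    size_t argsz = sizeof(struct vfio_region_info);
    struct vfio_region_info *info = g_malloc0(argsz);

    info->index = index;
retry:
    info->argsz = argsz;
    if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, info)) {
        g_free(info);
        return NULL;
    }
    if (info->argsz > argsz) {      /* capability chain did not fit */
        argsz = info->argsz;
        info = g_realloc(info, argsz);
        goto retry;
    }
    return info;
}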
+ * The fd number changes across processes, so this cannot be + * used as an invariant name for CPR. + */ + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + error_setg(&vbasedev->cpr.id_blocker, + "vfio device with fd=%d needs an id property", + vbasedev->fd); + return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker, + errp, MIG_MODE_CPR_TRANSFER, + -1) == 0; + } } } return true; } -void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) +void vfio_device_free_name(VFIODevice *vbasedev) { - ERRP_GUARD(); - int fd = monitor_fd_param(monitor_cur(), str, errp); + g_clear_pointer(&vbasedev->name, g_free); + migrate_del_blocker(&vbasedev->cpr.id_blocker); +} - if (fd < 0) { - error_prepend(errp, "Could not parse remote object fd %s:", str); - return; - } - vbasedev->fd = fd; +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) +{ + vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp); } +static VFIODeviceIOOps vfio_device_io_ops_ioctl; + void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard) { vbasedev->type = type; vbasedev->ops = ops; + vbasedev->io_ops = &vfio_device_io_ops_ioctl; vbasedev->dev = dev; vbasedev->fd = -1; + vbasedev->use_region_fds = false; vbasedev->ram_block_discard_allowed = ram_discard; } @@ -370,27 +423,35 @@ bool vfio_device_hiod_create_and_realize(VFIODevice *vbasedev, VFIODevice *vfio_get_vfio_device(Object *obj) { if (object_dynamic_cast(obj, TYPE_VFIO_PCI)) { - return &VFIO_PCI(obj)->vbasedev; + return &VFIO_PCI_BASE(obj)->vbasedev; } else { return NULL; } } -bool vfio_device_attach(char *name, VFIODevice *vbasedev, - AddressSpace *as, Error **errp) +bool vfio_device_attach_by_iommu_type(const char *iommu_type, char *name, + VFIODevice *vbasedev, AddressSpace *as, + Error **errp) { const VFIOIOMMUClass *ops = - VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); - - if (vbasedev->iommufd) { - ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); - } + VFIO_IOMMU_CLASS(object_class_by_name(iommu_type)); assert(ops); return ops->attach_device(name, vbasedev, as, errp); } +bool vfio_device_attach(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) +{ + const char *iommu_type = vbasedev->iommufd ? 
+ TYPE_VFIO_IOMMU_IOMMUFD : + TYPE_VFIO_IOMMU_LEGACY; + + return vfio_device_attach_by_iommu_type(iommu_type, name, vbasedev, + as, errp); +} + void vfio_device_detach(VFIODevice *vbasedev) { if (!vbasedev->bcontainer) { @@ -398,3 +459,136 @@ void vfio_device_detach(VFIODevice *vbasedev) } VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev); } + +void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, + struct vfio_device_info *info) +{ + int i; + + vbasedev->num_irqs = info->num_irqs; + vbasedev->num_regions = info->num_regions; + vbasedev->flags = info->flags; + vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); + + vbasedev->bcontainer = bcontainer; + QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); + + QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + + vbasedev->reginfo = g_new0(struct vfio_region_info *, + vbasedev->num_regions); + if (vbasedev->use_region_fds) { + vbasedev->region_fds = g_new0(int, vbasedev->num_regions); + for (i = 0; i < vbasedev->num_regions; i++) { + vbasedev->region_fds[i] = -1; + } + } +} + +void vfio_device_unprepare(VFIODevice *vbasedev) +{ + int i; + + for (i = 0; i < vbasedev->num_regions; i++) { + g_free(vbasedev->reginfo[i]); + if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) { + close(vbasedev->region_fds[i]); + } + } + + g_clear_pointer(&vbasedev->reginfo, g_free); + g_clear_pointer(&vbasedev->region_fds, g_free); + + QLIST_REMOVE(vbasedev, container_next); + QLIST_REMOVE(vbasedev, global_next); + vbasedev->bcontainer = NULL; +} + +/* + * Traditional ioctl() based io + */ + +static int vfio_device_io_device_feature(VFIODevice *vbasedev, + struct vfio_device_feature *feature) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info, + int *fd) +{ + int ret; + + *fd = -1; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_get_irq_info(VFIODevice *vbasedev, + struct vfio_irq_info *info) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_set_irqs(VFIODevice *vbasedev, + struct vfio_irq_set *irqs) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irqs); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_region_read(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data) +{ + struct vfio_region_info *info; + int ret; + + ret = vfio_device_get_region_info(vbasedev, index, &info); + if (ret != 0) { + return ret; + } + + ret = pread(vbasedev->fd, data, size, info->offset + off); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_region_write(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data, + bool post) +{ + struct vfio_region_info *info; + int ret; + + ret = vfio_device_get_region_info(vbasedev, index, &info); + if (ret != 0) { + return ret; + } + + ret = pwrite(vbasedev->fd, data, size, info->offset + off); + + return ret < 0 ? 
-errno : ret; +} + +static VFIODeviceIOOps vfio_device_io_ops_ioctl = { + .device_feature = vfio_device_io_device_feature, + .get_region_info = vfio_device_io_get_region_info, + .get_irq_info = vfio_device_io_get_irq_info, + .set_irqs = vfio_device_io_set_irqs, + .region_read = vfio_device_io_region_read, + .region_write = vfio_device_io_region_write, +}; diff --git a/hw/vfio/display.c b/hw/vfio/display.c index 9c6f5aa..faacd90 100644 --- a/hw/vfio/display.c +++ b/hw/vfio/display.c @@ -365,7 +365,7 @@ static bool vfio_display_dmabuf_init(VFIOPCIDevice *vdev, Error **errp) &vfio_display_dmabuf_ops, vdev); if (vdev->enable_ramfb) { - vdev->dpy->ramfb = ramfb_setup(errp); + vdev->dpy->ramfb = ramfb_setup(vdev->use_legacy_x86_rom, errp); if (!vdev->dpy->ramfb) { return false; } @@ -494,7 +494,7 @@ static bool vfio_display_region_init(VFIOPCIDevice *vdev, Error **errp) &vfio_display_region_ops, vdev); if (vdev->enable_ramfb) { - vdev->dpy->ramfb = ramfb_setup(errp); + vdev->dpy->ramfb = ramfb_setup(vdev->use_legacy_x86_rom, errp); if (!vdev->dpy->ramfb) { return false; } diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index d0dbab1..23d13e5 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, int vfio_kvm_device_fd = -1; #endif +void vfio_kvm_device_close(void) +{ +#ifdef CONFIG_KVM + kvm_close(); + if (vfio_kvm_device_fd != -1) { + close(vfio_kvm_device_fd); + vfio_kvm_device_fd = -1; + } +#endif +} + int vfio_kvm_device_add_fd(int fd, Error **errp) { #ifdef CONFIG_KVM @@ -198,3 +209,20 @@ retry: return info; } + +bool vfio_arch_wants_loading_config_after_iter(void) +{ + /* + * Starting the config load only after all iterables were loaded (during + * non-iterables loading phase) is required for ARM64 due to this platform + * VFIO dependency on interrupt controller being loaded first. + * + * See commit d329f5032e17 ("vfio: Move the saving of the config space to + * the right place in VFIO migration"). + */ +#if defined(TARGET_ARM) + return true; +#else + return false; +#endif +} diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index d7e4728..ee0767b 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -103,6 +103,7 @@ static int igd_gen(VFIOPCIDevice *vdev) /* * Unfortunately, Intel changes it's specification quite often. This makes * it impossible to use a suitable default value for unknown devices. + * Return -1 for not applying any generation-specific quirks. 
*/ return -1; } @@ -112,6 +113,7 @@ static int igd_gen(VFIOPCIDevice *vdev) #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ +#define IGD_GMCH_VGA_DISABLE BIT(1) #define IGD_GMCH_GEN6_GMS_SHIFT 3 /* SNB_GMCH in i915 */ #define IGD_GMCH_GEN6_GMS_MASK 0x1f #define IGD_GMCH_GEN8_GMS_SHIFT 8 /* BDW_GMCH in i915 */ @@ -182,34 +184,25 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); - return true; } -static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp) +static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev, + struct vfio_region_info **opregion) { - g_autofree struct vfio_region_info *opregion = NULL; int ret; - /* Hotplugging is not supported for opregion access */ - if (vdev->pdev.qdev.hotplugged) { - error_setg(errp, "IGD OpRegion is not supported on hotplugged device"); - return false; - } - ret = vfio_device_get_region_info_type(&vdev->vbasedev, VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion); if (ret) { - error_setg_errno(errp, -ret, - "Device does not supports IGD OpRegion feature"); return false; } - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { + /* Hotplugging is not supported for opregion access */ + if (vdev->pdev.qdev.hotplugged) { + warn_report("IGD device detected, but OpRegion is not supported " + "on hotplugged device."); return false; } @@ -355,8 +348,8 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) { - g_autofree struct vfio_region_info *host = NULL; - g_autofree struct vfio_region_info *lpc = NULL; + struct vfio_region_info *host = NULL; + struct vfio_region_info *lpc = NULL; PCIDevice *lpc_bridge; int ret; @@ -419,6 +412,44 @@ static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) return true; } +static bool vfio_pci_igd_override_gms(int gen, uint32_t gms, uint32_t *gmch) +{ + bool ret = false; + + if (gen == -1) { + error_report("x-igd-gms is not supported on this device"); + } else if (gen < 8) { + if (gms <= 0x10) { + *gmch &= ~(IGD_GMCH_GEN6_GMS_MASK << IGD_GMCH_GEN6_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN6_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, "x-igd-gms", "0~0x10"); + } + } else if (gen == 8) { + if (gms <= 0x40) { + *gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN8_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, "x-igd-gms", "0~0x40"); + } + } else { + /* 0x0 to 0x40: 32MB increments starting at 0MB */ + /* 0xf0 to 0xfe: 4MB increments starting at 4MB */ + if ((gms <= 0x40) || (gms >= 0xf0 && gms <= 0xfe)) { + *gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); + *gmch |= gms << IGD_GMCH_GEN8_GMS_SHIFT; + ret = true; + } else { + error_report(QERR_INVALID_PARAMETER_VALUE, + "x-igd-gms", "0~0x40 or 0xf0~0xfe"); + } + } + + return ret; +} + #define IGD_GGC_MMIO_OFFSET 0x108040 #define IGD_BDSM_MMIO_OFFSET 0x1080C0 @@ -428,41 +459,35 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) VFIOConfigMirrorQuirk *ggc_mirror, *bdsm_mirror; int gen; - /* - * This must 
be an Intel VGA device at address 00:02.0 for us to even - * consider enabling legacy mode. Some driver have dependencies on the PCI - * bus address. - */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || !vfio_is_vga(vdev) || nr != 0) { return; } - /* - * Only on IGD devices of gen 11 and above, the BDSM register is mirrored - * into MMIO space and read from MMIO space by the Windows driver. - */ + /* Only IGD Gen6-12 devices need quirks in BAR 0 */ gen = igd_gen(vdev); if (gen < 6) { return; } - ggc_quirk = vfio_quirk_alloc(1); - ggc_mirror = ggc_quirk->data = g_malloc0(sizeof(*ggc_mirror)); - ggc_mirror->mem = ggc_quirk->mem; - ggc_mirror->vdev = vdev; - ggc_mirror->bar = nr; - ggc_mirror->offset = IGD_GGC_MMIO_OFFSET; - ggc_mirror->config_offset = IGD_GMCH; - - memory_region_init_io(ggc_mirror->mem, OBJECT(vdev), - &vfio_generic_mirror_quirk, ggc_mirror, - "vfio-igd-ggc-quirk", 2); - memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, - ggc_mirror->offset, ggc_mirror->mem, - 1); - - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, ggc_quirk, next); + if (vdev->igd_gms) { + ggc_quirk = vfio_quirk_alloc(1); + ggc_mirror = ggc_quirk->data = g_malloc0(sizeof(*ggc_mirror)); + ggc_mirror->mem = ggc_quirk->mem; + ggc_mirror->vdev = vdev; + ggc_mirror->bar = nr; + ggc_mirror->offset = IGD_GGC_MMIO_OFFSET; + ggc_mirror->config_offset = IGD_GMCH; + + memory_region_init_io(ggc_mirror->mem, OBJECT(vdev), + &vfio_generic_mirror_quirk, ggc_mirror, + "vfio-igd-ggc-quirk", 2); + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, + ggc_mirror->offset, ggc_mirror->mem, + 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, ggc_quirk, next); + } bdsm_quirk = vfio_quirk_alloc(1); bdsm_mirror = bdsm_quirk->data = g_malloc0(sizeof(*bdsm_mirror)); @@ -484,44 +509,39 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) { + struct vfio_region_info *opregion = NULL; int ret, gen; - uint64_t gms_size; + uint64_t gms_size = 0; uint64_t *bdsm_size; uint32_t gmch; bool legacy_mode_enabled = false; Error *err = NULL; - /* - * This must be an Intel VGA device at address 00:02.0 for us to even - * consider enabling legacy mode. The vBIOS has dependencies on the - * PCI bus address. - */ if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || !vfio_is_vga(vdev)) { return true; } - /* - * IGD is not a standard, they like to change their specs often. We - * only attempt to support back to SandBridge and we hope that newer - * devices maintain compatibility with generation 8. 
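For context on what these mirror quirks do: vfio_generic_mirror_quirk backs a small MMIO window in BAR 0 with the device's emulated config space, so the guest driver reads the same virtualized GGC/BDSM values through MMIO as through config space. A simplified model of the read side, using the VFIOConfigMirrorQuirk fields set up above (the in-tree ops are authoritative):

static uint64_t mirror_read_model(void *opaque, hwaddr addr, unsigned size)
{
    VFIOConfigMirrorQuirk *mirror = opaque;
    VFIOPCIDevice *vdev = mirror->vdev;

    /* serve the read from emulated PCI config space, not the hardware */
    return vfio_pci_read_config(&vdev->pdev, mirror->config_offset + addr,
                                size);
}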
- */ - gen = igd_gen(vdev); - if (gen == -1) { - error_report("IGD device %s is unsupported in legacy mode, " - "try SandyBridge or newer", vdev->vbasedev.name); + /* An IGD device always comes with an OpRegion */ + if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) { return true; } + info_report("OpRegion detected on Intel display %x.", vdev->device_id); + gen = igd_gen(vdev); gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); /* * For backward compatibility, enable legacy mode when + * - Device generation is 6 to 9 (inclusive) + * - IGD claims VGA cycles on host * - Machine type is i440fx (pc_piix) * - IGD device is at guest BDF 00:02.0 * - Not manually disabled by x-igd-legacy-mode=off */ if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) && + (gen >= 6 && gen <= 9) && + !(gmch & IGD_GMCH_VGA_DISABLE) && !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") && (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev), 0, PCI_DEVFN(0x2, 0)))) { @@ -532,7 +552,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) * - OpRegion * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host */ - g_autofree struct vfio_region_info *rom = NULL; + struct vfio_region_info *rom = NULL; legacy_mode_enabled = true; info_report("IGD legacy mode enabled, " @@ -551,14 +571,16 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) } /* - * If IGD VGA Disable is clear (expected) and VGA is not already - * enabled, try to enable it. Probably shouldn't be using legacy mode - * without VGA, but also no point in us enabling VGA if disabled in - * hardware. + * If VGA is not already enabled, try to enable it. We shouldn't be + * using legacy mode without VGA. */ - if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) { - error_setg(&err, "Unable to enable VGA access"); - goto error; + if (!vdev->vga) { + if (vfio_populate_vga(vdev, &err)) { + vfio_pci_config_register_vga(vdev); + } else { + error_setg(&err, "Unable to enable VGA access"); + goto error; + } } /* Enable OpRegion and LPC bridge quirk */ @@ -566,13 +588,15 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) vdev->features |= VFIO_FEATURE_ENABLE_IGD_LPC; } else if (vdev->igd_legacy_mode == ON_OFF_AUTO_ON) { error_setg(&err, - "Machine is not i440fx or assigned BDF is not 00:02.0"); + "Machine is not i440fx, assigned BDF is not 00:02.0, " + "or device %04x (gen %d) doesn't support legacy mode", + vdev->device_id, gen); goto error; } /* Setup OpRegion access */ if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && - !vfio_pci_igd_setup_opregion(vdev, errp)) { + !vfio_pci_igd_opregion_init(vdev, opregion, errp)) { goto error; } @@ -580,7 +604,15 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_LPC) && !vfio_pci_igd_setup_lpc_bridge(vdev, errp)) { goto error; - } + } + + /* + * ASLS (OpRegion address) is read-only, emulated. + * It contains an HPA; guest firmware needs to reprogram it with a GPA. + */ + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); /* * Allow user to override dsm size using x-igd-gms option, in multiples of * set from DVMT Pre-Allocated option in host BIOS. 
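As a worked example of the GMS encoding referenced here and in vfio_pci_igd_override_gms(): on gen9+, values up to 0x40 select 32MiB units, while 0xf0-0xfe select 4MiB units starting at 4MiB, so gms=0x05 yields 160MiB and gms=0xf1 yields 8MiB. A decode sketch consistent with those comments (the tree's igd_stolen_memory_size() is authoritative):

#include "qemu/units.h"   /* MiB */

static uint64_t gen9_gms_to_bytes(uint32_t gms)
{
    if (gms <= 0x40) {
        return (uint64_t)gms * 32 * MiB;             /* 32MiB steps from 0 */
    }
    if (gms >= 0xf0 && gms <= 0xfe) {
        return ((uint64_t)gms - 0xf0 + 1) * 4 * MiB; /* 4MiB steps from 4MiB */
    }
    return 0;                                        /* reserved encoding */
}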
*/ if (vdev->igd_gms) { - if (gen < 8) { - if (vdev->igd_gms <= 0x10) { - gmch &= ~(IGD_GMCH_GEN6_GMS_MASK << IGD_GMCH_GEN6_GMS_SHIFT); - gmch |= vdev->igd_gms << IGD_GMCH_GEN6_GMS_SHIFT; - } else { - error_report(QERR_INVALID_PARAMETER_VALUE, - "x-igd-gms", "0~0x10"); - } - } else { - if (vdev->igd_gms <= 0x40) { - gmch &= ~(IGD_GMCH_GEN8_GMS_MASK << IGD_GMCH_GEN8_GMS_SHIFT); - gmch |= vdev->igd_gms << IGD_GMCH_GEN8_GMS_SHIFT; - } else { - error_report(QERR_INVALID_PARAMETER_VALUE, - "x-igd-gms", "0~0x40"); - } + if (!vfio_pci_igd_override_gms(gen, vdev->igd_gms, &gmch)) { + return false; } + + /* GMCH is read-only, emulated */ + pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); + pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); + pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); } - gms_size = igd_stolen_memory_size(gen, gmch); + if (gen > 0) { + gms_size = igd_stolen_memory_size(gen, gmch); + + /* BDSM is read-write, emulated. BIOS needs to be able to write it */ + if (gen < 11) { + pci_set_long(vdev->pdev.config + IGD_BDSM, 0); + pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); + } else { + pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0); + pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0); + pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); + } + } /* * Request reserved memory for stolen memory via fw_cfg. VM firmware * must allocate a 1MB aligned reserved memory region below 4GB with - * the requested size (in bytes) for use by the Intel PCI class VGA - * device at VM address 00:02.0. The base address of this reserved - * memory region must be written to the device BDSM register at PCI - * config offset 0x5C. + * the requested size (in bytes) for use by the IGD device. The base + * address of this reserved memory region must be written to the + * device BDSM register. + * For newer devices without a BDSM register, this fw_cfg item is 0. */ bdsm_size = g_malloc(sizeof(*bdsm_size)); *bdsm_size = cpu_to_le64(gms_size); fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size", bdsm_size, sizeof(*bdsm_size)); - /* GMCH is read-only, emulated */ - pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); - pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); - pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); - - /* BDSM is read-write, emulated. 
The BIOS needs to be able to write it */ - if (gen < 11) { - pci_set_long(vdev->pdev.config + IGD_BDSM, 0); - pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); - } else { - pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0); - pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0); - pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); - } - trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, (gms_size / MiB)); return true; @@ -664,8 +684,27 @@ error: */ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp) { + struct vfio_region_info *opregion = NULL; + int gen; + + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || + !vfio_is_vga(vdev)) { + return true; + } + + /* FIXME: Cherryview is Gen8, but doesn't support GVT-g */ + gen = igd_gen(vdev); + if (gen != 8 && gen != 9) { + return true; + } + + if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) { + /* Should never reach here; KVMGT always emulates OpRegion */ + return false; + } + if ((vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) && - !vfio_pci_igd_setup_opregion(vdev, errp)) { + !vfio_pci_igd_opregion_init(vdev, opregion, errp)) { return false; } diff --git a/hw/vfio/iommufd-stubs.c b/hw/vfio/iommufd-stubs.c new file mode 100644 index 0000000..0be5276 --- /dev/null +++ b/hw/vfio/iommufd-stubs.c @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2025 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "migration/cpr.h" +#include "migration/vmstate.h" + +const VMStateDescription vmstate_cpr_vfio_devices = { + .name = CPR_STATE "/vfio devices", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]){ + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 232c06d..48c590b 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -21,20 +21,22 @@ #include "qapi/error.h" #include "system/iommufd.h" #include "hw/qdev-core.h" +#include "hw/vfio/vfio-cpr.h" #include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" +#include "migration/cpr.h" #include "pci.h" #include "vfio-iommufd.h" #include "vfio-helpers.h" -#include "vfio-cpr.h" #include "vfio-listener.h" #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); @@ -44,13 +46,42 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, iova, size, vaddr, readonly); } +static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + int fd, unsigned long start, bool readonly) +{ + const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + return iommufd_backend_map_file_dma(container->be, + container->ioas_id, + iova, size, fd, start, readonly); +} + static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) + IOMMUTLBEntry *iotlb, bool unmap_all) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + /* unmap in halves */ + if (unmap_all) { + Int128 llsize = int128_rshift(int128_2_64(), 1); + int ret; + + ret = 
iommufd_backend_unmap_dma(container->be, container->ioas_id, + 0, int128_get64(llsize)); + + if (ret == 0) { + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id, + int128_get64(llsize), + int128_get64(llsize)); + } + + return ret; + } + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ return iommufd_backend_unmap_dma(container->be, container->ioas_id, iova, size); @@ -91,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) goto err_kvm_device_add; } + if (cpr_is_incoming()) { + goto skip_bind; + } + /* Bind device to iommufd */ bind.iommufd = iommufd->fd; if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) { @@ -102,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) vbasedev->devid = bind.out_devid; trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, vbasedev->fd, vbasedev->devid); + +skip_bind: return true; err_bind: iommufd_cdev_kvm_device_del(vbasedev); @@ -295,7 +332,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, /* Try to find a domain */ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (!cpr_is_incoming()) { + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) { + ret = 0; + } else { + continue; + } + if (ret) { /* -EINVAL means the domain is incompatible with the device. */ if (ret == -EINVAL) { @@ -312,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } else { vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); return true; @@ -334,6 +379,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; } + if (cpr_is_incoming()) { + hwpt_id = vbasedev->cpr.hwpt_id; + goto skip_alloc; + } + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, container->ioas_id, flags, IOMMU_HWPT_DATA_NONE, 0, NULL, @@ -341,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); + if (ret) { + iommufd_backend_free_id(container->be, hwpt_id); + return false; + } + +skip_alloc: hwpt = g_malloc0(sizeof(*hwpt)); hwpt->hwpt_id = hwpt_id; hwpt->hwpt_flags = flags; QLIST_INIT(&hwpt->device_list); - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); - if (ret) { - iommufd_backend_free_id(container->be, hwpt->hwpt_id); - g_free(hwpt); - return false; - } - vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); @@ -391,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev, return iommufd_cdev_autodomains_get(vbasedev, container, errp); } - return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); + /* If CPR, we are already attached to ioas_id. 
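The "unmap in halves" dance above exists because the ioctl ABI carries sizes in a __u64: a full 64-bit IOVA span is 2^64 bytes, which wraps to 0 and cannot be expressed in one call. Two half-space unmaps cover it exactly; a sketch using the same backend helper:

static int unmap_all_demo(IOMMUFDBackend *be, uint32_t ioas_id)
{
    const uint64_t half = UINT64_C(1) << 63;   /* 2^63 fits; 2^64 does not */
    int ret;

    ret = iommufd_backend_unmap_dma(be, ioas_id, 0, half);        /* [0, 2^63) */
    if (ret == 0) {
        ret = iommufd_backend_unmap_dma(be, ioas_id, half, half); /* the rest */
    }
    return ret;
}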
*/ + return cpr_is_incoming() || + !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); } static void iommufd_cdev_detach_container(VFIODevice *vbasedev, @@ -416,7 +469,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) if (!QLIST_EMPTY(&bcontainer->device_list)) { return; } - vfio_cpr_unregister_container(bcontainer); + vfio_iommufd_cpr_unregister_container(container); vfio_listener_unregister(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); object_unref(container); @@ -480,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, VFIOAddressSpace *space; struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; int ret, devfd; + bool res; uint32_t ioas_id; Error *err = NULL; const VFIOIOMMUClass *iommufd_vioc = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + vfio_cpr_load_device(vbasedev); + if (vbasedev->fd < 0) { devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); if (devfd < 0) { @@ -508,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, vbasedev->iommufd != container->be) { continue; } - if (!iommufd_cdev_attach_container(vbasedev, container, &err)) { + + if (!cpr_is_incoming()) { + res = iommufd_cdev_attach_container(vbasedev, container, &err); + } else if (vbasedev->cpr.ioas_id == container->ioas_id) { + res = true; + } else { + continue; + } + + if (!res) { const char *msg = error_get_pretty(err); trace_iommufd_cdev_fail_attach_existing_container(msg); @@ -525,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, } } + if (cpr_is_incoming()) { + ioas_id = vbasedev->cpr.ioas_id; + goto skip_ioas_alloc; + } + /* Need to allocate a new dedicated container */ if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) { goto err_alloc_ioas; @@ -532,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); +skip_ioas_alloc: container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; QLIST_INIT(&container->hwpt_list); + vbasedev->cpr.ioas_id = ioas_id; bcontainer = &container->bcontainer; vfio_address_space_insert(space, bcontainer); @@ -562,7 +634,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, goto err_listener_register; } - if (!vfio_cpr_register_container(bcontainer, errp)) { + if (!vfio_iommufd_cpr_register_container(container, errp)) { goto err_listener_register; } @@ -575,6 +647,10 @@ found_container: goto err_listener_register; } + /* + * Do not move this code before attachment! The nested IOMMU support + * needs device and hwpt id which are generated only after attachment. 
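Taken together, the cpr_is_incoming() branches above reduce to one decision per container, assuming cpr.ioas_id and cpr.hwpt_id were restored from CPR vmstate before attach: the kernel-side bind and attach survived the handover, so new QEMU only matches the saved ids instead of re-issuing them. Restated as a sketch:

static bool attach_or_reuse(VFIODevice *vbasedev,
                            VFIOIOMMUFDContainer *container, Error **errp)
{
    if (!cpr_is_incoming()) {
        /* cold plug or hotplug: really bind and attach */
        return iommufd_cdev_attach_container(vbasedev, container, errp);
    }
    /* CPR restore: already attached in the kernel; just match the id */
    return vbasedev->cpr.ioas_id == container->ioas_id;
}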
+ */ if (!vfio_device_hiod_create_and_realize(vbasedev, TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) { goto err_listener_register; @@ -588,14 +664,8 @@ found_container: iommufd_cdev_ram_block_discard_disable(false); } - vbasedev->group = 0; - vbasedev->num_irqs = dev_info.num_irqs; - vbasedev->num_regions = dev_info.num_regions; - vbasedev->flags = dev_info.flags; - vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - vbasedev->bcontainer = bcontainer; - QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); - QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + vfio_device_prepare(vbasedev, bcontainer, &dev_info); + vfio_iommufd_cpr_register_device(vbasedev); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -622,9 +692,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - QLIST_REMOVE(vbasedev, global_next); - QLIST_REMOVE(vbasedev, container_next); - vbasedev->bcontainer = NULL; + vfio_device_unprepare(vbasedev); if (!vbasedev->ram_block_discard_allowed) { iommufd_cdev_ram_block_discard_disable(false); @@ -635,6 +703,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) iommufd_cdev_container_destroy(container); vfio_address_space_put(space); + vfio_iommufd_cpr_unregister_device(vbasedev); iommufd_cdev_unbind_and_disconnect(vbasedev); close(vbasedev->fd); } @@ -794,6 +863,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); vioc->dma_map = iommufd_cdev_map; + vioc->dma_map_file = iommufd_cdev_map_file; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; vioc->detach_device = iommufd_cdev_detach; @@ -802,21 +872,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; }; +static bool +host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); +} + +static bool +host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp); +} + static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, Error **errp) { VFIODevice *vdev = opaque; + HostIOMMUDeviceIOMMUFD *idev; HostIOMMUDeviceCaps *caps = &hiod->caps; + VendorCaps *vendor_caps = &caps->vendor_caps; enum iommu_hw_info_type type; - union { - struct iommu_hw_info_vtd vtd; - } data; uint64_t hw_caps; hiod->agent = opaque; - if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, - &type, &data, sizeof(data), + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type, + vendor_caps, sizeof(*vendor_caps), &hw_caps, errp)) { return false; } @@ -825,6 +912,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, caps->type = type; caps->hw_caps = hw_caps; + idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); + idev->iommufd = vdev->iommufd; + idev->devid = vdev->devid; + idev->hwpt_id = vdev->hwpt->hwpt_id; + return true; } @@ -850,10 +942,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod) static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data) { 
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc); hiodc->realize = hiod_iommufd_vfio_realize; hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges; hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask; + + idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt; + idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt; }; static const TypeInfo types[] = { diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index 6f77e18..f498e23 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) section->offset_within_address_space & (1ULL << 63); } -/* Called with rcu_read_lock held. */ -static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - Error **errp) +/* + * Called with rcu_read_lock held. + * The returned MemoryRegion must not be accessed after calling rcu_read_unlock. + */ +static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp) { - bool ret, mr_has_discard_manager; + MemoryRegion *mr; - ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only, - &mr_has_discard_manager, errp); - if (ret && mr_has_discard_manager) { + mr = memory_translate_iotlb(iotlb, xlat_p, errp); + if (mr && memory_region_has_ram_discard_manager(mr)) { /* * Malicious VMs might trigger discarding of IOMMU-mapped memory. The * pages will remain pinned inside vfio until unmapped, resulting in a @@ -118,7 +119,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, " intended via an IOMMU. It's possible to mitigate " " by setting/adjusting RLIMIT_MEMLOCK."); } - return ret; + return mr; } static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) @@ -126,6 +127,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); VFIOContainerBase *bcontainer = giommu->bcontainer; hwaddr iova = iotlb->iova + giommu->iommu_offset; + MemoryRegion *mr; + hwaddr xlat; void *vaddr; int ret; Error *local_err = NULL; @@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { bool read_only; - if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) { + mr = vfio_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { error_report_err(local_err); goto out; } + vaddr = memory_region_get_ram_ptr(mr) + xlat; + read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly; + /* * vaddr is only valid until rcu_read_unlock(). But after * vfio_dma_map has set up the mapping the pages will be @@ -163,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) */ ret = vfio_container_dma_map(bcontainer, iova, iotlb->addr_mask + 1, vaddr, - read_only); + read_only, mr); if (ret) { error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%s)", @@ -172,7 +179,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) } } else { ret = vfio_container_dma_unmap(bcontainer, iova, - iotlb->addr_mask + 1, iotlb); + iotlb->addr_mask + 1, iotlb, false); if (ret) { error_setg(&local_err, "vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " @@ -201,7 +208,7 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, int ret; /* Unmap with a single call. 
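The constraint documented on vfio_translate_iotlb() shapes every caller: the MemoryRegion, and any vaddr derived from it, is only stable inside the RCU critical section, so the map must be issued (and the pages pinned) before rcu_read_unlock(). A simplified caller, ignoring the giommu offset and the readonly refinements of the real notifier:

static void map_iotlb_entry_demo(VFIOContainerBase *bcontainer,
                                 IOMMUTLBEntry *iotlb)
{
    Error *local_err = NULL;
    MemoryRegion *mr;
    hwaddr xlat;

    rcu_read_lock();
    mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
    if (mr) {
        void *vaddr = memory_region_get_ram_ptr(mr) + xlat;

        /* dma_map pins the pages, so vaddr may go stale after unlock */
        vfio_container_dma_map(bcontainer, iotlb->iova, iotlb->addr_mask + 1,
                               vaddr, !(iotlb->perm & IOMMU_WO), mr);
    } else {
        error_report_err(local_err);
    }
    rcu_read_unlock();
}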
*/ - ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL); + ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL, false); if (ret) { error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, strerror(-ret)); @@ -233,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, vaddr = memory_region_get_ram_ptr(section->mr) + start; ret = vfio_container_dma_map(bcontainer, iova, next - start, - vaddr, section->readonly); + vaddr, section->readonly, section->mr); if (ret) { /* Rollback */ vfio_ram_discard_notify_discard(rdl, section); @@ -411,6 +418,32 @@ static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, return true; } +static void vfio_listener_begin(MemoryListener *listener) +{ + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); + void (*listener_begin)(VFIOContainerBase *bcontainer); + + listener_begin = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin; + + if (listener_begin) { + listener_begin(bcontainer); + } +} + +static void vfio_listener_commit(MemoryListener *listener) +{ + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); + void (*listener_commit)(VFIOContainerBase *bcontainer); + + listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_commit; + + if (listener_commit) { + listener_commit(bcontainer); + } +} + static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp) { /* @@ -423,11 +456,38 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp) } } +VFIORamDiscardListener *vfio_find_ram_discard_listener( + VFIOContainerBase *bcontainer, MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = NULL; + + QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { + break; + } + } + + if (!vrdl) { + hw_error("vfio: Trying to sync missing RAM discard listener"); + /* does not return */ + } + return vrdl; +} + static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, listener); + vfio_container_region_add(bcontainer, section, false); +} + +void vfio_container_region_add(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, + bool cpr_remap) +{ hwaddr iova, end; Int128 llend, llsize; void *vaddr; @@ -463,6 +523,11 @@ static void vfio_listener_region_add(MemoryListener *listener, int iommu_idx; trace_vfio_listener_region_add_iommu(section->mr->name, iova, end); + + if (cpr_remap) { + vfio_cpr_giommu_remap(bcontainer, section); + } + /* * FIXME: For VFIO iommu types which have KVM acceleration to * avoid bouncing all map/unmaps through qemu this way, this @@ -505,7 +570,12 @@ static void vfio_listener_region_add(MemoryListener *listener, * about changes. 
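The new begin/commit forwarding gives a VFIOIOMMUClass implementation transaction brackets around a batch of region_add/region_del callbacks. Neither backend in this series wires them up here; a hypothetical backend could use them to coalesce mappings like this:

/* Hypothetical backend hooks: collect mappings, flush once per transaction. */
static void demo_listener_begin(VFIOContainerBase *bcontainer)
{
    /* open a batch; subsequent dma_map calls are queued, not issued */
}

static void demo_listener_commit(VFIOContainerBase *bcontainer)
{
    /* issue the queued mappings in one backend round-trip */
}

static void demo_iommu_class_init(VFIOIOMMUClass *vioc)
{
    vioc->listener_begin = demo_listener_begin;
    vioc->listener_commit = demo_listener_commit;
}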
*/ if (memory_region_has_ram_discard_manager(section->mr)) { - vfio_ram_discard_register_listener(bcontainer, section); + if (!cpr_remap) { + vfio_ram_discard_register_listener(bcontainer, section); + } else if (!vfio_cpr_ram_discard_register_listener(bcontainer, + section)) { + goto fail; + } return; } @@ -531,7 +601,7 @@ static void vfio_listener_region_add(MemoryListener *listener, } ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize), - vaddr, section->readonly); + vaddr, section->readonly, section->mr); if (ret) { error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%s)", @@ -634,21 +704,14 @@ static void vfio_listener_region_del(MemoryListener *listener, } if (try_unmap) { + bool unmap_all = false; + if (int128_eq(llsize, int128_2_64())) { - /* The unmap ioctl doesn't accept a full 64-bit span. */ - llsize = int128_rshift(llsize, 1); - ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); - if (ret) { - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " - "0x%"HWADDR_PRIx") = %d (%s)", - bcontainer, iova, int128_get64(llsize), ret, - strerror(-ret)); - } - iova += int128_get64(llsize); + unmap_all = true; + llsize = int128_zero(); } - ret = vfio_container_dma_unmap(bcontainer, iova, - int128_get64(llsize), NULL); + ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize), + NULL, unmap_all); if (ret) { error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx") = %d (%s)", @@ -801,13 +864,17 @@ static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + int ret; + if (!vbasedev->dirty_tracking) { continue; } - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + ret = vbasedev->io_ops->device_feature(vbasedev, feature); + + if (ret != 0) { warn_report("%s: Failed to stop DMA logging, err %d (%s)", - vbasedev->name, -errno, strerror(errno)); + vbasedev->name, -ret, strerror(-ret)); } vbasedev->dirty_tracking = false; } @@ -908,10 +975,9 @@ static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, continue; } - ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + ret = vbasedev->io_ops->device_feature(vbasedev, feature); if (ret) { - ret = -errno; - error_setg_errno(errp, errno, "%s: Failed to start DMA logging", + error_setg_errno(errp, -ret, "%s: Failed to start DMA logging", vbasedev->name); goto out; } @@ -988,6 +1054,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) ram_addr_t translated_addr; Error *local_err = NULL; int ret = -EINVAL; + MemoryRegion *mr; + hwaddr xlat; trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask); @@ -999,9 +1067,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) } rcu_read_lock(); - if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) { + mr = vfio_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { goto out_unlock; } + translated_addr = memory_region_get_ram_addr(mr) + xlat; ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, translated_addr, &local_err); @@ -1053,19 +1123,8 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, MemoryRegionSection *section) { RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl = NULL; - - QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, 
next) { - if (vrdl->mr == section->mr && - vrdl->offset_within_address_space == - section->offset_within_address_space) { - break; - } - } - - if (!vrdl) { - hw_error("vfio: Trying to sync missing RAM discard listener"); - } + VFIORamDiscardListener *vrdl = + vfio_find_ram_discard_listener(bcontainer, section); /* * We only want/can synchronize the bitmap for actually mapped parts - @@ -1165,6 +1224,8 @@ static void vfio_listener_log_sync(MemoryListener *listener, static const MemoryListener vfio_memory_listener = { .name = "vfio", + .begin = vfio_listener_begin, + .commit = vfio_listener_commit, .region_add = vfio_listener_region_add, .region_del = vfio_listener_region_del, .log_global_start = vfio_listener_log_global_start, diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index bccb050..bfaf6be 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + vfio_ss = ss.source_set() vfio_ss.add(files( 'listener.c', @@ -21,6 +23,7 @@ system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) system_ss.add(when: 'CONFIG_VFIO', if_true: files( 'cpr.c', + 'cpr-legacy.c', 'device.c', 'migration.c', 'migration-multifd.c', @@ -28,7 +31,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( )) system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( 'iommufd.c', + 'cpr-iommufd.c', )) +system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c')) system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( 'display.c', )) diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c index 850a319..e478503 100644 --- a/hw/vfio/migration-multifd.c +++ b/hw/vfio/migration-multifd.c @@ -13,7 +13,6 @@ #include "hw/vfio/vfio-device.h" #include "migration/misc.h" #include "qapi/error.h" -#include "qemu/bswap.h" #include "qemu/error-report.h" #include "qemu/lockable.h" #include "qemu/main-loop.h" @@ -23,6 +22,7 @@ #include "migration-multifd.h" #include "vfio-migration-internal.h" #include "trace.h" +#include "vfio-helpers.h" #define VFIO_DEVICE_STATE_CONFIG_STATE (1) @@ -35,6 +35,18 @@ typedef struct VFIODeviceStatePacket { uint8_t data[0]; } QEMU_PACKED VFIODeviceStatePacket; +bool vfio_load_config_after_iter(VFIODevice *vbasedev) +{ + if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_ON) { + return true; + } else if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_OFF) { + return false; + } + + assert(vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_AUTO); + return vfio_arch_wants_loading_config_after_iter(); +} + /* type safety */ typedef struct VFIOStateBuffers { GArray *array; @@ -50,12 +62,16 @@ typedef struct VFIOMultifd { bool load_bufs_thread_running; bool load_bufs_thread_want_exit; + bool load_bufs_iter_done; + QemuCond load_bufs_iter_done_cond; + VFIOStateBuffers load_bufs; QemuCond load_bufs_buffer_ready_cond; QemuCond load_bufs_thread_finished_cond; QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ uint32_t load_buf_idx; uint32_t load_buf_idx_last; + size_t load_buf_queued_pending_buffers_size; } VFIOMultifd; static void vfio_state_buffer_clear(gpointer data) @@ -112,6 +128,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, VFIOMigration *migration = vbasedev->migration; VFIOMultifd *multifd = migration->multifd; VFIOStateBuffer *lb; + size_t data_size = packet_total_size - sizeof(*packet); vfio_state_buffers_assert_init(&multifd->load_bufs); if 
(packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { @@ -127,8 +144,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, assert(packet->idx >= multifd->load_buf_idx); - lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); - lb->len = packet_total_size - sizeof(*packet); + multifd->load_buf_queued_pending_buffers_size += data_size; + if (multifd->load_buf_queued_pending_buffers_size > + vbasedev->migration_max_queued_buffers_size) { + error_setg(errp, + "%s: queuing state buffer %" PRIu32 + " would exceed the size max of %" PRIu64, + vbasedev->name, packet->idx, + vbasedev->migration_max_queued_buffers_size); + return false; + } + + lb->data = g_memdup2(&packet->data, data_size); + lb->len = data_size; lb->is_present = true; return true; @@ -312,6 +340,9 @@ static bool vfio_load_state_buffer_write(VFIODevice *vbasedev, assert(wr_ret <= buf_len); buf_len -= wr_ret; buf_cur += wr_ret; + + assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret); + multifd->load_buf_queued_pending_buffers_size -= wr_ret; } trace_vfio_load_state_device_buffer_load_end(vbasedev->name, @@ -394,6 +425,22 @@ static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp) multifd->load_buf_idx++; } + if (vfio_load_config_after_iter(vbasedev)) { + while (!multifd->load_bufs_iter_done) { + qemu_cond_wait(&multifd->load_bufs_iter_done_cond, + &multifd->load_bufs_mutex); + + /* + * Need to re-check cancellation immediately after wait in case + * cond was signalled by vfio_load_cleanup_load_bufs_thread(). + */ + if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) { + error_setg(errp, "operation cancelled"); + goto thread_exit; + } + } + } + if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) { goto thread_exit; } @@ -413,6 +460,48 @@ thread_exit: return ret; } +int vfio_load_state_config_load_ready(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + VFIOMultifd *multifd = migration->multifd; + int ret = 0; + + if (!vfio_multifd_transfer_enabled(vbasedev)) { + error_report("%s: got DEV_CONFIG_LOAD_READY outside multifd transfer", + vbasedev->name); + return -EINVAL; + } + + if (!vfio_load_config_after_iter(vbasedev)) { + error_report("%s: got DEV_CONFIG_LOAD_READY but was disabled", + vbasedev->name); + return -EINVAL; + } + + assert(multifd); + + /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */ + bql_unlock(); + WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) { + if (multifd->load_bufs_iter_done) { + /* Can't print error here as we're outside BQL */ + ret = -EINVAL; + break; + } + + multifd->load_bufs_iter_done = true; + qemu_cond_signal(&multifd->load_bufs_iter_done_cond); + } + bql_lock(); + + if (ret) { + error_report("%s: duplicate DEV_CONFIG_LOAD_READY", + vbasedev->name); + } + + return ret; +} + static VFIOMultifd *vfio_multifd_new(void) { VFIOMultifd *multifd = g_new(VFIOMultifd, 1); @@ -423,8 +512,12 @@ static VFIOMultifd *vfio_multifd_new(void) multifd->load_buf_idx = 0; multifd->load_buf_idx_last = UINT32_MAX; + multifd->load_buf_queued_pending_buffers_size = 0; qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); + multifd->load_bufs_iter_done = false; + qemu_cond_init(&multifd->load_bufs_iter_done_cond); + multifd->load_bufs_thread_running = false; multifd->load_bufs_thread_want_exit = false; qemu_cond_init(&multifd->load_bufs_thread_finished_cond); @@ -448,6 +541,7 @@ static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd) multifd->load_bufs_thread_want_exit 
= true; qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); + qemu_cond_signal(&multifd->load_bufs_iter_done_cond); qemu_cond_wait(&multifd->load_bufs_thread_finished_cond, &multifd->load_bufs_mutex); } @@ -460,6 +554,7 @@ static void vfio_multifd_free(VFIOMultifd *multifd) vfio_load_cleanup_load_bufs_thread(multifd); qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond); + qemu_cond_destroy(&multifd->load_bufs_iter_done_cond); vfio_state_buffers_destroy(&multifd->load_bufs); qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); qemu_mutex_destroy(&multifd->load_bufs_mutex); @@ -583,7 +678,7 @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, /* * This thread is spawned by the migration core directly via - * .save_live_complete_precopy_thread SaveVMHandler. + * .save_complete_precopy_thread SaveVMHandler. * * It exits after either: * * completing saving the remaining device state and device config, OR: @@ -592,7 +687,7 @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, * multifd_device_state_save_thread_should_exit() returning true. */ bool -vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, +vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d, Error **errp) { VFIODevice *vbasedev = d->handler_opaque; diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h index 0bab632..82d2d3a 100644 --- a/hw/vfio/migration-multifd.h +++ b/hw/vfio/migration-multifd.h @@ -20,13 +20,16 @@ void vfio_multifd_cleanup(VFIODevice *vbasedev); bool vfio_multifd_transfer_supported(void); bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); +bool vfio_load_config_after_iter(VFIODevice *vbasedev); bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, Error **errp); +int vfio_load_state_config_load_ready(VFIODevice *vbasedev); + void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f); bool -vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, +vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d, Error **errp); int vfio_multifd_switchover_start(VFIODevice *vbasedev); diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 1dceab1..4c06e3d 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -675,7 +675,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque) int ret; if (vfio_multifd_transfer_enabled(vbasedev)) { - vfio_multifd_emit_dummy_eos(vbasedev, f); + if (vfio_load_config_after_iter(vbasedev)) { + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY); + } else { + vfio_multifd_emit_dummy_eos(vbasedev, f); + } return; } @@ -784,6 +788,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) return ret; } + case VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY: + { + return vfio_load_state_config_load_ready(vbasedev); + } default: error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); return -EINVAL; @@ -824,7 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { .state_pending_exact = vfio_state_pending_exact, .is_active_iterate = vfio_is_active_iterate, .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_save_complete_precopy, + .save_complete = vfio_save_complete_precopy, .save_state = vfio_save_state, .load_setup = vfio_load_setup, .load_cleanup = vfio_load_cleanup, @@ -835,7 +843,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { */ .load_state_buffer = vfio_multifd_load_state_buffer, .switchover_start = 
vfio_switchover_start, - .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread, + .save_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread, }; /* ---------------------------------------------------------------------- */ @@ -1016,7 +1024,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) vfio_vmstate_change_prepare : NULL; migration->vm_state = qdev_add_vm_change_state_handler_full( - vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev); + vbasedev->dev, vfio_vmstate_change, prepare_cb, NULL, vbasedev); migration_add_notifier(&migration->migration_state, vfio_migration_state_notifier); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 6908bcc..4fa692c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -29,7 +29,9 @@ #include "hw/pci/pci_bridge.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "hw/vfio/vfio-cpr.h" #include "migration/vmstate.h" +#include "migration/cpr.h" #include "qobject/qdict.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -47,8 +49,6 @@ #include "vfio-migration-internal.h" #include "vfio-helpers.h" -#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" - /* Protected by BQL */ static KVMRouteChange vfio_route_change; @@ -56,6 +56,36 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +/* Create new or reuse existing eventfd */ +static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e, + const char *name, int nr, Error **errp) +{ + int fd, ret; + + fd = vfio_cpr_load_vector_fd(vdev, name, nr); + if (fd >= 0) { + event_notifier_init_fd(e, fd); + return true; + } + + ret = event_notifier_init(e, 0); + if (ret) { + error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name); + return false; + } + + fd = event_notifier_get_fd(e); + vfio_cpr_save_vector_fd(vdev, name, nr, fd); + return true; +} + +static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e, + const char *name, int nr) +{ + vfio_cpr_delete_vector_fd(vdev, name, nr); + event_notifier_cleanup(e); +} + /* * Disabling BAR mmaping can be slow, but toggling it around INTx can * also be a huge overhead. 
We try to get the best of both worlds by @@ -103,7 +133,7 @@ static void vfio_intx_interrupt(void *opaque) } } -static void vfio_intx_eoi(VFIODevice *vbasedev) +void vfio_pci_intx_eoi(VFIODevice *vbasedev) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); @@ -111,7 +141,7 @@ static void vfio_intx_eoi(VFIODevice *vbasedev) return; } - trace_vfio_intx_eoi(vbasedev->name); + trace_vfio_pci_intx_eoi(vbasedev->name); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -136,8 +166,7 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) pci_irq_deassert(&vdev->pdev); /* Get an eventfd for resample/unmask */ - if (event_notifier_init(&vdev->intx.unmask, 0)) { - error_setg(errp, "event_notifier_init failed eoi"); + if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { goto fail; } @@ -169,7 +198,7 @@ fail_vfio: kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt, vdev->intx.route.irq); fail_irqfd: - event_notifier_cleanup(&vdev->intx.unmask); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); fail: qemu_set_fd_handler(irq_fd, vfio_intx_interrupt, NULL, vdev); vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); @@ -179,6 +208,36 @@ fail: #endif } +static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) +{ +#ifdef CONFIG_KVM + if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || + vdev->intx.route.mode != PCI_INTX_ENABLED || + !kvm_resamplefds_enabled()) { + return true; + } + + if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { + return false; + } + + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, + &vdev->intx.interrupt, + &vdev->intx.unmask, + vdev->intx.route.irq)) { + error_setg_errno(errp, errno, "failed to setup resample irqfd"); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); + return false; + } + + vdev->intx.kvm_accel = true; + trace_vfio_intx_enable_kvm(vdev->vbasedev.name); + return true; +#else + return true; +#endif +} + static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM @@ -201,7 +260,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) } /* We only need to close the eventfd for VFIO to cleanup the kernel side */ - event_notifier_cleanup(&vdev->intx.unmask); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); /* QEMU starts listening for interrupt events. */ qemu_set_fd_handler(event_notifier_get_fd(&vdev->intx.interrupt), @@ -236,12 +295,12 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route) } /* Re-enable the interrupt in cased we missed an EOI */ - vfio_intx_eoi(&vdev->vbasedev); + vfio_pci_intx_eoi(&vdev->vbasedev); } static void vfio_intx_routing_notifier(PCIDevice *pdev) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); PCIINTxRoute route; if (vdev->interrupt != VFIO_INT_INTx) { @@ -268,14 +327,19 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); Error *err = NULL; int32_t fd; - int ret; if (!pin) { return true; } - vfio_disable_interrupts(vdev); + /* + * Do not alter interrupt state during vfio_realize and cpr load. + * The incoming state is cleared thereafter. 
+ */ + if (!cpr_is_incoming()) { + vfio_disable_interrupts(vdev); + } vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ pci_config_set_interrupt_pin(vdev->pdev.config, pin); @@ -291,18 +355,25 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) } #endif - ret = event_notifier_init(&vdev->intx.interrupt, 0); - if (ret) { - error_setg_errno(errp, -ret, "event_notifier_init failed"); + if (!vfio_notifier_init(vdev, &vdev->intx.interrupt, "intx-interrupt", 0, + errp)) { return false; } fd = event_notifier_get_fd(&vdev->intx.interrupt); qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev); + + if (cpr_is_incoming()) { + if (!vfio_cpr_intx_enable_kvm(vdev, &err)) { + warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); + } + goto skip_signaling; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->intx.interrupt); + vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0); return false; } @@ -310,6 +381,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } +skip_signaling: vdev->interrupt = VFIO_INT_INTx; trace_vfio_intx_enable(vdev->vbasedev.name); @@ -329,13 +401,18 @@ static void vfio_intx_disable(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->intx.interrupt); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->intx.interrupt); + vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0); vdev->interrupt = VFIO_INT_NONE; trace_vfio_intx_disable(vdev->vbasedev.name); } +bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp) +{ + return vfio_intx_enable(vdev, errp); +} + /* * MSI/X */ @@ -374,6 +451,14 @@ static void vfio_msi_interrupt(void *opaque) notify(&vdev->pdev, nr); } +void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr) +{ + VFIOMSIVector *vector = &vdev->msi_vectors[nr]; + int fd = event_notifier_get_fd(&vector->interrupt); + + qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector); +} + /* * Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid * fd to kernel. 
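As context for the hunk that follows, here is a minimal sketch of the invalid-fd trick the comment above describes (the helper name is illustrative; vfio_enable_msix_no_vec() below is the real implementation): signalling MSI-X vector 0 with an eventfd of -1 makes the kernel enable MSI-X without wiring up any vector.

static int enable_msix_without_vectors(VFIODevice *vbasedev)
{
    g_autofree struct vfio_irq_set *irq_set = NULL;
    int argsz = sizeof(*irq_set) + sizeof(int32_t);
    int32_t *fd;

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
    irq_set->start = 0;
    irq_set->count = 1;
    fd = (int32_t *)&irq_set->data;
    *fd = -1; /* invalid fd: MSI-X is enabled but nothing is ever signalled */

    /* dispatch through io_ops, as this series does, instead of a raw ioctl() */
    return vbasedev->io_ops->set_irqs(vbasedev, irq_set);
}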
@@ -381,7 +466,7 @@ static void vfio_msi_interrupt(void *opaque) static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) { g_autofree struct vfio_irq_set *irq_set = NULL; - int ret = 0, argsz; + int argsz; int32_t *fd; argsz = sizeof(*irq_set) + sizeof(*fd); @@ -396,9 +481,7 @@ static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) fd = (int32_t *)&irq_set->data; *fd = -1; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); - - return ret; + return vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); } static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) @@ -455,15 +538,15 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) fds[i] = fd; } - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); g_free(irq_set); return ret; } -static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, - int vector_n, bool msix) +void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + int vector_n, bool msix) { if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { return; @@ -473,13 +556,16 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, vector_n, &vdev->pdev); } -static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector) +static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector, int nr) { + const char *name = "kvm_interrupt"; + if (vector->virq < 0) { return; } - if (event_notifier_init(&vector->kvm_interrupt, 0)) { + if (!vfio_notifier_init(vector->vdev, &vector->kvm_interrupt, name, nr, + NULL)) { goto fail_notifier; } @@ -491,19 +577,20 @@ static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector) return; fail_kvm: - event_notifier_cleanup(&vector->kvm_interrupt); + vfio_notifier_cleanup(vector->vdev, &vector->kvm_interrupt, name, nr); fail_notifier: kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; } -static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) +static void vfio_remove_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + int nr) { kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, vector->virq); kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; - event_notifier_cleanup(&vector->kvm_interrupt); + vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, "kvm_interrupt", nr); } static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, @@ -513,10 +600,47 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, kvm_irqchip_commit_routes(kvm_state); } +static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector, + unsigned int nr) +{ + Error *err = NULL; + int32_t fd; + + if (vector->virq >= 0) { + fd = event_notifier_get_fd(&vector->kvm_interrupt); + } else { + fd = event_notifier_get_fd(&vector->interrupt); + } + + if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr, + VFIO_IRQ_SET_ACTION_TRIGGER, + fd, &err)) { + error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name); + } +} + +void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr) +{ + VFIOMSIVector *vector = &vdev->msi_vectors[nr]; + PCIDevice *pdev = &vdev->pdev; + Error *local_err = NULL; + + vector->vdev = vdev; + vector->virq = -1; + if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", nr, + &local_err)) { + error_report_err(local_err); + } + vector->use = true; + if (vdev->interrupt == VFIO_INT_MSIX) { + msix_vector_use(pdev, nr); + } +} + static int 
vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIOMSIVector *vector; int ret; bool resizing = !!(vdev->nr_vectors < nr + 1); @@ -526,13 +650,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vector = &vdev->msi_vectors[nr]; if (!vector->use) { - vector->vdev = vdev; - vector->virq = -1; - if (event_notifier_init(&vector->interrupt, 0)) { - error_report("vfio: Error: event_notifier_init failed"); - } - vector->use = true; - msix_vector_use(pdev, nr); + vfio_pci_vector_init(vdev, nr); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), @@ -544,19 +662,19 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, */ if (vector->virq >= 0) { if (!msg) { - vfio_remove_kvm_msi_virq(vector); + vfio_remove_kvm_msi_virq(vdev, vector, nr); } else { vfio_update_kvm_msi_virq(vector, *msg, pdev); } } else { if (msg) { if (vdev->defer_kvm_irq_routing) { - vfio_add_kvm_msi_virq(vdev, vector, nr, true); + vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true); } else { vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state); - vfio_add_kvm_msi_virq(vdev, vector, nr, true); + vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true); kvm_irqchip_commit_route_changes(&vfio_route_change); - vfio_connect_kvm_msi_virq(vector); + vfio_connect_kvm_msi_virq(vector, nr); } } } @@ -581,24 +699,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); ret = vfio_enable_vectors(vdev, true); if (ret) { - error_report("vfio: failed to enable vectors, %d", ret); + error_report("vfio: failed to enable vectors, %s", + strerror(-ret)); } } else { - Error *err = NULL; - int32_t fd; - - if (vector->virq >= 0) { - fd = event_notifier_get_fd(&vector->kvm_interrupt); - } else { - fd = event_notifier_get_fd(&vector->interrupt); - } - - if (!vfio_device_irq_set_signaling(&vdev->vbasedev, - VFIO_PCI_MSIX_IRQ_INDEX, nr, - VFIO_IRQ_SET_ACTION_TRIGGER, fd, - &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); - } + set_irq_signalling(&vdev->vbasedev, vector, nr); } } @@ -616,12 +721,21 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, static int vfio_msix_vector_use(PCIDevice *pdev, unsigned int nr, MSIMessage msg) { + /* + * Ignore the callback from msix_set_vector_notifiers during resume. + * The necessary subset of these actions is called from + * vfio_cpr_claim_vectors during post load. 
+ */ + if (cpr_is_incoming()) { + return 0; + } + return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt); } static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIOMSIVector *vector = &vdev->msi_vectors[nr]; trace_vfio_msix_vector_release(vdev->vbasedev.name, nr); @@ -646,14 +760,20 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } -static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) +void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev) +{ + msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, + vfio_msix_vector_release, NULL); +} + +void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { assert(!vdev->defer_kvm_irq_routing); vdev->defer_kvm_irq_routing = true; vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state); } -static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) +void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { int i; @@ -663,7 +783,7 @@ static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) kvm_irqchip_commit_route_changes(&vfio_route_change); for (i = 0; i < vdev->nr_vectors; i++) { - vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]); + vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i], i); } } @@ -683,19 +803,20 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) * routes once rather than per vector provides a substantial * performance improvement. */ - vfio_prepare_kvm_msi_virq_batch(vdev); + vfio_pci_prepare_kvm_msi_virq_batch(vdev); if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, vfio_msix_vector_release, NULL)) { error_report("vfio: msix_set_vector_notifiers failed"); } - vfio_commit_kvm_msi_virq_batch(vdev); + vfio_pci_commit_kvm_msi_virq_batch(vdev); if (vdev->nr_vectors) { ret = vfio_enable_vectors(vdev, true); if (ret) { - error_report("vfio: failed to enable vectors, %d", ret); + error_report("vfio: failed to enable vectors, %s", + strerror(-ret)); } } else { /* @@ -712,7 +833,8 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) */ ret = vfio_enable_msix_no_vec(vdev); if (ret) { - error_report("vfio: failed to enable MSI-X, %d", ret); + error_report("vfio: failed to enable MSI-X, %s", + strerror(-ret)); } } @@ -732,19 +854,21 @@ retry: * Deferring to commit the KVM routes once rather than per vector * provides a substantial performance improvement. */ - vfio_prepare_kvm_msi_virq_batch(vdev); + vfio_pci_prepare_kvm_msi_virq_batch(vdev); vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; + Error *local_err = NULL; vector->vdev = vdev; vector->virq = -1; vector->use = true; - if (event_notifier_init(&vector->interrupt, 0)) { - error_report("vfio: Error: event_notifier_init failed"); + if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", i, + &local_err)) { + error_report_err(local_err); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), @@ -754,10 +878,10 @@ retry: * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. 
*/ - vfio_add_kvm_msi_virq(vdev, vector, i, false); + vfio_pci_add_kvm_msi_virq(vdev, vector, i, false); } - vfio_commit_kvm_msi_virq_batch(vdev); + vfio_pci_commit_kvm_msi_virq_batch(vdev); /* Set interrupt type prior to possible interrupts */ vdev->interrupt = VFIO_INT_MSI; @@ -765,7 +889,8 @@ retry: ret = vfio_enable_vectors(vdev, false); if (ret) { if (ret < 0) { - error_report("vfio: Error: Failed to setup MSI fds: %m"); + error_report("vfio: Error: Failed to setup MSI fds: %s", + strerror(-ret)); } else { error_report("vfio: Error: Failed to enable %d " "MSI vectors, retry with %d", vdev->nr_vectors, ret); @@ -799,11 +924,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) VFIOMSIVector *vector = &vdev->msi_vectors[i]; if (vdev->msi_vectors[i].use) { if (vector->virq >= 0) { - vfio_remove_kvm_msi_virq(vector); + vfio_remove_kvm_msi_virq(vdev, vector, i); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), NULL, NULL, NULL); - event_notifier_cleanup(&vector->interrupt); + vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i); } } @@ -881,18 +1006,22 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) static void vfio_pci_load_rom(VFIOPCIDevice *vdev) { - g_autofree struct vfio_region_info *reg_info = NULL; + VFIODevice *vbasedev = &vdev->vbasedev; + struct vfio_region_info *reg_info = NULL; uint64_t size; off_t off = 0; ssize_t bytes; + int ret; + + ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_ROM_REGION_INDEX, + &reg_info); - if (vfio_device_get_region_info(&vdev->vbasedev, - VFIO_PCI_ROM_REGION_INDEX, &reg_info)) { - error_report("vfio: Error getting ROM info: %m"); + if (ret != 0) { + error_report("vfio: Error getting ROM info: %s", strerror(-ret)); return; } - trace_vfio_pci_load_rom(vdev->vbasedev.name, (unsigned long)reg_info->size, + trace_vfio_pci_load_rom(vbasedev->name, (unsigned long)reg_info->size, (unsigned long)reg_info->offset, (unsigned long)reg_info->flags); @@ -901,8 +1030,7 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) if (!vdev->rom_size) { vdev->rom_read_failed = true; - error_report("vfio-pci: Cannot read device rom at " - "%s", vdev->vbasedev.name); + error_report("vfio-pci: Cannot read device rom at %s", vbasedev->name); error_printf("Device option ROM contents are probably invalid " "(check dmesg).\nSkip option ROM probe with rombar=0, " "or load from file with romfile=\n"); @@ -913,18 +1041,22 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) memset(vdev->rom, 0xff, size); while (size) { - bytes = pread(vdev->vbasedev.fd, vdev->rom + off, - size, vdev->rom_offset + off); + bytes = vbasedev->io_ops->region_read(vbasedev, + VFIO_PCI_ROM_REGION_INDEX, + off, size, vdev->rom + off); + if (bytes == 0) { break; } else if (bytes > 0) { off += bytes; size -= bytes; } else { - if (errno == EINTR || errno == EAGAIN) { + if (bytes == -EINTR || bytes == -EAGAIN) { continue; } - error_report("vfio: Error reading device ROM: %m"); + error_report("vfio: Error reading device ROM: %s", + strreaderror(bytes)); + break; } } @@ -960,6 +1092,24 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev) } } +/* "Raw" read of underlying config space. */ +static int vfio_pci_config_space_read(VFIOPCIDevice *vdev, off_t offset, + uint32_t size, void *data) +{ + return vdev->vbasedev.io_ops->region_read(&vdev->vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, + offset, size, data); +} + +/* "Raw" write of underlying config space.
*/ +static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset, + uint32_t size, void *data) +{ + return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev, + VFIO_PCI_CONFIG_REGION_INDEX, + offset, size, data, false); +} + static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) { VFIOPCIDevice *vdev = opaque; @@ -1012,10 +1162,9 @@ static const MemoryRegionOps vfio_rom_ops = { static void vfio_pci_size_rom(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); - off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; char *name; - int fd = vdev->vbasedev.fd; if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { /* Since pci handles romfile, just print a message and return */ @@ -1032,11 +1181,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) * Use the same size ROM BAR as the physical device. The contents * will get filled in later when the guest tries to read it. */ - if (pread(fd, &orig, 4, offset) != 4 || - pwrite(fd, &size, 4, offset) != 4 || - pread(fd, &size, 4, offset) != 4 || - pwrite(fd, &orig, 4, offset) != 4) { - error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name); + if (vfio_pci_config_space_read(vdev, PCI_ROM_ADDRESS, 4, &orig) != 4 || + vfio_pci_config_space_write(vdev, PCI_ROM_ADDRESS, 4, &size) != 4 || + vfio_pci_config_space_read(vdev, PCI_ROM_ADDRESS, 4, &size) != 4 || + vfio_pci_config_space_write(vdev, PCI_ROM_ADDRESS, 4, &orig) != 4) { + + error_report("%s(%s) ROM access failed", __func__, vbasedev->name); return; } @@ -1169,7 +1319,7 @@ static const MemoryRegionOps vfio_vga_ops = { */ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIORegion *region = &vdev->bars[bar].region; MemoryRegion *mmap_mr, *region_mr, *base_mr; PCIIORegion *r; @@ -1215,7 +1365,8 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) */ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); @@ -1228,12 +1379,12 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { ssize_t ret; - ret = pread(vdev->vbasedev.fd, &phys_val, len, - vdev->config_offset + addr); + ret = vfio_pci_config_space_read(vdev, addr, len, &phys_val); if (ret != len) { - error_report("%s(%s, 0x%x, 0x%x) failed: %m", - __func__, vdev->vbasedev.name, addr, len); - return -errno; + error_report("%s(%s, 0x%x, 0x%x) failed: %s", + __func__, vbasedev->name, addr, len, + strreaderror(ret)); + return -1; } phys_val = le32_to_cpu(phys_val); } @@ -1248,16 +1399,19 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; uint32_t val_le = cpu_to_le32(val); + int ret; trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len); /* Write everything to VFIO, let it filter out what we can't write */ - if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr) - != len) { - error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m", - 
__func__, vdev->vbasedev.name, addr, val, len); + ret = vfio_pci_config_space_write(vdev, addr, len, &val_le); + if (ret != len) { + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %s", + __func__, vbasedev->name, addr, val, len, + strwriteerror(ret)); } /* MSI/MSI-X Enabling/Disabling */ @@ -1345,9 +1499,11 @@ static bool vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) int ret, entries; Error *err = NULL; - if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { - error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_CAP_FLAGS, + sizeof(ctrl), &ctrl); + if (ret != sizeof(ctrl)) { + error_setg(errp, "failed reading MSI PCI_CAP_FLAGS: %s", + strreaderror(ret)); return false; } ctrl = le16_to_cpu(ctrl); @@ -1554,31 +1710,35 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) uint8_t pos; uint16_t ctrl; uint32_t table, pba; - int ret, fd = vdev->vbasedev.fd; - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info), - .index = VFIO_PCI_MSIX_IRQ_INDEX }; + struct vfio_irq_info irq_info; VFIOMSIXInfo *msix; + int ret; pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); if (!pos) { return true; } - if (pread(fd, &ctrl, sizeof(ctrl), - vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_FLAGS, + sizeof(ctrl), &ctrl); + if (ret != sizeof(ctrl)) { + error_setg(errp, "failed to read PCI MSIX FLAGS: %s", + strreaderror(ret)); return false; } - if (pread(fd, &table, sizeof(table), - vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_TABLE, + sizeof(table), &table); + if (ret != sizeof(table)) { + error_setg(errp, "failed to read PCI MSIX TABLE: %s", + strreaderror(ret)); return false; } - if (pread(fd, &pba, sizeof(pba), - vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) { - error_setg_errno(errp, errno, "failed to read PCI MSIX PBA"); + ret = vfio_pci_config_space_read(vdev, pos + PCI_MSIX_PBA, + sizeof(pba), &pba); + if (ret != sizeof(pba)) { + error_setg(errp, "failed to read PCI MSIX PBA: %s", strreaderror(ret)); return false; } @@ -1593,7 +1753,8 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK; msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, + &irq_info); if (ret < 0) { error_setg_errno(errp, -ret, "failed to get MSI-X irq info"); g_free(msix); @@ -1700,7 +1861,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) return true; } -static void vfio_teardown_msi(VFIOPCIDevice *vdev) +void vfio_pci_teardown_msi(VFIOPCIDevice *vdev) { msi_uninit(&vdev->pdev); @@ -1737,10 +1898,10 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) } /* Determine what type of BAR this is for registration */ - ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar), - vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr)); + ret = vfio_pci_config_space_read(vdev, PCI_BASE_ADDRESS_0 + (4 * nr), + sizeof(pci_bar), &pci_bar); if (ret != sizeof(pci_bar)) { - error_report("vfio: Failed to read BAR %d (%m)", nr); + error_report("vfio: Failed to 
read BAR %d: %s", nr, strreaderror(ret)); return; } @@ -1750,6 +1911,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); bar->size = bar->region.size; + + /* IO regions are sync, memory can be async */ + bar->region.post_wr = (bar->ioport == 0); } static void vfio_bars_prepare(VFIOPCIDevice *vdev) @@ -1796,7 +1960,7 @@ static void vfio_bars_register(VFIOPCIDevice *vdev) } } -static void vfio_bars_exit(VFIOPCIDevice *vdev) +void vfio_pci_bars_exit(VFIOPCIDevice *vdev) { int i; @@ -2387,7 +2551,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) g_free(config); } -static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -2443,21 +2607,23 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) void vfio_pci_post_reset(VFIOPCIDevice *vdev) { + VFIODevice *vbasedev = &vdev->vbasedev; Error *err = NULL; - int nr; + int ret, nr; if (!vfio_intx_enable(vdev, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) { - off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr); + off_t addr = PCI_BASE_ADDRESS_0 + (4 * nr); uint32_t val = 0; uint32_t len = sizeof(val); - if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) { - error_report("%s(%s) reset bar %d failed: %m", __func__, - vdev->vbasedev.name, nr); + ret = vfio_pci_config_space_write(vdev, addr, len, &val); + if (ret != len) { + error_report("%s(%s) reset bar %d failed: %s", __func__, + vbasedev->name, nr, strwriteerror(ret)); } } @@ -2658,10 +2824,24 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) return ret; } +void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + int page_size = qemu_real_host_page_size(); + int bar; + + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + PCIIORegion *r = &pdev->io_regions[bar]; + if (r->addr != PCI_BAR_UNMAPPED && r->size > 0 && r->size < page_size) { + vfio_sub_page_bar_update_mapping(pdev, bar); + } + } +} + static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, - .vfio_eoi = vfio_intx_eoi, + .vfio_eoi = vfio_pci_intx_eoi, .vfio_get_object = vfio_pci_get_object, .vfio_save_config = vfio_pci_save_config, .vfio_load_config = vfio_pci_load_config, @@ -2670,7 +2850,7 @@ static VFIODeviceOps vfio_pci_ops = { bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; - g_autofree struct vfio_region_info *reg_info = NULL; + struct vfio_region_info *reg_info = NULL; int ret; ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, &reg_info); @@ -2725,18 +2905,14 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) "vfio-vga-io@0x3c0", QEMU_PCI_VGA_IO_HI_SIZE); - pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, - &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, - &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); - return true; } -static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; - g_autofree struct vfio_region_info *reg_info = NULL; - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; + struct vfio_region_info *reg_info = NULL; + struct vfio_irq_info irq_info; int i, ret =
-1; /* Sanity check device */ @@ -2778,7 +2954,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) return false; } - trace_vfio_populate_device_config(vdev->vbasedev.name, + trace_vfio_pci_populate_device_config(vdev->vbasedev.name, (unsigned long)reg_info->size, (unsigned long)reg_info->offset, (unsigned long)reg_info->flags); @@ -2797,12 +2973,10 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) } } - irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; - - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); + ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - trace_vfio_populate_device_get_irq_info_failure(strerror(errno)); + trace_vfio_pci_populate_device_get_irq_info_failure(strerror(-ret)); } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { @@ -2814,11 +2988,23 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) return true; } -static void vfio_pci_put_device(VFIOPCIDevice *vdev) +void vfio_pci_put_device(VFIOPCIDevice *vdev) { + vfio_display_finalize(vdev); + vfio_bars_finalize(vdev); + g_free(vdev->emulated_config_bits); + g_free(vdev->rom); + /* + * XXX Leaking igd_opregion is not an oversight, we can't remove the + * fw_cfg entry therefore leaking this allocation seems like the safest + * option. + * + * g_free(vdev->igd_opregion); + */ + vfio_device_detach(&vdev->vbasedev); - g_free(vdev->vbasedev.name); + vfio_device_free_name(&vdev->vbasedev); g_free(vdev->msix); } @@ -2850,7 +3036,7 @@ static void vfio_err_notifier_handler(void *opaque) * and continue after disabling error recovery support for the * device. */ -static void vfio_register_err_notifier(VFIOPCIDevice *vdev) +void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev) { Error *err = NULL; int32_t fd; @@ -2859,8 +3045,9 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev) return; } - if (event_notifier_init(&vdev->err_notifier, 0)) { - error_report("vfio: Unable to init event notifier for error detection"); + if (!vfio_notifier_init(vdev, &vdev->err_notifier, "err_notifier", 0, + &err)) { + error_report_err(err); vdev->pci_aer = false; return; } @@ -2868,11 +3055,16 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->err_notifier); qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev); + /* Do not alter irq_signaling during vfio_realize for cpr */ + if (cpr_is_incoming()) { + return; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->err_notifier); + vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0); vdev->pci_aer = false; } } @@ -2891,7 +3083,7 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev) } qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier), NULL, NULL, vdev); - event_notifier_cleanup(&vdev->err_notifier); + vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0); } static void vfio_req_notifier_handler(void *opaque) @@ -2909,35 +3101,43 @@ static void vfio_req_notifier_handler(void *opaque) } } -static void vfio_register_req_notifier(VFIOPCIDevice *vdev) +void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev) { - struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info), - .index = VFIO_PCI_REQ_IRQ_INDEX }; 
+ struct vfio_irq_info irq_info; Error *err = NULL; int32_t fd; + int ret; if (!(vdev->features & VFIO_FEATURE_ENABLE_REQ)) { return; } - if (ioctl(vdev->vbasedev.fd, - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count < 1) { + ret = vfio_device_get_irq_info(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, + &irq_info); + if (ret < 0 || irq_info.count < 1) { return; } - if (event_notifier_init(&vdev->req_notifier, 0)) { - error_report("vfio: Unable to init event notifier for device request"); + if (!vfio_notifier_init(vdev, &vdev->req_notifier, "req_notifier", 0, + &err)) { + error_report_err(err); return; } fd = event_notifier_get_fd(&vdev->req_notifier); qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev); + /* Do not alter irq_signaling during vfio_realize for cpr */ + if (cpr_is_incoming()) { + vdev->req_enabled = true; + return; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->req_notifier); + vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0); } else { vdev->req_enabled = true; } @@ -2957,15 +3157,37 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) } qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier), NULL, NULL, vdev); - event_notifier_cleanup(&vdev->req_notifier); + vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0); vdev->req_enabled = false; } -static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) +void vfio_pci_config_register_vga(VFIOPCIDevice *vdev) +{ + assert(vdev->vga != NULL); + + pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); +} + +bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; VFIODevice *vbasedev = &vdev->vbasedev; + uint32_t config_space_size; + int ret; + + config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); + + /* Get a copy of config space */ + ret = vfio_pci_config_space_read(vdev, 0, config_space_size, + vdev->pdev.config); + if (ret < (int)config_space_size) { + ret = ret < 0 ? -ret : EFAULT; + error_setg_errno(errp, ret, "failed to read device config space"); + return false; + } /* vfio emulates a lot for us, but some bits need extra love */ vdev->emulated_config_bits = g_malloc0(vdev->config_size); @@ -3023,6 +3245,23 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) vdev->sub_device_id); } + /* + * Class code is a 24-bit value at config space 0x09. Allow overriding it + * with any 24-bit value. 
+ */ + if (vdev->class_code != PCI_ANY_ID) { + if (vdev->class_code > 0xffffff) { + error_setg(errp, "invalid PCI class code provided"); + return false; + } + /* Higher 24 bits of PCI_CLASS_REVISION are class code */ + vfio_add_emulated_long(vdev, PCI_CLASS_REVISION, + vdev->class_code << 8, ~0xff); + trace_vfio_pci_emulated_class_code(vbasedev->name, vdev->class_code); + } else { + vdev->class_code = pci_get_long(pdev->config + PCI_CLASS_REVISION) >> 8; + } + /* QEMU can change multi-function devices to single function, or reverse */ vdev->emulated_config_bits[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_MULTI_FUNCTION; @@ -3052,10 +3291,14 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) vfio_bars_register(vdev); + if (vdev->vga && vfio_is_vga(vdev)) { + vfio_pci_config_register_vga(vdev); + } + return true; } -static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -3077,7 +3320,13 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) vfio_intx_routing_notifier); vdev->irqchip_change_notifier.notify = vfio_irqchip_change; kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); - if (!vfio_intx_enable(vdev, errp)) { + + /* + * During CPR, do not call vfio_intx_enable at this time. Instead, + * call it from vfio_pci_post_load after the intx routing data has + * been loaded from vmstate. + */ + if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) { timer_free(vdev->intx.mmap_timer); pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); @@ -3087,12 +3336,12 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) return true; } -static void vfio_realize(PCIDevice *pdev, Error **errp) +static void vfio_pci_realize(PCIDevice *pdev, Error **errp) { ERRP_GUARD(); - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; - int i, ret; + int i; char uuid[UUID_STR_LEN]; g_autofree char *name = NULL; @@ -3145,17 +3394,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } - if (!vfio_populate_device(vdev, errp)) { - goto error; - } - - /* Get a copy of config space */ - ret = pread(vbasedev->fd, vdev->pdev.config, - MIN(pci_config_size(&vdev->pdev), vdev->config_size), - vdev->config_offset); - if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { - ret = ret < 0 ? 
-errno : -EFAULT; - error_setg_errno(errp, -ret, "failed to read device config space"); + if (!vfio_pci_populate_device(vdev, errp)) { goto error; } @@ -3169,7 +3408,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto out_teardown; } - if (!vfio_add_capabilities(vdev, errp)) { + if (!vfio_pci_add_capabilities(vdev, errp)) { goto out_unset_idev; } @@ -3185,7 +3424,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bar_quirk_setup(vdev, i); } - if (!vfio_interrupt_setup(vdev, errp)) { + if (!vfio_pci_interrupt_setup(vdev, errp)) { goto out_unset_idev; } @@ -3229,8 +3468,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } - vfio_register_err_notifier(vdev); - vfio_register_req_notifier(vdev); + vfio_pci_register_err_notifier(vdev); + vfio_pci_register_req_notifier(vdev); vfio_setup_resetfn_quirk(vdev); return; @@ -3251,33 +3490,22 @@ out_unset_idev: pci_device_unset_iommu_device(pdev); } out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); + vfio_pci_teardown_msi(vdev); + vfio_pci_bars_exit(vdev); error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); } static void vfio_instance_finalize(Object *obj) { - VFIOPCIDevice *vdev = VFIO_PCI(obj); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); - vfio_display_finalize(vdev); - vfio_bars_finalize(vdev); - g_free(vdev->emulated_config_bits); - g_free(vdev->rom); - /* - * XXX Leaking igd_opregion is not an oversight, we can't remove the - * fw_cfg entry therefore leaking this allocation seems like the safest - * option. - * - * g_free(vdev->igd_opregion); - */ vfio_pci_put_device(vdev); } static void vfio_exitfn(PCIDevice *pdev) { - VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; vfio_unregister_req_notifier(vdev); @@ -3290,9 +3518,9 @@ static void vfio_exitfn(PCIDevice *pdev) if (vdev->intx.mmap_timer) { timer_free(vdev->intx.mmap_timer); } - vfio_teardown_msi(vdev); + vfio_pci_teardown_msi(vdev); vfio_pci_disable_rp_atomics(vdev); - vfio_bars_exit(vdev); + vfio_pci_bars_exit(vdev); vfio_migration_exit(vbasedev); if (!vbasedev->mdev) { pci_device_unset_iommu_device(pdev); @@ -3301,7 +3529,12 @@ static void vfio_exitfn(PCIDevice *pdev) static void vfio_pci_reset(DeviceState *dev) { - VFIOPCIDevice *vdev = VFIO_PCI(dev); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); + + /* Do not reset the device during qemu_system_reset prior to cpr load */ + if (cpr_is_incoming()) { + return; + } trace_vfio_pci_reset(vdev->vbasedev.name); @@ -3341,7 +3574,7 @@ post_reset: static void vfio_instance_init(Object *obj) { PCIDevice *pci_dev = PCI_DEVICE(obj); - VFIOPCIDevice *vdev = VFIO_PCI(obj); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); VFIODevice *vbasedev = &vdev->vbasedev; device_add_bootindex_property(obj, &vdev->bootindex, @@ -3360,8 +3593,40 @@ static void vfio_instance_init(Object *obj) /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command * line, therefore, no need to wait to realize like other devices */ pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; + + /* + * A device that is resuming for cpr is already configured, so do not + * reset it during qemu_system_reset prior to cpr load, else interrupts + * may be lost. 
+ */ + pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR; } +static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); + + dc->desc = "VFIO PCI base device"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pdc->exit = vfio_exitfn; + pdc->config_read = vfio_pci_read_config; + pdc->config_write = vfio_pci_write_config; +} + +static const TypeInfo vfio_pci_base_dev_info = { + .name = TYPE_VFIO_PCI_BASE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(VFIOPCIDevice), + .abstract = true, + .class_init = vfio_pci_base_dev_class_init, + .interfaces = (const InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, +}; + static PropertyInfo vfio_pci_migration_multifd_transfer_prop; static const Property vfio_pci_dev_properties[] = { @@ -3385,7 +3650,7 @@ static const Property vfio_pci_dev_properties[] = { DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_REQ_BIT, true), DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, - VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, true), DEFINE_PROP_BIT("x-igd-lpc", VFIOPCIDevice, features, VFIO_FEATURE_ENABLE_IGD_LPC_BIT, false), DEFINE_PROP_ON_OFF_AUTO("x-igd-legacy-mode", VFIOPCIDevice, @@ -3396,6 +3661,11 @@ static const Property vfio_pci_dev_properties[] = { vbasedev.migration_multifd_transfer, vfio_pci_migration_multifd_transfer_prop, OnOffAuto, .set_default = true, .defval.i = ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice, + vbasedev.migration_load_config_after_iter, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice, + vbasedev.migration_max_queued_buffers_size, UINT64_MAX), DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice, vbasedev.migration_events, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), @@ -3416,6 +3686,8 @@ static const Property vfio_pci_dev_properties[] = { sub_vendor_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, sub_device_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-class-code", VFIOPCIDevice, + class_code, PCI_ANY_ID), DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0), DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice, nv_gpudirect_clique, @@ -3432,7 +3704,8 @@ static const Property vfio_pci_dev_properties[] = { #ifdef CONFIG_IOMMUFD static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) { - vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); + vfio_device_set_fd(&vdev->vbasedev, str, errp); } #endif @@ -3446,12 +3719,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) #ifdef CONFIG_IOMMUFD object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); #endif + dc->vmsd = &vfio_cpr_pci_vmstate; dc->desc = "VFIO-based PCI device assignment"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); - pdc->realize = vfio_realize; - pdc->exit = vfio_exitfn; - pdc->config_read = vfio_pci_read_config; - pdc->config_write = vfio_pci_write_config; + pdc->realize = vfio_pci_realize; object_class_property_set_description(klass, /* 1.3 */ "host", @@ -3572,24 +3842,34 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) "x-migration-multifd-transfer", "Transfer this device state via " "multifd channels when live migrating it"); 
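The x-pci-class-code override added earlier in this patch rewrites the config dword at PCI_CLASS_REVISION (offset 0x08), which carries the revision ID in bits 0-7 and the 24-bit class code in bits 8-31; hence the << 8 shift and the ~0xff mask that keep the hardware revision byte visible to the guest. A self-contained sketch of that packing (helper name illustrative):

#include <assert.h>
#include <stdint.h>

static uint32_t pack_class_revision(uint32_t class_code, uint8_t revision)
{
    assert(class_code <= 0xffffff); /* class code must fit in 24 bits */
    /* class code in bits 8-31, revision ID in bits 0-7 */
    return (class_code << 8) | revision;
}

/* pack_class_revision(0x030000, 0x01) == 0x03000001: VGA controller, rev 1 */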
+ object_class_property_set_description(klass, /* 10.1 */ + "x-migration-load-config-after-iter", + "Start the config load only after " + "all iterables were loaded (during " + "non-iterables loading phase) when " + "doing live migration of device state " + "via multifd channels"); + object_class_property_set_description(klass, /* 10.1 */ + "x-migration-max-queued-buffers-size", + "Maximum size of in-flight VFIO " + "device state buffers queued at the " + "destination when doing live " + "migration of device state via " + "multifd channels"); } static const TypeInfo vfio_pci_dev_info = { .name = TYPE_VFIO_PCI, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(VFIOPCIDevice), + .parent = TYPE_VFIO_PCI_BASE, .class_init = vfio_pci_dev_class_init, .instance_init = vfio_instance_init, .instance_finalize = vfio_instance_finalize, - .interfaces = (const InterfaceInfo[]) { - { INTERFACE_PCIE_DEVICE }, - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { } - }, }; static const Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), + DEFINE_PROP_BOOL("use-legacy-x86-rom", VFIOPCIDevice, + use_legacy_x86_rom, false), DEFINE_PROP_ON_OFF_AUTO("x-ramfb-migrate", VFIOPCIDevice, ramfb_migrate, ON_OFF_AUTO_AUTO), }; @@ -3632,6 +3912,7 @@ static void register_vfio_pci_dev_type(void) vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto; vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true; + type_register_static(&vfio_pci_base_dev_info); type_register_static(&vfio_pci_dev_info); type_register_static(&vfio_pci_nohotplug_dev_info); } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index f835b1d..81465a8 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -14,6 +14,7 @@ #include "system/memory.h" #include "hw/pci/pci_device.h" +#include "hw/vfio/types.h" #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-region.h" #include "qemu/event_notifier.h" @@ -116,10 +117,10 @@ typedef struct VFIOMSIXInfo { uint32_t pba_offset; unsigned long *pending; bool noresize; + MemoryRegion *pba_region; } VFIOMSIXInfo; -#define TYPE_VFIO_PCI "vfio-pci" -OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI) +OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_BASE) struct VFIOPCIDevice { PCIDevice pdev; @@ -148,6 +149,7 @@ struct VFIOPCIDevice { uint32_t device_id; uint32_t sub_vendor_id; uint32_t sub_device_id; + uint32_t class_code; uint32_t features; #define VFIO_FEATURE_ENABLE_VGA_BIT 0 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) @@ -179,6 +181,7 @@ struct VFIOPCIDevice { bool no_kvm_ioeventfd; bool no_vfio_ioeventfd; bool enable_ramfb; + bool use_legacy_x86_rom; OnOffAuto ramfb_migrate; bool defer_kvm_irq_routing; bool clear_parent_atomics_on_exit; @@ -196,12 +199,19 @@ static inline bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t de static inline bool vfio_is_vga(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; - uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); - - return class == PCI_CLASS_DISPLAY_VGA; + return (vdev->class_code >> 8) == PCI_CLASS_DISPLAY_VGA; } +/* MSI/MSI-X/INTx */ +void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr); +void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + int vector_n, bool msix); +void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev); +void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev); +bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev); +void 
vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr); + uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len); @@ -209,6 +219,7 @@ void vfio_pci_write_config(PCIDevice *pdev, uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); +void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev); bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev); bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp); void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); @@ -240,4 +251,16 @@ void vfio_display_finalize(VFIOPCIDevice *vdev); extern const VMStateDescription vfio_display_vmstate; +void vfio_pci_bars_exit(VFIOPCIDevice *vdev); +bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_config_register_vga(VFIOPCIDevice *vdev); +bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp); +bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_intx_eoi(VFIODevice *vbasedev); +void vfio_pci_put_device(VFIOPCIDevice *vdev); +bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev); +void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev); +void vfio_pci_teardown_msi(VFIOPCIDevice *vdev); + #endif /* HW_VFIO_VFIO_PCI_H */ diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index ffb3681..5c1795a 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -474,10 +474,10 @@ static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp) QSIMPLEQ_INIT(&vdev->pending_intp_queue); for (i = 0; i < vbasedev->num_irqs; i++) { - struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + struct vfio_irq_info irq; + + ret = vfio_device_get_irq_info(vbasedev, i, &irq); - irq.index = i; - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); if (ret) { error_setg_errno(errp, -ret, "failed to get device irq info"); goto irq_err; @@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp) { /* @fd takes precedence over @sysfsdev which takes precedence over @host */ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); } else if (vbasedev->fd < 0) { if (!vbasedev->name || strchr(vbasedev->name, '/')) { diff --git a/hw/vfio/region.c b/hw/vfio/region.c index 04bf9eb..d04c57d 100644 --- a/hw/vfio/region.c +++ b/hw/vfio/region.c @@ -45,6 +45,7 @@ void vfio_region_write(void *opaque, hwaddr addr, uint32_t dword; uint64_t qword; } buf; + int ret; switch (size) { case 1: @@ -64,11 +65,13 @@ void vfio_region_write(void *opaque, hwaddr addr, break; } - if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + ret = vbasedev->io_ops->region_write(vbasedev, region->nr, + addr, size, &buf, region->post_wr); + if (ret != size) { error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ",%d) failed: %m", + ",%d) failed: %s", __func__, vbasedev->name, region->nr, - addr, data, size); + addr, data, size, strwriteerror(ret)); } trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); @@ -96,11 +99,13 @@ uint64_t vfio_region_read(void *opaque, uint64_t qword; } buf; uint64_t data = 0; + int ret; - if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", + ret = 
vbasedev->io_ops->region_read(vbasedev, region->nr, addr, size, &buf); + if (ret != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s", __func__, vbasedev->name, region->nr, - addr, size); + addr, size, strreaderror(ret)); return (uint64_t)-1; } switch (size) { @@ -182,7 +187,7 @@ static int vfio_setup_region_sparse_mmaps(VFIORegion *region, int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, int index, const char *name) { - g_autofree struct vfio_region_info *info = NULL; + struct vfio_region_info *info = NULL; int ret; ret = vfio_device_get_region_info(vbasedev, index, &info); @@ -195,6 +200,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, region->size = info->size; region->fd_offset = info->offset; region->nr = index; + region->post_wr = false; if (region->size) { region->mem = g_new0(MemoryRegion, 1); @@ -236,6 +242,7 @@ int vfio_region_mmap(VFIORegion *region) { int i, ret, prot = 0; char *name; + int fd; if (!region->mem) { return 0; @@ -266,14 +273,15 @@ int vfio_region_mmap(VFIORegion *region) goto no_mmap; } + fd = vfio_device_get_region_fd(region->vbasedev, region->nr); + map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align); munmap(map_base, map_align - map_base); munmap(map_align + region->mmaps[i].size, align - (map_align - map_base)); region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot, - MAP_SHARED | MAP_FIXED, - region->vbasedev->fd, + MAP_SHARED | MAP_FIXED, fd, region->fd_offset + region->mmaps[i].offset); if (region->mmaps[i].mmap == MAP_FAILED) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index e90ec9b..fc6ed23 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -1,8 +1,10 @@ # See docs/devel/tracing.rst for syntax documentation. 
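
[Editor's note] The region.c hunks above stop calling pread()/pwrite() on vbasedev->fd directly and route all region accesses through vbasedev->io_ops, with the mmap path fetching its fd via vfio_device_get_region_fd(); this indirection is what lets a socket-based (vfio-user) backend slot in behind the same VFIORegion code as the kernel driver. Below is a minimal sketch of a kernel-fd backend for that indirection. The struct layout, the "Demo" names, and the exact signatures are inferred from the calls visible in the hunks (region_read, region_write taking a post_wr flag, callers comparing the result against size) and are illustrative, not the upstream definitions.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

typedef struct VFIODeviceDemo VFIODeviceDemo;

/* Indirection table assumed by vfio_region_read()/vfio_region_write(). */
typedef struct VFIODeviceIOOpsDemo {
    int (*region_read)(VFIODeviceDemo *vbasedev, int nr, off_t off,
                       uint32_t size, void *data);
    int (*region_write)(VFIODeviceDemo *vbasedev, int nr, off_t off,
                        uint32_t size, void *data, bool post_wr);
} VFIODeviceIOOpsDemo;

struct VFIODeviceDemo {
    int fd;                            /* kernel VFIO device fd */
    off_t region_offset[8];            /* per-region fd offset */
    const VFIODeviceIOOpsDemo *io_ops;
};

/* Kernel-fd backend: same behaviour as the pread() call it replaces. */
static int demo_region_read(VFIODeviceDemo *vbasedev, int nr, off_t off,
                            uint32_t size, void *data)
{
    ssize_t ret = pread(vbasedev->fd, data, size,
                        vbasedev->region_offset[nr] + off);

    /* Callers compare the result against @size, so return bytes or -errno. */
    return ret < 0 ? -errno : (int)ret;
}

/* post_wr hints that the write may be posted; the fd path can ignore it. */
static int demo_region_write(VFIODeviceDemo *vbasedev, int nr, off_t off,
                             uint32_t size, void *data, bool post_wr)
{
    ssize_t ret = pwrite(vbasedev->fd, data, size,
                         vbasedev->region_offset[nr] + off);

    (void)post_wr;
    return ret < 0 ? -errno : (int)ret;
}

static const VFIODeviceIOOpsDemo demo_fd_ops = {
    .region_read  = demo_region_read,
    .region_write = demo_region_write,
};

A socket-backed implementation would fill the same two slots with functions that marshal the access into a protocol message instead of touching an fd offset.
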
+# +# SPDX-License-Identifier: GPL-2.0-or-later # pci.c vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c" -vfio_intx_eoi(const char *name) " (%s) EOI" +vfio_pci_intx_eoi(const char *name) " (%s) EOI" vfio_intx_enable_kvm(const char *name) " (%s) KVM INTx accel enabled" vfio_intx_disable_kvm(const char *name) " (%s) KVM INTx accel disabled" vfio_intx_update(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d" @@ -35,8 +37,8 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s" vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:" vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" -vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx" -vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s" +vfio_pci_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx" +vfio_pci_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s" vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" vfio_pci_reset(const char *name) " (%s)" @@ -46,6 +48,7 @@ vfio_pci_emulated_vendor_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_device_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x" +vfio_pci_emulated_class_code(const char *name, uint32_t val) "%s 0x%06x" # pci-quirks.c vfio_quirk_rom_in_denylist(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" @@ -195,6 +198,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" +# cpr-iommufd.c +vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u" + # device.c vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" vfio_device_reset_handler(void) "" diff --git a/hw/vfio/trace.h b/hw/vfio/trace.h index 5a343aa..b34b61d 100644 --- a/hw/vfio/trace.h +++ b/hw/vfio/trace.h @@ -1 +1,4 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + */ #include "trace/trace-hw_vfio.h" diff --git a/hw/vfio/types.h b/hw/vfio/types.h new file mode 100644 index 0000000..c19334f --- /dev/null +++ b/hw/vfio/types.h @@ -0,0 +1,23 @@ +/* + * VFIO types definition + * + * Copyright Red Hat, Inc. 2025 + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_VFIO_VFIO_TYPES_H +#define HW_VFIO_VFIO_TYPES_H + +/* + * TYPE_VFIO_PCI_BASE is an abstract type used to share code + * between VFIO implementations that use a kernel driver + * with those that use user sockets. 
+ */ +#define TYPE_VFIO_PCI_BASE "vfio-pci-base" + +#define TYPE_VFIO_PCI "vfio-pci" +/* TYPE_VFIO_PCI shares struct VFIOPCIDevice. */ + +#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + +#endif /* HW_VFIO_VFIO_TYPES_H */ diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h deleted file mode 100644 index 134b83a..0000000 --- a/hw/vfio/vfio-cpr.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * VFIO CPR - * - * Copyright (c) 2025 Oracle and/or its affiliates. - * - * SPDX-License-Identifier: GPL-2.0-or-later - */ - -#ifndef HW_VFIO_CPR_H -#define HW_VFIO_CPR_H - -bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp); -void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer); - -#endif /* HW_VFIO_CPR_H */ diff --git a/hw/vfio/vfio-helpers.h b/hw/vfio/vfio-helpers.h index 54a327f..ce31758 100644 --- a/hw/vfio/vfio-helpers.h +++ b/hw/vfio/vfio-helpers.h @@ -32,4 +32,6 @@ struct vfio_device_info *vfio_get_device_info(int fd); int vfio_kvm_device_add_fd(int fd, Error **errp); int vfio_kvm_device_del_fd(int fd, Error **errp); +bool vfio_arch_wants_loading_config_after_iter(void); + #endif /* HW_VFIO_VFIO_HELPERS_H */ diff --git a/hw/vfio/vfio-migration-internal.h b/hw/vfio/vfio-migration-internal.h index a8b456b..814fbd9 100644 --- a/hw/vfio/vfio-migration-internal.h +++ b/hw/vfio/vfio-migration-internal.h @@ -13,7 +13,6 @@ #include <linux/vfio.h> #endif -#include "qemu/typedefs.h" #include "qemu/notify.h" /* @@ -32,6 +31,7 @@ #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) #define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) +#define VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY (0xffffffffef100006ULL) typedef struct VFIODevice VFIODevice; typedef struct VFIOMultifd VFIOMultifd; diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 164f6fd..3ea7b3c 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -1,6 +1,7 @@ system_virtio_ss = ss.source_set() system_virtio_ss.add(files('virtio-bus.c')) system_virtio_ss.add(files('iothread-vq-mapping.c')) +system_virtio_ss.add(files('virtio-config-io.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) @@ -10,11 +11,11 @@ system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c') specific_virtio_ss = ss.source_set() specific_virtio_ss.add(files('virtio.c')) -specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c')) +specific_virtio_ss.add(files('virtio-qmp.c')) if have_vhost system_virtio_ss.add(files('vhost.c')) - specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) + system_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) if have_vhost_user # fixme - this really should be generic specific_virtio_ss.add(files('vhost-user.c')) @@ -43,22 +44,22 @@ if have_vhost endif if have_vhost_vdpa system_virtio_ss.add(files('vhost-vdpa.c')) - specific_virtio_ss.add(files('vhost-shadow-virtqueue.c')) + system_virtio_ss.add(files('vhost-shadow-virtqueue.c')) endif else system_virtio_ss.add(files('vhost-stub.c')) endif +system_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) 
specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('virtio-nsm.c', 'cbor-helpers.c'), libcbor]) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) -specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: files('virtio-nsm.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('cbor-helpers.c'), libcbor]) +system_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) @@ -67,6 +68,7 @@ virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk- virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host-pci.c')) @@ -85,7 +87,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MD', if_true: files('virtio-md-pci.c')) -specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) +system_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: system_virtio_ss) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index dd8837c..d1da40a 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -312,7 +312,7 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) vhost_dev_disable_notifiers(&s->dev, vdev); } -static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) { VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); bool should_start = virtio_device_started(vdev, status); @@ -324,7 +324,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) } if (s->started == should_start) { - return; + return 0; } if (should_start) { @@ -335,6 +335,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) } else { vhost_vdpa_device_stop(vdev); } + return 0; } static const Property vhost_vdpa_device_properties[] = { diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index 7714332..ff67a02 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -66,7 +66,7 @@ err_host_notifiers: 
vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); } -static void vub_stop(VirtIODevice *vdev) +static int vub_stop(VirtIODevice *vdev) { VHostUserBase *vub = VHOST_USER_BASE(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -74,34 +74,39 @@ static void vub_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&vub->vhost_dev, vdev, true); + ret = vhost_dev_stop(&vub->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); + return ret; } -static void vub_set_status(VirtIODevice *vdev, uint8_t status) +static int vub_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBase *vub = VHOST_USER_BASE(vdev); bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&vub->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vub_start(vdev); } else { - vub_stop(vdev); + int ret; + ret = vub_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } /* diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index f6d1fc8..e77c69e 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -100,7 +100,7 @@ err_host_notifiers: vhost_dev_disable_notifiers(&fs->vhost_dev, vdev); } -static void vuf_stop(VirtIODevice *vdev) +static int vuf_stop(VirtIODevice *vdev) { VHostUserFS *fs = VHOST_USER_FS(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -108,34 +108,39 @@ static void vuf_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&fs->vhost_dev, vdev, true); + ret = vhost_dev_stop(&fs->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&fs->vhost_dev, vdev); + return ret; } -static void vuf_set_status(VirtIODevice *vdev, uint8_t status) +static int vuf_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserFS *fs = VHOST_USER_FS(vdev); bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&fs->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vuf_start(vdev); } else { - vuf_stop(vdev); + int ret; + ret = vuf_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vuf_get_features(VirtIODevice *vdev, diff --git a/hw/virtio/vhost-user-scmi.c b/hw/virtio/vhost-user-scmi.c index 7a0f622..f9264c4 100644 --- a/hw/virtio/vhost-user-scmi.c +++ b/hw/virtio/vhost-user-scmi.c @@ -83,7 +83,7 @@ err_host_notifiers: return ret; } -static void vu_scmi_stop(VirtIODevice *vdev) +static int vu_scmi_stop(VirtIODevice *vdev) { VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -93,41 +93,46 @@ static void vu_scmi_stop(VirtIODevice *vdev) /* vhost_dev_is_started() check in the callers is not fully reliable. 
*/ if (!scmi->started_vu) { - return; + return 0; } scmi->started_vu = false; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(vhost_dev, vdev, true); + ret = vhost_dev_stop(vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(vhost_dev, vdev); + return ret; } -static void vu_scmi_set_status(VirtIODevice *vdev, uint8_t status) +static int vu_scmi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); bool should_start = virtio_device_should_start(vdev, status); if (!scmi->connected) { - return; + return -1; } if (vhost_dev_is_started(&scmi->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vu_scmi_start(vdev); } else { - vu_scmi_stop(vdev); + int ret; + ret = vu_scmi_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vu_scmi_get_features(VirtIODevice *vdev, uint64_t features, diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 2776792..993c287 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -54,23 +54,28 @@ const VhostDevConfigOps vsock_ops = { .vhost_dev_config_notifier = vuv_handle_config_change, }; -static void vuv_set_status(VirtIODevice *vdev, uint8_t status) +static int vuv_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); bool should_start = virtio_device_should_start(vdev, status); + int ret; if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { - int ret = vhost_vsock_common_start(vdev); + ret = vhost_vsock_common_start(vdev); if (ret < 0) { - return; + return ret; } } else { - vhost_vsock_common_stop(vdev); + ret = vhost_vsock_common_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vuv_get_features(VirtIODevice *vdev, diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 1ab2c11..7061b6e 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) int ret; Int128 llend; Error *local_err = NULL; + MemoryRegion *mr; + hwaddr xlat; if (iotlb->target_as != &address_space_memory) { error_report("Wrong target AS \"%s\", only system memory is allowed", @@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { bool read_only; - if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL, - &local_err)) { + mr = memory_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { error_report_err(local_err); return; } + vaddr = memory_region_get_ram_ptr(mr) + xlat; + read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly; + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1, vaddr, read_only); if (ret) { @@ -594,6 +599,36 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) v->shadow_vqs = g_steal_pointer(&shadow_vqs); } +static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + uint64_t features; + uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | + 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | + 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | + 0x1ULL << 
VHOST_BACKEND_F_SUSPEND; + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { + return -EFAULT; + } + + features &= f; + + if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; + } + } + + dev->backend_cap = features; + v->shared->backend_cap = features; + + return 0; +} + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { struct vhost_vdpa *v = opaque; @@ -603,7 +638,12 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) v->dev = dev; dev->opaque = opaque ; - v->shared->listener = vhost_vdpa_memory_listener; + + ret = vhost_vdpa_set_backend_cap(dev); + if (unlikely(ret != 0)) { + return ret; + } + vhost_vdpa_init_svq(dev, v); error_propagate(&dev->migration_blocker, v->migration_blocker); @@ -639,6 +679,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); + v->shared->listener = vhost_vdpa_memory_listener; return 0; } @@ -841,36 +882,6 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); } -static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) -{ - struct vhost_vdpa *v = dev->opaque; - - uint64_t features; - uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | - 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | - 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | - 0x1ULL << VHOST_BACKEND_F_SUSPEND; - int r; - - if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { - return -EFAULT; - } - - features &= f; - - if (vhost_vdpa_first_dev(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; - } - } - - dev->backend_cap = features; - v->shared->backend_cap = features; - - return 0; -} - static int vhost_vdpa_get_device_id(struct vhost_dev *dev, uint32_t *device_id) { @@ -888,8 +899,14 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev) ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev); + if (ret) { + return ret; + } + + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; v->suspended = false; - return ret; + return 0; } static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) @@ -1373,7 +1390,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) "IOMMU and try again"); return -1; } - memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + if (v->shared->listener_registered && + dev->vdev->dma_as != v->shared->listener.address_space) { + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; + } + if (!v->shared->listener_registered) { + memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + v->shared->listener_registered = true; + } return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } @@ -1383,8 +1408,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) static void vhost_vdpa_reset_status(struct vhost_dev *dev) { - struct vhost_vdpa *v = dev->opaque; - if (!vhost_vdpa_last_dev(dev)) { return; } @@ -1392,7 +1415,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev) vhost_vdpa_reset_device(dev); vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); - memory_listener_unregister(&v->shared->listener); } static int vhost_vdpa_set_log_base(struct vhost_dev 
*dev, uint64_t base, @@ -1526,12 +1548,27 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { + int r; + struct vhost_vdpa *v; + if (!vhost_vdpa_first_dev(dev)) { return 0; } trace_vhost_vdpa_set_owner(dev); - return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + r = vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + if (unlikely(r < 0)) { + return r; + } + + /* + * Being optimistic and listening address space memory. If the device + * uses vIOMMU, it is changed at vhost_vdpa_dev_start. + */ + v = dev->opaque; + memory_listener_register(&v->shared->listener, &address_space_memory); + v->shared->listener_registered = true; + return 0; } static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, @@ -1563,7 +1600,6 @@ const VhostOps vdpa_ops = { .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, .vhost_set_vring_call = vhost_vdpa_set_vring_call, .vhost_get_features = vhost_vdpa_get_features, - .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, .vhost_set_owner = vhost_vdpa_set_owner, .vhost_set_vring_endian = NULL, .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index 4b4fbb4..c6c44d8 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -95,7 +95,7 @@ err_host_notifiers: return ret; } -void vhost_vsock_common_stop(VirtIODevice *vdev) +int vhost_vsock_common_stop(VirtIODevice *vdev) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -103,18 +103,18 @@ void vhost_vsock_common_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&vvc->vhost_dev, vdev, true); + ret = vhost_dev_stop(&vvc->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); + return ret; } diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index b73dc72..6e40888 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -67,37 +67,38 @@ static int vhost_vsock_set_running(VirtIODevice *vdev, int start) } -static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); bool should_start = virtio_device_should_start(vdev, status); int ret; if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { ret = vhost_vsock_common_start(vdev); if (ret < 0) { - return; + return 0; } ret = vhost_vsock_set_running(vdev, 1); if (ret < 0) { vhost_vsock_common_stop(vdev); error_report("Error starting vhost vsock: %d", -ret); - return; + return 0; } } else { ret = vhost_vsock_set_running(vdev, 0); if (ret < 0) { error_report("vhost vsock set running failed: %d", ret); - return; + return 0; } vhost_vsock_common_stop(vdev); } + return 0; } static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 4cae7c1..6557c58 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -47,12 +47,6 @@ static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX]; static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX]; static QLIST_HEAD(, 
vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX]; -/* Memslots used by backends that support private memslots (without an fd). */ -static unsigned int used_memslots; - -/* Memslots used by backends that only support shared memslots (with an fd). */ -static unsigned int used_shared_memslots; - static QLIST_HEAD(, vhost_dev) vhost_devices = QLIST_HEAD_INITIALIZER(vhost_devices); @@ -74,15 +68,15 @@ unsigned int vhost_get_free_memslots(void) QLIST_FOREACH(hdev, &vhost_devices, entry) { unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); - unsigned int cur_free; + unsigned int cur_free = r - hdev->mem->nregions; - if (hdev->vhost_ops->vhost_backend_no_private_memslots && - hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { - cur_free = r - used_shared_memslots; + if (unlikely(r < hdev->mem->nregions)) { + warn_report_once("used (%u) vhost backend memory slots exceed" + " the device limit (%u).", hdev->mem->nregions, r); + free = 0; } else { - cur_free = r - used_memslots; + free = MIN(free, cur_free); } - free = MIN(free, cur_free); } return free; } @@ -666,13 +660,6 @@ static void vhost_commit(MemoryListener *listener) dev->mem = g_realloc(dev->mem, regions_size); dev->mem->nregions = dev->n_mem_sections; - if (dev->vhost_ops->vhost_backend_no_private_memslots && - dev->vhost_ops->vhost_backend_no_private_memslots(dev)) { - used_shared_memslots = dev->mem->nregions; - } else { - used_memslots = dev->mem->nregions; - } - for (i = 0; i < dev->n_mem_sections; i++) { struct vhost_memory_region *cur_vmr = dev->mem->regions + i; struct MemoryRegionSection *mrs = dev->mem_sections + i; @@ -1123,7 +1110,8 @@ static bool vhost_log_global_start(MemoryListener *listener, Error **errp) r = vhost_migration_log(listener, true); if (r < 0) { - abort(); + error_setg_errno(errp, -r, "vhost: Failed to start logging"); + return false; } return true; } @@ -1134,7 +1122,8 @@ static void vhost_log_global_stop(MemoryListener *listener) r = vhost_migration_log(listener, false); if (r < 0) { - abort(); + /* Not fatal, so report it, but take no further action */ + warn_report("vhost: Failed to stop logging"); } } @@ -1367,25 +1356,30 @@ fail_alloc_desc: return r; } -void vhost_virtqueue_stop(struct vhost_dev *dev, - struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, - unsigned idx) +static int do_vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx, bool force) { int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { .index = vhost_vq_index, }; - int r; + int r = 0; if (virtio_queue_get_desc_addr(vdev, idx) == 0) { /* Don't stop the virtqueue which might have not been started */ - return; + return 0; } - r = dev->vhost_ops->vhost_get_vring_base(dev, &state); - if (r < 0) { - VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); + if (!force) { + r = dev->vhost_ops->vhost_get_vring_base(dev, &state); + if (r < 0) { + VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); + } + } + + if (r < 0 || force) { /* Connection to the backend is broken, so let's sync internal * last avail idx to the device used idx. 
*/ @@ -1411,6 +1405,15 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, 0, virtio_queue_get_avail_size(vdev, idx)); vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), 0, virtio_queue_get_desc_size(vdev, idx)); + return r; +} + +int vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) +{ + return do_vhost_virtqueue_stop(dev, vdev, vq, idx, false); } static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, @@ -1618,15 +1621,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); /* - * The listener we registered properly updated the corresponding counter. - * So we can trust that these values are accurate. + * The listener we registered properly setup the number of required + * memslots in vhost_commit(). */ - if (hdev->vhost_ops->vhost_backend_no_private_memslots && - hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { - used = used_shared_memslots; - } else { - used = used_memslots; - } + used = hdev->mem->nregions; + /* * We assume that all reserved memslots actually require a real memslot * in our vhost backend. This might not be true, for example, if the @@ -2135,9 +2134,11 @@ fail_features: } /* Host notifiers must be enabled at this point. */ -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +static int do_vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, + bool vrings, bool force) { int i; + int rc = 0; /* should only be called after backend is connected */ assert(hdev->vhost_ops); @@ -2156,10 +2157,11 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) vhost_dev_set_vring_enable(hdev, false); } for (i = 0; i < hdev->nvqs; ++i) { - vhost_virtqueue_stop(hdev, - vdev, - hdev->vqs + i, - hdev->vq_index + i); + rc |= do_vhost_virtqueue_stop(hdev, + vdev, + hdev->vqs + i, + hdev->vq_index + i, + force); } if (hdev->vhost_ops->vhost_reset_status) { hdev->vhost_ops->vhost_reset_status(hdev); @@ -2176,6 +2178,18 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) hdev->started = false; vdev->vhost_started = false; hdev->vdev = NULL; + return rc; +} + +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +{ + return do_vhost_dev_stop(hdev, vdev, vrings, false); +} + +int vhost_dev_force_stop(struct vhost_dev *hdev, VirtIODevice *vdev, + bool vrings) +{ + return do_vhost_dev_stop(hdev, vdev, vrings, true); } int vhost_net_set_backend(struct vhost_dev *hdev, diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 91510ec..db787d0 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -958,7 +958,7 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) s->poison_val = 0; } -static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); @@ -988,6 +988,7 @@ static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) qemu_mutex_unlock(&s->free_page_lock); } } + return 0; } static ResettableState *virtio_balloon_get_reset_state(Object *obj) diff --git a/hw/virtio/virtio-config-io.c b/hw/virtio/virtio-config-io.c index ad78e0b..f58d90b 100644 --- a/hw/virtio/virtio-config-io.c +++ b/hw/virtio/virtio-config-io.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "hw/virtio/virtio.h" -#include "cpu.h" uint32_t 
virtio_config_readb(VirtIODevice *vdev, uint32_t addr) { diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index e24d691..517f208 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -1197,11 +1197,12 @@ static void virtio_crypto_vhost_status(VirtIOCrypto *c, uint8_t status) } } -static void virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); virtio_crypto_vhost_status(vcrypto, status); + return 0; } static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 5406098..3500f1b 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -1522,9 +1522,10 @@ static void virtio_iommu_device_reset_exit(Object *obj, ResetType type) NULL, NULL, virtio_iommu_put_endpoint); } -static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) { trace_virtio_iommu_device_status(status); + return 0; } static void virtio_iommu_instance_init(Object *obj) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index a3d1a67..c46f6f9 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -244,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg, return ret; } -/* - * Adjust the memory section to cover the intersection with the given range. - * - * Returns false if the intersection is empty, otherwise returns true. - */ -static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s, - uint64_t offset, uint64_t size) -{ - uint64_t start = MAX(s->offset_within_region, offset); - uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), - offset + size); - - if (end <= start) { - return false; - } - - s->offset_within_address_space += start - s->offset_within_region; - s->offset_within_region = start; - s->size = int128_make64(end - start); - return true; -} - typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, @@ -287,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -319,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -355,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, QLIST_FOREACH(rdl, &vmem->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } rdl->notify_discard(rdl, &tmp); @@ -371,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, QLIST_FOREACH(rdl, &vmem->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } 
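
[Editor's note] The virtio-mem hunks above replace the file-local helper virtio_mem_intersect_memory_section() with the generic memory_region_section_intersect_range(); the deleted body shows exactly what the clipping does. The sketch below restates that arithmetic with simplified stand-in types (not the QEMU structs), assuming the generic helper keeps the same semantics: clip the section to its overlap with [offset, offset + size), keeping the region and address-space offsets consistent, and report an empty intersection.

#include <stdbool.h>
#include <stdint.h>

typedef struct {
    uint64_t offset_within_region;
    uint64_t offset_within_address_space;
    uint64_t size;
} SectionDemo;

/* Clip @s to its intersection with [offset, offset + size); false if empty. */
static bool section_intersect_range(SectionDemo *s, uint64_t offset,
                                    uint64_t size)
{
    uint64_t start = (s->offset_within_region > offset)
                     ? s->offset_within_region : offset;
    uint64_t end = s->offset_within_region + s->size;

    if (end > offset + size) {
        end = offset + size;
    }
    if (end <= start) {
        return false;
    }

    /* Advance both offsets by the same amount while shrinking the window. */
    s->offset_within_address_space += start - s->offset_within_region;
    s->offset_within_region = start;
    s->size = end - start;
    return true;
}

This is why the per-block loops above can hand each RamDiscardListener a temporary copy of its section: the copy is clipped in place to the plugged or unplugged range before the notify callback runs.
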
ret = rdl->notify_populate(rdl, &tmp); @@ -388,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, if (rdl2 == rdl) { break; } - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } rdl2->notify_discard(rdl2, &tmp); @@ -1070,6 +1048,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) } /* + * Set ourselves as RamDiscardManager before the plug handler maps the + * memory region and exposes it via an address space. + */ + if (memory_region_set_ram_discard_manager(&vmem->memdev->mr, + RAM_DISCARD_MANAGER(vmem))) { + error_setg(errp, "Failed to set RamDiscardManager"); + ram_block_coordinated_discard_require(false); + return; + } + + /* * We don't know at this point whether shared RAM is migrated using * QEMU or migrated using the file content. "x-ignore-shared" will be * configured after realizing the device. So in case we have an @@ -1083,6 +1072,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); if (ret) { error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); return; } @@ -1144,13 +1134,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj); vmem->system_reset->vmem = vmem; qemu_register_resettable(obj); - - /* - * Set ourselves as RamDiscardManager before the plug handler maps the - * memory region and exposes it via an address space. - */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, - RAM_DISCARD_MANAGER(vmem)); } static void virtio_mem_device_unrealize(DeviceState *dev) @@ -1158,12 +1141,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOMEM *vmem = VIRTIO_MEM(dev); - /* - * The unplug handler unmapped the memory region, it cannot be - * found via an address space anymore. Unset ourselves. - */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); - qemu_unregister_resettable(OBJECT(vmem->system_reset)); object_unref(OBJECT(vmem->system_reset)); @@ -1176,6 +1153,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev) virtio_del_queue(vdev, 0); virtio_cleanup(vdev); g_free(vmem->bitmap); + /* + * The unplug handler unmapped the memory region, it cannot be + * found via an address space anymore. Unset ourselves. 
+ */ + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); } @@ -1750,7 +1732,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, } struct VirtIOMEMReplayData { - void *fn; + ReplayRamDiscardState fn; void *opaque; }; @@ -1758,12 +1740,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) { struct VirtIOMEMReplayData *data = arg; - return ((ReplayRamPopulate)data->fn)(s, data->opaque); + return data->fn(s, data->opaque); } static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, MemoryRegionSection *s, - ReplayRamPopulate replay_fn, + ReplayRamDiscardState replay_fn, void *opaque) { const VirtIOMEM *vmem = VIRTIO_MEM(rdm); @@ -1782,14 +1764,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, { struct VirtIOMEMReplayData *data = arg; - ((ReplayRamDiscard)data->fn)(s, data->opaque); - return 0; + return data->fn(s, data->opaque); } -static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *s, - ReplayRamDiscard replay_fn, - void *opaque) +static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamDiscardState replay_fn, + void *opaque) { const VirtIOMEM *vmem = VIRTIO_MEM(rdm); struct VirtIOMEMReplayData data = { @@ -1798,8 +1779,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, }; g_assert(s->mr == &vmem->memdev->mr); - virtio_mem_for_each_unplugged_section(vmem, s, &data, - virtio_mem_rdm_replay_discarded_cb); + return virtio_mem_for_each_unplugged_section(vmem, s, &data, + virtio_mem_rdm_replay_discarded_cb); } static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c index 8cf9788..f857a84 100644 --- a/hw/virtio/virtio-net-pci.c +++ b/hw/virtio/virtio-net-pci.c @@ -74,6 +74,7 @@ static void virtio_net_pci_class_init(ObjectClass *klass, const void *data) k->device_id = PCI_DEVICE_ID_VIRTIO_NET; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = PCI_CLASS_NETWORK_ETHERNET; + k->sriov_vf_user_creatable = true; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); device_class_set_props(dc, virtio_net_properties); vpciklass->realize = virtio_net_pci_realize; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 0fa8fe4..767216d 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -30,6 +30,7 @@ #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/module.h" +#include "qemu/bswap.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/loader.h" @@ -146,9 +147,7 @@ static const VMStateDescription vmstate_virtio_pci = { static bool virtio_pci_has_extra_state(DeviceState *d) { - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA; + return true; } static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f) @@ -1215,7 +1214,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, static bool virtio_pci_query_guest_notifiers(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return msix_enabled(&proxy->pci_dev); + + if (msix_enabled(&proxy->pci_dev)) { + return true; + } else { + return pci_irq_disabled(&proxy->pci_dev); + } } static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) @@ -1962,6 +1966,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) uint8_t 
*config; uint32_t size; VirtIODevice *vdev = virtio_bus_get_device(bus); + int16_t res; /* * Virtio capabilities present without @@ -2109,6 +2114,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); } + + if (pci_is_vf(&proxy->pci_dev)) { + pcie_ari_init(&proxy->pci_dev, proxy->last_pcie_cap_offset); + proxy->last_pcie_cap_offset += PCI_ARI_SIZEOF; + } else { + res = pcie_sriov_pf_init_from_user_created_vfs( + &proxy->pci_dev, proxy->last_pcie_cap_offset, errp); + if (res > 0) { + proxy->last_pcie_cap_offset += res; + virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV); + } + } } static void virtio_pci_device_unplugged(DeviceState *d) @@ -2199,7 +2216,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) if (pcie_port && pci_is_express(pci_dev)) { int pos; - uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; + proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; pos = pcie_endpoint_cap_init(pci_dev, 0); assert(pos > 0); @@ -2216,9 +2233,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3); if (proxy->flags & VIRTIO_PCI_FLAG_AER) { - pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset, + pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset, PCI_ERR_SIZEOF, NULL); - last_pcie_cap_offset += PCI_ERR_SIZEOF; + proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) { @@ -2243,9 +2260,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) } if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { - pcie_ats_init(pci_dev, last_pcie_cap_offset, + pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset, proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED); - last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; + proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) { @@ -2273,6 +2290,7 @@ static void virtio_pci_exit(PCIDevice *pci_dev) !pci_bus_is_root(pci_get_bus(pci_dev)); bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; + pcie_sriov_pf_exit(&proxy->pci_dev); msix_uninit_exclusive_bar(pci_dev); if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port && pci_is_express(pci_dev)) { @@ -2349,12 +2367,8 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) static const Property virtio_pci_properties[] = { DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), - DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false), DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy, @@ -2383,8 +2397,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); PCIDevice *pci_dev = &proxy->pci_dev; - if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) && - virtio_pci_modern(proxy)) { + if (virtio_pci_modern(proxy)) { pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index dcb3c71..3df5d25 100644 --- a/hw/virtio/virtio-rng.c +++ 
b/hw/virtio/virtio-rng.c @@ -159,17 +159,18 @@ static void check_rate_limit(void *opaque) vrng->activate_timer = true; } -static void virtio_rng_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_rng_set_status(VirtIODevice *vdev, uint8_t status) { VirtIORNG *vrng = VIRTIO_RNG(vdev); if (!vdev->vm_running) { - return; + return 0; } vdev->status = status; /* Something changed, try to process buffers */ virtio_rng_process(vrng); + return 0; } static void virtio_rng_device_realize(DeviceState *dev, Error **errp) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 480c2e5..9a81ad9 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -20,7 +20,7 @@ #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" -#include "exec/tswap.h" +#include "qemu/target-info.h" #include "qom/object_interfaces.h" #include "hw/core/cpu.h" #include "hw/virtio/virtio.h" @@ -205,6 +205,15 @@ static const char *virtio_id_to_name(uint16_t device_id) return name; } +static void virtio_check_indirect_feature(VirtIODevice *vdev) +{ + if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s: indirect_desc was not negotiated!\n", + vdev->name); + } +} + /* Called within call_rcu(). */ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { @@ -929,18 +938,18 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { - unsigned int i, steps, max_steps; + unsigned int i, steps, max_steps, ndescs; i = vq->used_idx % vq->vring.num; steps = 0; /* - * We shouldn't need to increase 'i' by more than the distance - * between used_idx and last_avail_idx. + * We shouldn't need to increase 'i' by more than or equal to + * the distance between used_idx and last_avail_idx (max_steps). 
*/ max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num; /* Search for element in vq->used_elems */ - while (steps <= max_steps) { + while (steps < max_steps) { /* Found element, set length and mark as filled */ if (vq->used_elems[i].index == elem->index) { vq->used_elems[i].len = len; @@ -948,8 +957,18 @@ static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, break; } - i += vq->used_elems[i].ndescs; - steps += vq->used_elems[i].ndescs; + ndescs = vq->used_elems[i].ndescs; + + /* Defensive sanity check */ + if (unlikely(ndescs == 0 || ndescs > vq->vring.num)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: %s invalid ndescs %u at position %u\n", + __func__, vq->vdev->name, ndescs, i); + return; + } + + i += ndescs; + steps += ndescs; if (i >= vq->vring.num) { i -= vq->vring.num; @@ -1680,8 +1699,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) VirtIODevice *vdev = vq->vdev; VirtQueueElement *elem = NULL; unsigned out_num, in_num, elem_entries; - hwaddr addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; + hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; + struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; VRingDesc desc; int rc; @@ -1733,6 +1752,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, @@ -1826,8 +1846,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) VirtIODevice *vdev = vq->vdev; VirtQueueElement *elem = NULL; unsigned out_num, in_num, elem_entries; - hwaddr addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; + hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; + struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; VRingPackedDesc desc; uint16_t id; int rc; @@ -1870,6 +1890,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, @@ -2221,12 +2242,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); trace_virtio_set_status(vdev, val); + int ret = 0; if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && val & VIRTIO_CONFIG_S_FEATURES_OK) { - int ret = virtio_validate_features(vdev); - + ret = virtio_validate_features(vdev); if (ret) { return ret; } @@ -2239,11 +2260,15 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) } if (k->set_status) { - k->set_status(vdev, val); + ret = k->set_status(vdev, val); + if (ret) { + qemu_log("set %s status to %d failed, old status: %d\n", + vdev->name, val, vdev->status); + } } vdev->status = val; - return 0; + return ret; } static enum virtio_device_endian virtio_default_endian(void) @@ -2316,49 +2341,6 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index) } } -void virtio_reset(void *opaque) -{ - VirtIODevice *vdev = opaque; - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - int i; - - virtio_set_status(vdev, 0); - if (current_cpu) { - /* Guest initiated reset */ - vdev->device_endian = virtio_current_cpu_endian(); - } else { - /* System reset */ - vdev->device_endian = virtio_default_endian(); - } - - if (k->get_vhost) { - 
struct vhost_dev *hdev = k->get_vhost(vdev); - /* Only reset when vhost back-end is connected */ - if (hdev && hdev->vhost_ops) { - vhost_reset_device(hdev); - } - } - - if (k->reset) { - k->reset(vdev); - } - - vdev->start_on_kick = false; - vdev->started = false; - vdev->broken = false; - vdev->guest_features = 0; - vdev->queue_sel = 0; - vdev->status = 0; - vdev->disabled = false; - qatomic_set(&vdev->isr, 0); - vdev->config_vector = VIRTIO_NO_VECTOR; - virtio_notify_vector(vdev, vdev->config_vector); - - for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { - __virtio_queue_reset(vdev, i); - } -} - void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) { if (!vdev->vq[n].vring.num) { @@ -3169,6 +3151,49 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } +void virtio_reset(void *opaque) +{ + VirtIODevice *vdev = opaque; + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + int i; + + virtio_set_status(vdev, 0); + if (current_cpu) { + /* Guest initiated reset */ + vdev->device_endian = virtio_current_cpu_endian(); + } else { + /* System reset */ + vdev->device_endian = virtio_default_endian(); + } + + if (k->get_vhost) { + struct vhost_dev *hdev = k->get_vhost(vdev); + /* Only reset when vhost back-end is connected */ + if (hdev && hdev->vhost_ops) { + vhost_reset_device(hdev); + } + } + + if (k->reset) { + k->reset(vdev); + } + + vdev->start_on_kick = false; + vdev->started = false; + vdev->broken = false; + virtio_set_features_nocheck(vdev, 0); + vdev->queue_sel = 0; + vdev->status = 0; + vdev->disabled = false; + qatomic_set(&vdev->isr, 0); + vdev->config_vector = VIRTIO_NO_VECTOR; + virtio_notify_vector(vdev, vdev->config_vector); + + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + __virtio_queue_reset(vdev, i); + } +} + static void virtio_device_check_notification_compatibility(VirtIODevice *vdev, Error **errp) { @@ -3255,13 +3280,6 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) config_len--; } - if (vdc->pre_load_queues) { - ret = vdc->pre_load_queues(vdev); - if (ret) { - return ret; - } - } - num = qemu_get_be32(f); if (num > VIRTIO_QUEUE_MAX) { @@ -3269,6 +3287,13 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) return -1; } + if (vdc->pre_load_queues) { + ret = vdc->pre_load_queues(vdev, num); + if (ret) { + return ret; + } + } + for (i = 0; i < num; i++) { vdev->vq[i].vring.num = qemu_get_be32(f); if (k->has_variable_vring_alignment) { @@ -3419,7 +3444,7 @@ void virtio_cleanup(VirtIODevice *vdev) qemu_del_vm_change_state_handler(vdev->vmstate); } -static void virtio_vmstate_change(void *opaque, bool running, RunState state) +static int virtio_vmstate_change(void *opaque, bool running, RunState state) { VirtIODevice *vdev = opaque; BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); @@ -3436,8 +3461,12 @@ static void virtio_vmstate_change(void *opaque, bool running, RunState state) } if (!backend_run) { - virtio_set_status(vdev, vdev->status); + int ret = virtio_set_status(vdev, vdev->status); + if (ret) { + return ret; + } } + return 0; } void virtio_instance_init_common(Object *proxy_obj, void *data, @@ -3489,7 +3518,7 @@ void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size) vdev->config = NULL; } vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), - virtio_vmstate_change, vdev); + NULL, virtio_vmstate_change, vdev); vdev->device_endian = virtio_default_endian(); vdev->use_guest_notifier_mask = true; } diff --git a/hw/vmapple/virtio-blk.c b/hw/vmapple/virtio-blk.c index 532b564..9de9aaa 
100644 --- a/hw/vmapple/virtio-blk.c +++ b/hw/vmapple/virtio-blk.c @@ -19,7 +19,6 @@ #include "hw/vmapple/vmapple.h" #include "hw/virtio/virtio-blk.h" #include "hw/virtio/virtio-pci.h" -#include "qemu/bswap.h" #include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c index 9a677e8..78e0bc8 100644 --- a/hw/xen/xen-hvm-common.c +++ b/hw/xen/xen-hvm-common.c @@ -711,7 +711,7 @@ static int xen_map_ioreq_server(XenIOState *state) /* * If we fail to map the shared page with xenforeignmemory_map_resource() * or if we're using buffered ioreqs, we need xen_get_ioreq_server_info() - * to provide the the addresses to map the shared page and/or to get the + * to provide the addresses to map the shared page and/or to get the * event-channel port for buffered ioreqs. */ if (state->shared_page == NULL || state->has_bufioreq) { diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 9d16644..006b5b5 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -54,6 +54,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include <sys/ioctl.h> #include "hw/pci/pci.h"
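
[Editor's note] The recurring pattern across the virtio and vhost hunks in this section is a contract change: set_status callbacks, the vhost stop paths, and the VM state-change handler are converted from void to int, and virtio_set_status() now logs and forwards the first failure instead of swallowing it. The condensed sketch below shows only that control flow, with stubbed "Demo" types in place of the real VirtIODevice/VirtioDeviceClass; it is an illustration of the pattern, not the QEMU code.

#include <stdint.h>
#include <stdio.h>

typedef struct VirtIODeviceDemo VirtIODeviceDemo;

struct VirtIODeviceDemo {
    const char *name;
    uint8_t status;
    /* Before this series the callback was: void (*set_status)(...). */
    int (*set_status)(VirtIODeviceDemo *vdev, uint8_t val);
};

static int demo_virtio_set_status(VirtIODeviceDemo *vdev, uint8_t val)
{
    int ret = 0;

    if (vdev->set_status) {
        ret = vdev->set_status(vdev, val);
        if (ret) {
            /* Mirrors the qemu_log() added to virtio_set_status() above. */
            fprintf(stderr, "set %s status to %d failed, old status: %d\n",
                    vdev->name, val, vdev->status);
        }
    }
    vdev->status = val;
    return ret; /* the vmstate-change handler now checks and returns this */
}

The stop paths follow the same shape: capture the return code of vhost_dev_stop(), still attempt guest-notifier cleanup, and report -1 only when that cleanup itself fails, so a broken backend no longer fails silently.
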