Diffstat (limited to 'hw')
175 files changed, 6822 insertions, 1960 deletions
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 8b001b9..acfa7db 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -201,8 +201,7 @@ void v9fs_path_free(V9fsPath *path)
 }
 
-void G_GNUC_PRINTF(2, 3)
-v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
+void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
 {
     va_list ap;
 
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 259ad32..65cc45e 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -456,7 +456,8 @@ static inline uint8_t v9fs_request_cancelled(V9fsPDU *pdu)
 void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu);
 void v9fs_path_init(V9fsPath *path);
 void v9fs_path_free(V9fsPath *path);
-void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...);
+void G_GNUC_PRINTF(2, 3) v9fs_path_sprintf(V9fsPath *path, const char *fmt,
+                                           ...);
 void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src);
 size_t v9fs_readdir_response_size(V9fsString *name);
 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
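The 9p change above moves the printf-format attribute from the definition to the declaration in 9p.h, so every caller of v9fs_path_sprintf() gets compile-time format checking. A minimal sketch of the effect (names here are illustrative, not from the patch); G_GNUC_PRINTF is GLib's wrapper around GCC/Clang's format attribute:

    /* Sketch: G_GNUC_PRINTF(2, 3) expands to
     * __attribute__((format(printf, 2, 3))) under GCC/Clang, letting the
     * compiler type-check variadic arguments against the format string. */
    #include <glib.h>

    void G_GNUC_PRINTF(2, 3) path_sprintf(char *dst, const char *fmt, ...);

    void demo(char *dst)
    {
        path_sprintf(dst, "%s/%d", "dir", 42);      /* OK */
        /* path_sprintf(dst, "%s", 42); now warns at compile time */
    }
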
diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c
index b7bc6e4..d58ea72 100644
--- a/hw/acpi/acpi-pci-hotplug-stub.c
+++ b/hw/acpi/acpi-pci-hotplug-stub.c
@@ -4,7 +4,7 @@
 
 const VMStateDescription vmstate_acpi_pcihp_pci_status;
 
-void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
+void acpi_pcihp_init(Object *owner, AcpiPciHpState *s,
                      MemoryRegion *address_space_io, uint16_t io_base)
 {
 }
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index f8f93a9..1e685f9 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -160,7 +160,7 @@ void crs_replace_with_free_ranges(GPtrArray *ranges,
  */
 static void crs_range_merge(GPtrArray *range)
 {
-    GPtrArray *tmp = g_ptr_array_new_with_free_func(crs_range_free);
+    g_autoptr(GPtrArray) tmp = g_ptr_array_new_with_free_func(crs_range_free);
     CrsRangeEntry *entry;
     uint64_t range_base, range_limit;
     int i;
@@ -191,7 +191,6 @@ static void crs_range_merge(GPtrArray *range)
         entry = g_ptr_array_index(tmp, i);
         crs_range_insert(range, entry->base, entry->limit);
     }
-    g_ptr_array_free(tmp, true);
 }
 
 static void
@@ -2153,6 +2152,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
     int64_t socket_id = -1, cluster_id = -1, core_id = -1;
     uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0;
     uint32_t pptt_start = table_data->len;
+    uint32_t root_offset;
     int n;
     AcpiTable table = { .sig = "PPTT", .rev = 2,
                         .oem_id = oem_id, .oem_table_id = oem_table_id };
@@ -2160,6 +2160,18 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
     acpi_table_begin(&table, table_data);
 
     /*
+     * Build a root node for all the processor nodes. Otherwise when
+     * building a multi-socket system each socket tree is separated
+     * and will be hard for the OS like Linux to know whether the
+     * system is homogeneous.
+     */
+    root_offset = table_data->len - pptt_start;
+    build_processor_hierarchy_node(table_data,
+        (1 << 0) | /* Physical package */
+        (1 << 4), /* Identical Implementation */
+        0, 0, NULL, 0);
+
+    /*
      * This works with the assumption that cpus[n].props.*_id has been
      * sorted from top to down levels in mc->possible_cpu_arch_ids().
      * Otherwise, the unexpected and duplicated containers will be
@@ -2173,8 +2185,9 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
             core_id = -1;
             socket_offset = table_data->len - pptt_start;
             build_processor_hierarchy_node(table_data,
-                (1 << 0), /* Physical package */
-                0, socket_id, NULL, 0);
+                (1 << 0) | /* Physical package */
+                (1 << 4), /* Identical Implementation */
+                root_offset, socket_id, NULL, 0);
         }
 
         if (mc->smp_props.clusters_supported && mc->smp_props.has_clusters) {
@@ -2184,7 +2197,8 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 core_id = -1;
                 cluster_offset = table_data->len - pptt_start;
                 build_processor_hierarchy_node(table_data,
-                    (0 << 0), /* Not a physical package */
+                    (0 << 0) | /* Not a physical package */
+                    (1 << 4), /* Identical Implementation */
                     socket_offset, cluster_id, NULL, 0);
             }
         } else {
@@ -2202,7 +2216,8 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 core_id = cpus->cpus[n].props.core_id;
                 core_offset = table_data->len - pptt_start;
                 build_processor_hierarchy_node(table_data,
-                    (0 << 0), /* Not a physical package */
+                    (0 << 0) | /* Not a physical package */
+                    (1 << 4), /* Identical Implementation */
                     cluster_offset, core_id, NULL, 0);
             }
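The PPTT hunks above OR in bit 4 of the processor-hierarchy-node flags and parent every socket under a new root node. For reference, a sketch of the flag bits these hunks assume (bit assignments per the ACPI 6.3+ PPTT definition; the enum itself is not in the patch):

    /* Illustrative only: PPTT processor hierarchy node flags. */
    enum {
        PPTT_PHYSICAL_PACKAGE   = 1 << 0, /* node is a physical package  */
        PPTT_ACPI_PROC_ID_VALID = 1 << 1,
        PPTT_PROC_IS_THREAD     = 1 << 2,
        PPTT_NODE_IS_LEAF       = 1 << 3,
        PPTT_IDENTICAL_IMPL     = 1 << 4, /* children are homogeneous    */
    };

With the shared root node carrying PPTT_IDENTICAL_IMPL, a multi-socket guest can tell the whole topology is homogeneous instead of seeing disjoint per-socket trees.
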
diff --git a/hw/acpi/bios-linker-loader.c b/hw/acpi/bios-linker-loader.c
index 1080618..c9ffe44 100644
--- a/hw/acpi/bios-linker-loader.c
+++ b/hw/acpi/bios-linker-loader.c
@@ -22,8 +22,6 @@
 #include "hw/acpi/bios-linker-loader.h"
 #include "hw/nvram/fw_cfg.h"
 
-#include "qemu/bswap.h"
-
 /*
  * Linker/loader is a paravirtualized interface that passes commands to guest.
  * The commands can be used to request guest to
diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c
index 9cd7905..75d5b30 100644
--- a/hw/acpi/cxl.c
+++ b/hw/acpi/cxl.c
@@ -22,6 +22,7 @@
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_host.h"
 #include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_host.h"
 #include "hw/mem/memory-device.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/aml-build.h"
@@ -135,55 +136,52 @@ static void cedt_build_chbs(GArray *table_data, PXBCXLDev *cxl)
  * Interleave ways encoding in CXL 2.0 ECN: 3, 6, 12 and 16-way memory
  * interleaving.
  */
-static void cedt_build_cfmws(GArray *table_data, CXLState *cxls)
+static void cedt_build_cfmws(CXLFixedWindow *fw, Aml *cedt)
 {
-    GList *it;
+    GArray *table_data = cedt->buf;
+    int i;
 
-    for (it = cxls->fixed_windows; it; it = it->next) {
-        CXLFixedWindow *fw = it->data;
-        int i;
-
-        /* Type */
-        build_append_int_noprefix(table_data, 1, 1);
+    /* Type */
+    build_append_int_noprefix(table_data, 1, 1);
 
-        /* Reserved */
-        build_append_int_noprefix(table_data, 0, 1);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 1);
 
-        /* Record Length */
-        build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2);
+    /* Record Length */
+    build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2);
 
-        /* Reserved */
-        build_append_int_noprefix(table_data, 0, 4);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 4);
 
-        /* Base HPA */
-        build_append_int_noprefix(table_data, fw->mr.addr, 8);
+    /* Base HPA */
+    build_append_int_noprefix(table_data, fw->mr.addr, 8);
 
-        /* Window Size */
-        build_append_int_noprefix(table_data, fw->size, 8);
+    /* Window Size */
+    build_append_int_noprefix(table_data, fw->size, 8);
 
-        /* Host Bridge Interleave Ways */
-        build_append_int_noprefix(table_data, fw->enc_int_ways, 1);
+    /* Host Bridge Interleave Ways */
+    build_append_int_noprefix(table_data, fw->enc_int_ways, 1);
 
-        /* Host Bridge Interleave Arithmetic */
-        build_append_int_noprefix(table_data, 0, 1);
+    /* Host Bridge Interleave Arithmetic */
+    build_append_int_noprefix(table_data, 0, 1);
 
-        /* Reserved */
-        build_append_int_noprefix(table_data, 0, 2);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 2);
 
-        /* Host Bridge Interleave Granularity */
-        build_append_int_noprefix(table_data, fw->enc_int_gran, 4);
+    /* Host Bridge Interleave Granularity */
+    build_append_int_noprefix(table_data, fw->enc_int_gran, 4);
 
-        /* Window Restrictions */
-        build_append_int_noprefix(table_data, 0x0f, 2); /* No restrictions */
+    /* Window Restrictions */
+    build_append_int_noprefix(table_data, 0x0f, 2);
 
-        /* QTG ID */
-        build_append_int_noprefix(table_data, 0, 2);
+    /* QTG ID */
+    build_append_int_noprefix(table_data, 0, 2);
 
-        /* Host Bridge List (list of UIDs - currently bus_nr) */
-        for (i = 0; i < fw->num_targets; i++) {
-            g_assert(fw->target_hbs[i]);
-            build_append_int_noprefix(table_data, PXB_DEV(fw->target_hbs[i])->bus_nr, 4);
-        }
+    /* Host Bridge List (list of UIDs - currently bus_nr) */
+    for (i = 0; i < fw->num_targets; i++) {
+        g_assert(fw->target_hbs[i]);
+        build_append_int_noprefix(table_data,
+                                  PXB_DEV(fw->target_hbs[i])->bus_nr, 4);
     }
 }
 
@@ -202,6 +200,7 @@ void cxl_build_cedt(GArray *table_offsets, GArray *table_data,
                     BIOSLinker *linker, const char *oem_id,
                     const char *oem_table_id, CXLState *cxl_state)
 {
+    GSList *cfmws_list, *iter;
     Aml *cedt;
     AcpiTable table = { .sig = "CEDT", .rev = 1,
                         .oem_id = oem_id, .oem_table_id = oem_table_id };
@@ -213,7 +212,12 @@ void cxl_build_cedt(GArray *table_offsets, GArray *table_data,
     /* reserve space for CEDT header */
 
     object_child_foreach_recursive(object_get_root(), cxl_foreach_pxb_hb, cedt);
-    cedt_build_cfmws(cedt->buf, cxl_state);
+
+    cfmws_list = cxl_fmws_get_all_sorted();
+    for (iter = cfmws_list; iter; iter = iter->next) {
+        cedt_build_cfmws(CXL_FMW(iter->data), cedt);
+    }
+    g_slist_free(cfmws_list);
 
     /* copy AML table into ACPI tables blob and patch header there */
     g_array_append_vals(table_data, cedt->buf->data, cedt->buf->len);
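The refactored cedt_build_cfmws() now emits exactly one CFMWS record per fixed memory window. As a reading aid, a packed-struct sketch of the record it serializes; the field widths mirror the build_append_int_noprefix() calls above (the struct itself is not part of the patch):

    /* Illustrative layout of one CEDT CFMWS record (36 + 4*n bytes). */
    #include <stdint.h>

    struct cfmws {
        uint8_t  type;             /* 1 = CFMWS                       */
        uint8_t  rsvd0;
        uint16_t record_length;    /* 36 + 4 * num_targets            */
        uint32_t rsvd1;
        uint64_t base_hpa;         /* fw->mr.addr                     */
        uint64_t window_size;      /* fw->size                        */
        uint8_t  interleave_ways;  /* fw->enc_int_ways (encoded)      */
        uint8_t  interleave_arith; /* 0 = standard modulo arithmetic  */
        uint16_t rsvd2;
        uint32_t granularity;      /* fw->enc_int_gran (encoded)      */
        uint16_t restrictions;     /* 0x0f = no restrictions          */
        uint16_t qtg_id;
        uint32_t target_list[];    /* host bridge UIDs (bus_nr)       */
    } __attribute__((packed));
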
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 7a62f8d..95682b7 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -12,10 +12,13 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/acpi/acpi.h"
+#include "hw/acpi/pcihp.h"
 #include "hw/acpi/generic_event_device.h"
+#include "hw/pci/pci.h"
 #include "hw/irq.h"
 #include "hw/mem/pc-dimm.h"
 #include "hw/mem/nvdimm.h"
+#include "hw/pci/pci_device.h"
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "qemu/error-report.h"
@@ -26,6 +29,7 @@ static const uint32_t ged_supported_events[] = {
     ACPI_GED_PWR_DOWN_EVT,
     ACPI_GED_NVDIMM_HOTPLUG_EVT,
     ACPI_GED_CPU_HOTPLUG_EVT,
+    ACPI_GED_PCI_HOTPLUG_EVT,
 };
 
 /*
@@ -121,6 +125,12 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev,
                 aml_notify(aml_name("\\_SB.NVDR"), aml_int(0x80)));
             break;
+        case ACPI_GED_PCI_HOTPLUG_EVT:
+            aml_append(if_ctx,
+                       aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF));
+            aml_append(if_ctx, aml_call0("\\_SB.PCI0.PCNT"));
+            aml_append(if_ctx, aml_release(aml_name("\\_SB.PCI0.BLCK")));
+            break;
         default:
             /*
              * Please make sure all the events in ged_supported_events[]
@@ -227,6 +237,14 @@ static const MemoryRegionOps ged_regs_ops = {
     },
 };
 
+static void acpi_ged_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+                                        DeviceState *dev, Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp);
+    }
+}
+
 static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev,
                                     DeviceState *dev, Error **errp)
 {
@@ -240,6 +258,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev,
         }
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
         acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_plug_cb(hotplug_dev, &s->pcihp_state, dev, errp);
     } else {
         error_setg(errp, "virt: device plug request for unsupported device"
                    " type: %s", object_get_typename(OBJECT(dev)));
@@ -256,6 +276,9 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev,
         acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
         acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_unplug_request_cb(hotplug_dev, &s->pcihp_state,
+                                            dev, errp);
     } else {
         error_setg(errp, "acpi: device unplug request for unsupported device"
                    " type: %s", object_get_typename(OBJECT(dev)));
@@ -271,6 +294,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev,
         acpi_memory_unplug_cb(&s->memhp_state, dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
         acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        acpi_pcihp_device_unplug_cb(hotplug_dev, &s->pcihp_state, dev, errp);
     } else {
         error_setg(errp, "acpi: device unplug for unsupported device"
                    " type: %s", object_get_typename(OBJECT(dev)));
@@ -299,6 +324,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
         sel = ACPI_GED_NVDIMM_HOTPLUG_EVT;
     } else if (ev & ACPI_CPU_HOTPLUG_STATUS) {
         sel = ACPI_GED_CPU_HOTPLUG_EVT;
+    } else if (ev & ACPI_PCI_HOTPLUG_STATUS) {
+        sel = ACPI_GED_PCI_HOTPLUG_EVT;
     } else {
         /* Unknown event. Return without generating interrupt. */
         warn_report("GED: Unsupported event %d. No irq injected", ev);
@@ -318,6 +345,10 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
 
 static const Property acpi_ged_properties[] = {
     DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
+    DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, AcpiGedState,
+                     pcihp_state.use_acpi_hotplug_bridge, 0),
+    DEFINE_PROP_LINK("bus", AcpiGedState, pcihp_state.root,
+                     TYPE_PCI_BUS, PCIBus *),
 };
 
 static const VMStateDescription vmstate_memhp_state = {
@@ -386,6 +417,25 @@ static const VMStateDescription vmstate_ghes_state = {
     }
 };
 
+static bool pcihp_needed(void *opaque)
+{
+    AcpiGedState *s = opaque;
+    return s->pcihp_state.use_acpi_hotplug_bridge;
+}
+
+static const VMStateDescription vmstate_pcihp_state = {
+    .name = "acpi-ged/pcihp",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = pcihp_needed,
+    .fields = (const VMStateField[]) {
+        VMSTATE_PCI_HOTPLUG(pcihp_state,
+                            AcpiGedState,
+                            NULL, NULL),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_acpi_ged = {
     .name = "acpi-ged",
     .version_id = 1,
@@ -398,6 +448,7 @@ static const VMStateDescription vmstate_acpi_ged = {
         &vmstate_memhp_state,
         &vmstate_cpuhp_state,
         &vmstate_ghes_state,
+        &vmstate_pcihp_state,
         NULL
     }
 };
@@ -406,9 +457,13 @@ static void acpi_ged_realize(DeviceState *dev, Error **errp)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     AcpiGedState *s = ACPI_GED(dev);
+    AcpiPciHpState *pcihp_state = &s->pcihp_state;
     uint32_t ged_events;
     int i;
 
+    if (pcihp_state->use_acpi_hotplug_bridge) {
+        s->ged_event_bitmap |= ACPI_GED_PCI_HOTPLUG_EVT;
+    }
     ged_events = ctpop32(s->ged_event_bitmap);
 
     for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) {
@@ -428,6 +483,13 @@ static void acpi_ged_realize(DeviceState *dev, Error **errp)
             cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev),
                                 &s->cpuhp_state, 0);
             break;
+        case ACPI_GED_PCI_HOTPLUG_EVT:
+            memory_region_init(&s->container_pcihp, OBJECT(dev),
+                               ACPI_PCIHP_REGION_NAME, ACPI_PCIHP_SIZE);
+            sysbus_init_mmio(sbd, &s->container_pcihp);
+            acpi_pcihp_init(OBJECT(s), &s->pcihp_state,
+                            &s->container_pcihp, 0);
+            qbus_set_hotplug_handler(BUS(s->pcihp_state.root), OBJECT(dev));
         }
         ged_events--;
     }
@@ -469,20 +531,34 @@ static void acpi_ged_initfn(Object *obj)
     sysbus_init_mmio(sbd, &ged_st->regs);
 }
 
+static void ged_reset_hold(Object *obj, ResetType type)
+{
+    AcpiGedState *s = ACPI_GED(obj);
+
+    if (s->pcihp_state.use_acpi_hotplug_bridge) {
+        acpi_pcihp_reset(&s->pcihp_state);
+    }
+}
+
 static void acpi_ged_class_init(ObjectClass *class, const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(class);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(class);
     AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(class);
+    ResettableClass *rc = RESETTABLE_CLASS(class);
+    AcpiGedClass *gedc = ACPI_GED_CLASS(class);
 
     dc->desc = "ACPI Generic Event Device";
     device_class_set_props(dc, acpi_ged_properties);
     dc->vmsd = &vmstate_acpi_ged;
     dc->realize = acpi_ged_realize;
 
+    hc->pre_plug = acpi_ged_device_pre_plug_cb;
     hc->plug = acpi_ged_device_plug_cb;
     hc->unplug_request = acpi_ged_unplug_request_cb;
     hc->unplug = acpi_ged_unplug_cb;
 
+    resettable_class_set_parent_phases(rc, NULL, ged_reset_hold, NULL,
+                                       &gedc->parent_phases);
+
     adevc->ospm_status = acpi_ged_ospm_status;
     adevc->send_event = acpi_ged_send_event;
@@ -494,6 +570,7 @@ static const TypeInfo acpi_ged_info = {
     .instance_size = sizeof(AcpiGedState),
     .instance_init = acpi_ged_initfn,
     .class_init = acpi_ged_class_init,
+    .class_size = sizeof(AcpiGedClass),
     .interfaces = (const InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { TYPE_ACPI_DEVICE_IF },
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 967b674..2b3b493 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -322,9 +322,10 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_irq sci_irq)
     }
 
     if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) {
+        object_property_set_link(OBJECT(lpc_pci), "bus",
+                                 OBJECT(pci_get_bus(lpc_pci)), &error_abort);
         acpi_pcihp_init(OBJECT(lpc_pci),
                         &pm->acpi_pci_hotplug,
-                        pci_get_bus(lpc_pci),
                         pci_address_space_io(lpc_pci),
                         ACPI_PCIHP_ADDR_ICH9);
 
@@ -428,6 +429,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
     object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE,
                                    &pm->pm_io_base, OBJ_PROP_FLAG_READ);
+    object_property_add_link(obj, "bus", TYPE_PCI_BUS,
+                             (Object **)&pm->acpi_pci_hotplug.root,
+                             object_property_allow_set_link,
+                             OBJ_PROP_LINK_STRONG);
     object_property_add(obj, ACPI_PM_PROP_GPE0_BLK, "uint32",
                         ich9_pm_get_gpe0_blk,
                         NULL, NULL, pm);
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 9ba9080..732d613 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -535,7 +535,7 @@ nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr)
 
 #define NVDIMM_QEMU_RSVD_HANDLE_ROOT 0x10000
 
-/* Read FIT data, defined in docs/specs/acpi_nvdimm.txt. */
+/* Read FIT data, defined in docs/specs/acpi_nvdimm.rst. */
 static void nvdimm_dsm_func_read_fit(NVDIMMState *state, NvdimmDsmIn *in,
                                      hwaddr dsm_mem_addr)
 {
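With the hooks added above, a machine that instantiates the GED can opt in to ACPI PCI hotplug by setting the new properties before realize. A hypothetical board-side sketch (the surrounding variables are assumptions; the property names are the ones this series adds, with ACPI_PM_PROP_ACPI_PCIHP_BRIDGE resolving to the existing "acpi-pci-hotplug-with-bridge-support" string):

    /* Hypothetical board code: enable ACPI PCI hotplug on a GED instance. */
    DeviceState *ged = qdev_new(TYPE_ACPI_GED);

    qdev_prop_set_uint32(ged, "ged-event", ged_event_bitmap);
    qdev_prop_set_bit(ged, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, true);
    object_property_set_link(OBJECT(ged), "bus", OBJECT(pci_root_bus),
                             &error_fatal);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(ged), &error_fatal);

Note that realize() then folds ACPI_GED_PCI_HOTPLUG_EVT into the event bitmap on its own, so the board only has to flip the bool and wire the bus link.
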
diff --git a/hw/acpi/pci-bridge.c b/hw/acpi/pci-bridge.c
index 7baa703..394a919 100644
--- a/hw/acpi/pci-bridge.c
+++ b/hw/acpi/pci-bridge.c
@@ -35,3 +35,57 @@ void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope)
         }
     }
 }
+
+Aml *build_pci_bridge_edsm(void)
+{
+    Aml *method, *ifctx;
+    Aml *zero = aml_int(0);
+    Aml *func = aml_arg(2);
+    Aml *ret = aml_local(0);
+    Aml *aidx = aml_local(1);
+    Aml *params = aml_arg(4);
+
+    method = aml_method("EDSM", 5, AML_SERIALIZED);
+
+    /* get supported functions */
+    ifctx = aml_if(aml_equal(func, zero));
+    {
+        /* 1: have supported functions */
+        /* 7: support for function 7 */
+        const uint8_t caps = 1 | BIT(7);
+        build_append_pci_dsm_func0_common(ifctx, ret);
+        aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero)));
+        aml_append(ifctx, aml_return(ret));
+    }
+    aml_append(method, ifctx);
+
+    /* handle specific functions requests */
+    /*
+     * PCI Firmware Specification 3.1
+     * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
+     *        Operating Systems
+     */
+    ifctx = aml_if(aml_equal(func, aml_int(7)));
+    {
+        Aml *pkg = aml_package(2);
+        aml_append(pkg, zero);
+        /* optional, if not impl. should return null string */
+        aml_append(pkg, aml_string("%s", ""));
+        aml_append(ifctx, aml_store(pkg, ret));
+
+        /*
+         * IASL is fine when initializing Package with computational data,
+         * however it makes guest unhappy /it fails to process such AML/.
+         * So use runtime assignment to set acpi-index after initializer
+         * to make OSPM happy.
+         */
+        aml_append(ifctx,
+            aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx));
+        aml_append(ifctx, aml_store(aidx, aml_index(ret, zero)));
+        aml_append(ifctx, aml_return(ret));
+    }
+    aml_append(method, ifctx);
+
+    return method;
+}
+
diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c
index d511a85..2228f12 100644
--- a/hw/acpi/pci.c
+++ b/hw/acpi/pci.c
@@ -301,3 +301,53 @@ void build_srat_generic_affinity_structures(GArray *table_data)
     object_child_foreach_recursive(object_get_root(), build_acpi_generic_port,
                                    table_data);
 }
+
+Aml *build_pci_host_bridge_osc_method(bool enable_native_pcie_hotplug)
+{
+    Aml *if_ctx;
+    Aml *if_ctx2;
+    Aml *else_ctx;
+    Aml *method;
+    Aml *a_cwd1 = aml_name("CDW1");
+    Aml *a_ctrl = aml_local(0);
+
+    method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
+    aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
+
+    if_ctx = aml_if(aml_equal(
+        aml_arg(0), aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766")));
+    aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
+    aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
+
+    aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl));
+
+    /*
+     * Always allow native PME, AER (no dependencies)
+     * Allow SHPC (PCI bridges can have SHPC controller)
+     * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled.
+     */
+    aml_append(if_ctx, aml_and(a_ctrl,
+        aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl));
+
+    if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1))));
+    /* Unknown revision */
+    aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x08), a_cwd1));
+    aml_append(if_ctx, if_ctx2);
+
+    if_ctx2 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl)));
+    /* Capabilities bits were masked */
+    aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x10), a_cwd1));
+    aml_append(if_ctx, if_ctx2);
+
+    /* Update DWORD3 in the buffer */
+    aml_append(if_ctx, aml_store(a_ctrl, aml_name("CDW3")));
+    aml_append(method, if_ctx);
+
+    else_ctx = aml_else();
+    /* Unrecognized UUID */
+    aml_append(else_ctx, aml_or(a_cwd1, aml_int(4), a_cwd1));
+    aml_append(method, else_ctx);
+
+    aml_append(method, aml_return(aml_arg(3)));
+    return method;
+}
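The 0x1E mask in the _OSC method above keeps the SHPC/PME/AER grants while bit 0 (native PCIe hotplug) is granted only when ACPI hotplug is disabled. An annotated sketch of the _OSC Control Field bits that mask assumes, per the PCI Firmware Specification (the enum is illustrative, not from the patch):

    /* Illustrative _OSC Control Field bits covered by the 0x1E mask. */
    enum {
        OSC_PCIE_NATIVE_HOTPLUG = 1 << 0, /* granted only w/o ACPI hotplug */
        OSC_SHPC_NATIVE_HOTPLUG = 1 << 1,
        OSC_PCIE_NATIVE_PME     = 1 << 2,
        OSC_PCIE_AER            = 1 << 3,
        OSC_PCIE_CAP_STRUCTURE  = 1 << 4,
    };
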
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index aac9001..4922bbc 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -3,7 +3,7 @@
  *
  * QEMU supports PCI hotplug via ACPI. This module
  * implements the interface between QEMU and the ACPI BIOS.
- * Interface specification - see docs/specs/acpi_pci_hotplug.txt
+ * Interface specification - see docs/specs/acpi_pci_hotplug.rst
  *
  * Copyright (c) 2013, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
  * Copyright (c) 2006 Fabrice Bellard
@@ -26,7 +26,8 @@
 
 #include "qemu/osdep.h"
 #include "hw/acpi/pcihp.h"
-
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/acpi_aml_interface.h"
 #include "hw/pci-host/i440fx.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bridge.h"
@@ -39,9 +40,9 @@
 #include "migration/vmstate.h"
 #include "qapi/error.h"
 #include "qom/qom-qobject.h"
+#include "qobject/qnum.h"
 #include "trace.h"
 
-#define ACPI_PCIHP_SIZE 0x0018
 #define PCI_UP_BASE 0x0000
 #define PCI_DOWN_BASE 0x0004
 #define PCI_EJ_BASE 0x0008
@@ -97,10 +98,10 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
     return info;
 }
 
-static void acpi_set_pci_info(bool has_bridge_hotplug)
+static void acpi_set_pci_info(AcpiPciHpState *s)
 {
     static bool bsel_is_set;
-    Object *host = acpi_get_i386_pci_host();
+    bool has_bridge_hotplug = s->use_acpi_hotplug_bridge;
     PCIBus *bus;
     BSELInfo info = { .bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT,
                       .has_bridge_hotplug = has_bridge_hotplug };
@@ -110,11 +111,8 @@ static void acpi_set_pci_info(bool has_bridge_hotplug)
     }
     bsel_is_set = true;
 
-    if (!host) {
-        return;
-    }
-
-    bus = PCI_HOST_BRIDGE(host)->bus;
+    bus = s->root;
     if (bus) {
         /* Scan all PCI buses. Set property to enable acpi based hotplug. */
         pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &info);
@@ -264,7 +262,7 @@ static void acpi_pcihp_update(AcpiPciHpState *s)
 
 void acpi_pcihp_reset(AcpiPciHpState *s)
 {
-    acpi_set_pci_info(s->use_acpi_hotplug_bridge);
+    acpi_set_pci_info(s);
     acpi_pcihp_update(s);
 }
 
@@ -495,13 +493,13 @@ static const MemoryRegionOps acpi_pcihp_io_ops = {
     },
 };
 
-void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
+void acpi_pcihp_init(Object *owner, AcpiPciHpState *s,
                      MemoryRegion *io, uint16_t io_base)
 {
     s->io_len = ACPI_PCIHP_SIZE;
     s->io_base = io_base;
 
-    s->root = root_bus;
+    assert(s->root);
 
     memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
                           "acpi-pci-hotplug", s->io_len);
@@ -513,6 +511,425 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
                                    OBJ_PROP_FLAG_READ);
 }
 
+void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar)
+{
+    Aml *UUID, *ifctx1;
+    uint8_t byte_list[1] = { 0 }; /* nothing supported yet */
+
+    aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar));
+
+    /*
+     * PCI Firmware Specification 3.1
+     * 4.6. _DSM Definitions for PCI
+     */
+    UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
+    ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID)));
+    {
+        /* call is for unsupported UUID, bail out */
+        aml_append(ifctx1, aml_return(retvar));
+    }
+    aml_append(ctx, ifctx1);
+
+    ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2)));
+    {
+        /* call is for unsupported REV, bail out */
+        aml_append(ifctx1, aml_return(retvar));
+    }
+    aml_append(ctx, ifctx1);
+}
+
+static Aml *aml_pci_pdsm(void)
+{
+    Aml *method, *ifctx, *ifctx1;
+    Aml *ret = aml_local(0);
+    Aml *caps = aml_local(1);
+    Aml *acpi_index = aml_local(2);
+    Aml *zero = aml_int(0);
+    Aml *one = aml_int(1);
+    Aml *not_supp = aml_int(0xFFFFFFFF);
+    Aml *func = aml_arg(2);
+    Aml *params = aml_arg(4);
+    Aml *bnum = aml_derefof(aml_index(params, aml_int(0)));
+    Aml *sunum = aml_derefof(aml_index(params, aml_int(1)));
+
+    method = aml_method("PDSM", 5, AML_SERIALIZED);
+
+    /* get supported functions */
+    ifctx = aml_if(aml_equal(func, zero));
+    {
+        build_append_pci_dsm_func0_common(ifctx, ret);
+
+        aml_append(ifctx, aml_store(zero, caps));
+        aml_append(ifctx,
+            aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
+        /*
+         * advertise function 7 if device has acpi-index
+         * acpi_index values:
+         *            0: not present (default value)
+         *     FFFFFFFF: not supported (old QEMU without PIDX reg)
+         *        other: device's acpi-index
+         */
+        ifctx1 = aml_if(aml_lnot(
+            aml_or(aml_equal(acpi_index, zero),
+                   aml_equal(acpi_index, not_supp), NULL)
+        ));
+        {
+            /* have supported functions */
+            aml_append(ifctx1, aml_or(caps, one, caps));
+            /* support for function 7 */
+            aml_append(ifctx1,
+                aml_or(caps, aml_shiftleft(one, aml_int(7)), caps));
+        }
+        aml_append(ifctx, ifctx1);
+
+        aml_append(ifctx, aml_store(caps, aml_index(ret, zero)));
+        aml_append(ifctx, aml_return(ret));
+    }
+    aml_append(method, ifctx);
+
+    /* handle specific functions requests */
+    /*
+     * PCI Firmware Specification 3.1
+     * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
+     *        Operating Systems
+     */
+    ifctx = aml_if(aml_equal(func, aml_int(7)));
+    {
+        Aml *pkg = aml_package(2);
+
+        aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
+        aml_append(ifctx, aml_store(pkg, ret));
+        /*
+         * Windows calls func=7 without checking if it's available,
+         * as workaround Microsoft has suggested to return invalid for func7
+         * Package, so return 2 elements package but only initialize elements
+         * when acpi_index is supported and leave them uninitialized, which
+         * leads elements to being Uninitialized ObjectType and should trip
+         * Windows into discarding result as an unexpected and prevent setting
+         * bogus 'PCI Label' on the device.
+         */
+        ifctx1 = aml_if(aml_lnot(aml_lor(
+            aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp)
+        )));
+        {
+            aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero)));
+            /*
+             * optional, if not impl. should return null string
+             */
+            aml_append(ifctx1, aml_store(aml_string("%s", ""),
+                                         aml_index(ret, one)));
+        }
+        aml_append(ifctx, ifctx1);
+
+        aml_append(ifctx, aml_return(ret));
+    }
+
+    aml_append(method, ifctx);
+    return method;
+}
+
+void build_acpi_pci_hotplug(Aml *table, AmlRegionSpace rs, uint64_t pcihp_addr)
+{
+    Aml *scope;
+    Aml *field;
+    Aml *method;
+
+    scope = aml_scope("_SB.PCI0");
+
+    aml_append(scope,
+        aml_operation_region("PCST", rs, aml_int(pcihp_addr), 0x08));
+    field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+    aml_append(field, aml_named_field("PCIU", 32));
+    aml_append(field, aml_named_field("PCID", 32));
+    aml_append(scope, field);
+
+    aml_append(scope,
+        aml_operation_region("SEJ", rs,
+                             aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04));
+    field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+    aml_append(field, aml_named_field("B0EJ", 32));
+    aml_append(scope, field);
+
+    aml_append(scope,
+        aml_operation_region("BNMR", rs,
+                             aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08));
+    field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+    aml_append(field, aml_named_field("BNUM", 32));
+    aml_append(field, aml_named_field("PIDX", 32));
+    aml_append(scope, field);
+
+    aml_append(scope, aml_mutex("BLCK", 0));
+
+    method = aml_method("PCEJ", 2, AML_NOTSERIALIZED);
+    aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF));
+    aml_append(method, aml_store(aml_arg(0), aml_name("BNUM")));
+    aml_append(method,
+        aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("B0EJ")));
+    aml_append(method, aml_release(aml_name("BLCK")));
+    aml_append(method, aml_return(aml_int(0)));
+    aml_append(scope, method);
+
+    method = aml_method("AIDX", 2, AML_NOTSERIALIZED);
+    aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF));
+    aml_append(method, aml_store(aml_arg(0), aml_name("BNUM")));
+    aml_append(method,
+        aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("PIDX")));
+    aml_append(method, aml_store(aml_name("PIDX"), aml_local(0)));
+    aml_append(method, aml_release(aml_name("BLCK")));
+    aml_append(method, aml_return(aml_local(0)));
+    aml_append(scope, method);
+
+    aml_append(scope, aml_pci_pdsm());
+
+    aml_append(table, scope);
+}
+
+/* Reserve PCIHP resources */
+void build_append_pcihp_resources(Aml *scope /* \\_SB.PCI0 */,
+                                  uint64_t io_addr, uint64_t io_len)
+{
+    Aml *dev, *crs;
+
+    dev = aml_device("PHPR");
+    aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06")));
+    aml_append(dev,
+        aml_name_decl("_UID", aml_string("PCI Hotplug resources")));
+    /* device present, functioning, decoding, not shown in UI */
+    aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
+    crs = aml_resource_template();
+    aml_append(crs, aml_io(AML_DECODE16, io_addr, io_addr, 1, io_len));
+    aml_append(dev, aml_name_decl("_CRS", crs));
+    aml_append(scope, dev);
+}
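The PCEJ and AIDX methods generated above drive the hotplug register block from AML. A guest's-eye sketch of the same two sequences as raw register accesses; the 0xae00 base and the BNUM/PIDX offsets are assumptions inferred from the layout built here (PCI_EJ_BASE is 0x0008 per the defines above), and io_write32/io_read32 are placeholder accessors, not a real API:

    /* Illustrative guest-side mirror of PCEJ/AIDX. All names assumed. */
    #include <stdint.h>

    extern void io_write32(uint16_t port, uint32_t val);  /* placeholder */
    extern uint32_t io_read32(uint16_t port);             /* placeholder */

    #define PCIHP_BASE 0xae00              /* assumed PIIX4-style base  */
    #define REG_B0EJ   (PCIHP_BASE + 0x08) /* eject, PCI_EJ_BASE        */
    #define REG_BNUM   (PCIHP_BASE + 0x10) /* bus select (assumed)      */
    #define REG_PIDX   (PCIHP_BASE + 0x14) /* acpi-index (assumed)      */

    static void eject_slot(uint32_t bsel, uint32_t slot)
    {
        io_write32(REG_BNUM, bsel);        /* select the bus            */
        io_write32(REG_B0EJ, 1U << slot);  /* request device eject      */
    }

    static uint32_t read_acpi_index(uint32_t bsel, uint32_t slot)
    {
        io_write32(REG_BNUM, bsel);
        io_write32(REG_PIDX, 1U << slot);  /* latch slot of interest    */
        return io_read32(REG_PIDX);        /* 0 = none, ~0 = unsupported */
    }
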
+bool build_append_notification_callback(Aml *parent_scope, const PCIBus *bus)
+{
+    Aml *method;
+    PCIBus *sec;
+    QObject *bsel;
+    int nr_notifiers = 0;
+    GQueue *pcnt_bus_list = g_queue_new();
+
+    QLIST_FOREACH(sec, &bus->child, sibling) {
+        Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn);
+        if (pci_bus_is_root(sec)) {
+            continue;
+        }
+        nr_notifiers = nr_notifiers +
+                       build_append_notification_callback(br_scope, sec);
+        /*
+         * add new child scope to parent
+         * and keep track of bus that have PCNT,
+         * bus list is used later to call children PCNTs from this level PCNT
+         */
+        if (nr_notifiers) {
+            g_queue_push_tail(pcnt_bus_list, sec);
+            aml_append(parent_scope, br_scope);
+        }
+    }
+
+    /*
+     * Append PCNT method to notify about events on local and child buses.
+     * ps: hostbridge might not have hotplug (bsel) enabled but might have
+     *     child bridges that do have bsel.
+     */
+    method = aml_method("PCNT", 0, AML_NOTSERIALIZED);
+
+    /* If bus supports hotplug select it and notify about local events */
+    bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL);
+    if (bsel) {
+        uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
+
+        aml_append(method, aml_store(aml_int(bsel_val), aml_name("BNUM")));
+        aml_append(method, aml_call2("DVNT", aml_name("PCIU"),
+                                     aml_int(1))); /* Device Check */
+        aml_append(method, aml_call2("DVNT", aml_name("PCID"),
+                                     aml_int(3))); /* Eject Request */
+        nr_notifiers++;
+    }
+
+    /* Notify about child bus events in any case */
+    while ((sec = g_queue_pop_head(pcnt_bus_list))) {
+        aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn));
+    }
+
+    aml_append(parent_scope, method);
+    qobject_unref(bsel);
+    g_queue_free(pcnt_bus_list);
+    return !!nr_notifiers;
+}
+
+static Aml *aml_pci_device_dsm(void)
+{
+    Aml *method;
+
+    method = aml_method("_DSM", 4, AML_SERIALIZED);
+    {
+        Aml *params = aml_local(0);
+        Aml *pkg = aml_package(2);
+        aml_append(pkg, aml_int(0));
+        aml_append(pkg, aml_int(0));
+        aml_append(method, aml_store(pkg, params));
+        aml_append(method,
+            aml_store(aml_name("BSEL"), aml_index(params, aml_int(0))));
+        aml_append(method,
+            aml_store(aml_name("ASUN"), aml_index(params, aml_int(1))));
+        aml_append(method,
+            aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1),
+                                 aml_arg(2), aml_arg(3), params))
+        );
+    }
+    return method;
+}
+
+static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev)
+{
+    Aml *method;
+
+    g_assert(pdev->acpi_index != 0);
+    method = aml_method("_DSM", 4, AML_SERIALIZED);
+    {
+        Aml *params = aml_local(0);
+        Aml *pkg = aml_package(1);
+        aml_append(pkg, aml_int(pdev->acpi_index));
+        aml_append(method, aml_store(pkg, params));
+        aml_append(method,
+            aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1),
+                                 aml_arg(2), aml_arg(3), params))
+        );
+    }
+    return method;
+}
+
+static void build_append_pcihp_notify_entry(Aml *method, int slot)
+{
+    Aml *if_ctx;
+    int32_t devfn = PCI_DEVFN(slot, 0);
+
+    if_ctx = aml_if(aml_and(aml_arg(0), aml_int(0x1U << slot), NULL));
+    aml_append(if_ctx, aml_notify(aml_name("S%.02X", devfn), aml_arg(1)));
+    aml_append(method, if_ctx);
+}
+
+static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus)
+{
+    const PCIDevice *pdev = bus->devices[devfn];
+
+    if (PCI_FUNC(devfn)) {
+        if (IS_PCI_BRIDGE(pdev)) {
+            /*
+             * Ignore only hotplugged PCI bridges on !0 functions, but
+             * allow describing cold plugged bridges on all functions
+             */
+            if (DEVICE(pdev)->hotplugged) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus)
+{
+    PCIDevice *pdev = bus->devices[devfn];
+    if (pdev) {
+        return is_devfn_ignored_generic(devfn, bus) ||
+               !DEVICE_GET_CLASS(pdev)->hotpluggable ||
+               /* Cold plugged bridges aren't themselves hot-pluggable */
+               (IS_PCI_BRIDGE(pdev) && !DEVICE(pdev)->hotplugged);
+    } else { /* non populated slots */
+        /*
+         * hotplug is supported only for non-multifunction device
+         * so generate device description only for function 0
+         */
+        if (PCI_FUNC(devfn) ||
+            (pci_bus_is_express(bus) && PCI_SLOT(devfn) > 0)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus)
+{
+    int devfn;
+    Aml *dev, *notify_method = NULL, *method;
+    QObject *bsel = object_property_get_qobject(OBJECT(bus),
+                                                ACPI_PCIHP_PROP_BSEL, NULL);
+    uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
+    qobject_unref(bsel);
+
+    aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val)));
+    notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED);
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        int slot = PCI_SLOT(devfn);
+        int adr = slot << 16 | PCI_FUNC(devfn);
+
+        if (is_devfn_ignored_hotplug(devfn, bus)) {
+            continue;
+        }
+
+        if (bus->devices[devfn]) {
+            dev = aml_scope("S%.02X", devfn);
+        } else {
+            dev = aml_device("S%.02X", devfn);
+            aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
+        }
+
+        /*
+         * Can't declare _SUN here for every device as it changes 'slot'
+         * enumeration order in linux kernel, so use another variable for it
+         */
+        aml_append(dev, aml_name_decl("ASUN", aml_int(slot)));
+        aml_append(dev, aml_pci_device_dsm());
+
+        aml_append(dev, aml_name_decl("_SUN", aml_int(slot)));
+        /* add _EJ0 to make slot hotpluggable */
+        method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
+        aml_append(method,
+            aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN"))
+        );
+        aml_append(dev, method);
+
+        build_append_pcihp_notify_entry(notify_method, slot);
+
+        /* device descriptor has been composed, add it into parent context */
+        aml_append(parent_scope, dev);
+    }
+    aml_append(parent_scope, notify_method);
+}
+
+void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
+{
+    int devfn;
+    Aml *dev;
+
+    for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+        /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */
+        int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn);
+        PCIDevice *pdev = bus->devices[devfn];
+
+        if (!pdev || is_devfn_ignored_generic(devfn, bus)) {
+            continue;
+        }
+
+        /* start to compose PCI device descriptor */
+        dev = aml_device("S%.02X", devfn);
+        aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
+
+        call_dev_aml_func(DEVICE(bus->devices[devfn]), dev);
+        /* add _DSM if device has acpi-index set */
+        if (pdev->acpi_index &&
+            !object_property_get_bool(OBJECT(pdev), "hotpluggable",
+                                      &error_abort)) {
+            aml_append(dev, aml_pci_static_endpoint_dsm(pdev));
+        }
+
+        /* device descriptor has been composed, add it into parent context */
+        aml_append(parent_scope, dev);
+    }
+}
+
 const VMStateDescription vmstate_acpi_pcihp_pci_status = {
     .name = "acpi_pcihp_pci_status",
     .version_id = 1,
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index d98b80d..7a18f18 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -567,7 +567,8 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
 
     if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge ||
         s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) {
-        acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent,
+        object_property_set_link(OBJECT(s), "bus", OBJECT(bus), &error_abort);
+        acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, parent,
                         ACPI_PCIHP_ADDR_PIIX4);
         qbus_set_hotplug_handler(BUS(pci_get_bus(PCI_DEVICE(s))), OBJECT(s));
     }
@@ -611,6 +612,8 @@ static const Property piix4_pm_properties[] = {
                      acpi_pci_hotplug.use_acpi_hotplug_bridge, true),
     DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCI_ROOTHP, PIIX4PMState,
                      acpi_pci_hotplug.use_acpi_root_pci_hotplug, true),
+    DEFINE_PROP_LINK("bus", PIIX4PMState, acpi_pci_hotplug.root,
+                     TYPE_PCI_BUS, PCIBus *),
     DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
                      acpi_memory_hotplug.is_enabled, true),
     DEFINE_PROP_BOOL("smm-compat", PIIX4PMState, smm_compat, false),
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
index fac3d6d..33c35c8 100644
--- a/hw/acpi/vmgenid.c
+++ b/hw/acpi/vmgenid.c
@@ -38,7 +38,7 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
     guid_le = qemu_uuid_bswap(vms->guid);
     /* The GUID is written at a fixed offset into the fw_cfg file
      * in order to implement the "OVMF SDT Header probe suppressor"
-     * see docs/specs/vmgenid.txt for more details
+     * see docs/specs/vmgenid.rst for more details
      */
     g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data,
                         ARRAY_SIZE(guid_le.data));
@@ -101,7 +101,7 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
      * < 4GB, but write 64 bits anyway.
      * The address that is patched in is offset in order to implement
      * the "OVMF SDT Header probe suppressor"
-     * see docs/specs/vmgenid.txt for more details.
+     * see docs/specs/vmgenid.rst for more details.
      */
     bios_linker_loader_write_pointer(linker,
         VMGENID_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
@@ -153,7 +153,7 @@ static void vmgenid_update_guest(VmGenIdState *vms)
         guid_le = qemu_uuid_bswap(vms->guid);
         /* The GUID is written at a fixed offset into the fw_cfg file
          * in order to implement the "OVMF SDT Header probe suppressor"
-         * see docs/specs/vmgenid.txt for more details.
+         * see docs/specs/vmgenid.rst for more details.
          */
         cpu_physical_memory_write(vmgenid_addr, guid_le.data,
                                   sizeof(guid_le.data));
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index f543d94..2aa4b5d 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -34,6 +34,8 @@ config ARM_VIRT
     select ACPI_HW_REDUCED
     select ACPI_APEI
     select ACPI_VIOT
+    select ACPI_PCIHP
+    select ACPI_PCI_BRIDGE
     select VIRTIO_MEM_SUPPORTED
     select ACPI_CXL
     select ACPI_HMAT
@@ -95,6 +97,12 @@ config INTEGRATOR
     select PL181 # display
     select SMC91C111
 
+config MAX78000FTHR
+    bool
+    default y
+    depends on TCG && ARM
+    select MAX78000_SOC
+
 config MPS3R
     bool
     default y
@@ -357,6 +365,15 @@ config ALLWINNER_R40
     select USB_EHCI_SYSBUS
     select SD
 
+config MAX78000_SOC
+    bool
+    select ARM_V7M
+    select MAX78000_ICC
+    select MAX78000_UART
+    select MAX78000_GCR
+    select MAX78000_TRNG
+    select MAX78000_AES
+
 config RASPI
     bool
     default y
@@ -532,6 +549,7 @@ config ASPEED_SOC
     select I2C
     select DPS310
     select PCA9552
+    select PCA9554
     select SERIAL_MM
     select SMBUS_EEPROM
     select PCA954X
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
index 0bf7008..c8eda39 100644
--- a/hw/arm/allwinner-r40.c
+++ b/hw/arm/allwinner-r40.c
@@ -20,7 +20,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
-#include "qemu/bswap.h"
 #include "qemu/module.h"
 #include "qemu/units.h"
 #include "hw/boards.h"
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index d0b3336..c31bbe7 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -19,6 +19,7 @@
 #include "hw/i2c/i2c_mux_pca954x.h"
 #include "hw/i2c/smbus_eeprom.h"
 #include "hw/gpio/pca9552.h"
+#include "hw/gpio/pca9554.h"
 #include "hw/nvram/eeprom_at24c.h"
 #include "hw/sensor/tmp105.h"
 #include "hw/misc/led.h"
@@ -197,9 +198,12 @@ struct AspeedMachineState {
 #define FUJI_BMC_HW_STRAP2 0x00000000
 
 /* Bletchley hardware value */
-/* TODO: Leave same as EVB for now. */
-#define BLETCHLEY_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
-#define BLETCHLEY_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
+#define BLETCHLEY_BMC_HW_STRAP1 0x00002000
+#define BLETCHLEY_BMC_HW_STRAP2 0x00000801
+
+/* GB200NVL hardware value */
+#define GB200NVL_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
+#define GB200NVL_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
 
 /* Qualcomm DC-SCM hardware value */
 #define QCOM_DC_SCM_V1_BMC_HW_STRAP1 0x00000000
@@ -465,6 +469,8 @@ static void aspeed_machine_init(MachineState *machine)
         aspeed_board_init_flashes(&bmc->soc->spi[0],
                                   bmc->spi_model ? bmc->spi_model : amc->spi_model,
                                   1, amc->num_cs);
+        aspeed_board_init_flashes(&bmc->soc->spi[1],
+                                  amc->spi2_model, 1, amc->num_cs2);
     }
 
     if (machine->kernel_filename && sc->num_cpus > 1) {
@@ -645,6 +651,12 @@ static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr)
                                   TYPE_PCA9552, addr);
 }
 
+static I2CSlave *create_pca9554(AspeedSoCState *soc, int bus_id, int addr)
+{
+    return i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id),
+                                   TYPE_PCA9554, addr);
+}
+
 static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc)
 {
     AspeedSoCState *soc = bmc->soc;
@@ -1003,6 +1015,180 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc)
 }
 
 #define TYPE_TMP421 "tmp421"
+#define TYPE_DS1338 "ds1338"
+
+/* Catalina hardware value */
+#define CATALINA_BMC_HW_STRAP1 0x00002002
+#define CATALINA_BMC_HW_STRAP2 0x00000800
+
+#define CATALINA_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB)
+
+static void catalina_bmc_i2c_init(AspeedMachineState *bmc)
+{
+    /* Reference from v6.16-rc2 aspeed-bmc-facebook-catalina.dts */
+
+    AspeedSoCState *soc = bmc->soc;
+    I2CBus *i2c[16] = {};
+    I2CSlave *i2c_mux;
+
+    /* busses 0-15 are all used. */
+    for (int i = 0; i < ARRAY_SIZE(i2c); i++) {
+        i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+    }
+
+    /* &i2c0 */
+    /* i2c-mux@71 (PCA9546) on i2c0 */
+    i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x71);
+
+    /* i2c-mux@72 (PCA9546) on i2c0 */
+    i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x72);
+
+    /* i2c0mux1ch1 */
+    /* io_expander7 - pca9535@20 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+                            TYPE_PCA9552, 0x20);
+    /* eeprom@50 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+    /* i2c-mux@73 (PCA9546) on i2c0 */
+    i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x73);
+
+    /* i2c-mux@75 (PCA9546) on i2c0 */
+    i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x75);
+
+    /* i2c-mux@76 (PCA9546) on i2c0 */
+    i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x76);
+
+    /* i2c0mux4ch1 */
+    /* io_expander8 - pca9535@21 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+                            TYPE_PCA9552, 0x21);
+    /* eeprom@50 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+    /* i2c-mux@77 (PCA9546) on i2c0 */
+    i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x77);
+
+
+    /* &i2c1 */
+    /* i2c-mux@70 (PCA9548) on i2c1 */
+    i2c_mux = i2c_slave_create_simple(i2c[1], TYPE_PCA9548, 0x70);
+    /* i2c1mux0ch0 */
+    /* ina238@41 - no model */
+    /* ina238@42 - no model */
+    /* ina238@44 - no model */
+    /* i2c1mux0ch1 */
+    /* ina238@41 - no model */
+    /* ina238@43 - no model */
+    /* i2c1mux0ch4 */
+    /* ltc4287@42 - no model */
+    /* ltc4287@43 - no model */
+
+    /* i2c1mux0ch5 */
+    /* eeprom@54 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 5), 0x54, 8 * KiB);
+    /* tpm75@4f */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), TYPE_TMP75, 0x4f);
+
+    /* i2c1mux0ch6 */
+    /* io_expander5 - pca9554@27 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+                            TYPE_PCA9554, 0x27);
+    /* io_expander6 - pca9555@25 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+                            TYPE_PCA9552, 0x25);
+    /* eeprom@51 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x51, 8 * KiB);
+
+    /* i2c1mux0ch7 */
+    /* eeprom@53 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 7), 0x53, 8 * KiB);
+    /* temperature-sensor@4b - tmp75 */
+    i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 7), TYPE_TMP75, 0x4b);
+
+    /* &i2c2 */
+    /* io_expander0 - pca9555@20 */
+    i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x20);
+    /* io_expander0 - pca9555@21 */
+    i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x21);
+    /* io_expander0 - pca9555@27 */
+    i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x27);
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[2], 0x50, 8 * KiB);
+    /* eeprom@51 */
+    at24c_eeprom_init(i2c[2], 0x51, 8 * KiB);
+
+    /* &i2c5 */
+    /* i2c-mux@70 (PCA9548) on i2c5 */
+    i2c_mux = i2c_slave_create_simple(i2c[5], TYPE_PCA9548, 0x70);
+    /* i2c5mux0ch6 */
+    /* eeprom@52 */
+    at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x52, 8 * KiB);
+    /* i2c5mux0ch7 */
+    /* ina230@40 - no model */
+    /* ina230@41 - no model */
+    /* ina230@44 - no model */
+    /* ina230@45 - no model */
+
+    /* &i2c6 */
+    /* io_expander3 - pca9555@21 */
+    i2c_slave_create_simple(i2c[6], TYPE_PCA9552, 0x21);
+    /* rtc@6f - nct3018y */
+    i2c_slave_create_simple(i2c[6], TYPE_DS1338, 0x6f);
+
+    /* &i2c9 */
+    /* io_expander4 - pca9555@4f */
+    i2c_slave_create_simple(i2c[9], TYPE_PCA9552, 0x4f);
+    /* temperature-sensor@4b - tpm75 */
+    i2c_slave_create_simple(i2c[9], TYPE_TMP75, 0x4b);
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[9], 0x50, 8 * KiB);
+    /* eeprom@56 */
+    at24c_eeprom_init(i2c[9], 0x56, 8 * KiB);
+
+    /* &i2c10 */
+    /* temperature-sensor@1f - tpm421 */
+    i2c_slave_create_simple(i2c[10], TYPE_TMP421, 0x1f);
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[10], 0x50, 8 * KiB);
+
+    /* &i2c11 */
+    /* ssif-bmc@10 - no model */
+
+    /* &i2c12 */
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[12], 0x50, 8 * KiB);
+
+    /* &i2c13 */
+    /* eeprom@50 */
+    at24c_eeprom_init(i2c[13], 0x50, 8 * KiB);
+    /* eeprom@54 */
+    at24c_eeprom_init(i2c[13], 0x54, 256);
+    /* eeprom@55 */
+    at24c_eeprom_init(i2c[13], 0x55, 256);
+    /* eeprom@57 */
+    at24c_eeprom_init(i2c[13], 0x57, 256);
+
+    /* &i2c14 */
+    /* io_expander9 - pca9555@10 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x10);
+    /* io_expander10 - pca9555@11 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x11);
+    /* io_expander11 - pca9555@12 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x12);
+    /* io_expander12 - pca9555@13 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x13);
+    /* io_expander13 - pca9555@14 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x14);
+    /* io_expander14 - pca9555@15 */
+    i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x15);
+
+    /* &i2c15 */
+    /* temperature-sensor@1f - tmp421 */
+    i2c_slave_create_simple(i2c[15], TYPE_TMP421, 0x1f);
+    /* eeprom@52 */
+    at24c_eeprom_init(i2c[15], 0x52, 8 * KiB);
+}
 
 static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
 {
@@ -1050,6 +1236,45 @@ static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
     i2c_slave_create_simple(i2c[12], TYPE_PCA9552, 0x67);
 }
 
+
+static void gb200nvl_bmc_i2c_init(AspeedMachineState *bmc)
+{
+    AspeedSoCState *soc = bmc->soc;
+    I2CBus *i2c[15] = {};
+    DeviceState *dev;
+    for (int i = 0; i < sizeof(i2c) / sizeof(i2c[0]); i++) {
+        if ((i == 11) || (i == 12) || (i == 13)) {
+            continue;
+        }
+        i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+    }
+
+    /* Bus 5 Expander */
+    create_pca9554(soc, 4, 0x21);
+
+    /* Mux I2c Expanders */
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x71);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x72);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x73);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x75);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x76);
+    i2c_slave_create_simple(i2c[5], "pca9546", 0x77);
+
+    /* Bus 10 */
+    dev = DEVICE(create_pca9554(soc, 9, 0x20));
+
+    /* Set FPGA_READY */
+    object_property_set_str(OBJECT(dev), "pin1", "high", &error_fatal);
+
+    create_pca9554(soc, 9, 0x21);
+    at24c_eeprom_init(i2c[9], 0x50, 64 * KiB);
+    at24c_eeprom_init(i2c[9], 0x51, 64 * KiB);
+
+    /* Bus 11 */
+    at24c_eeprom_init_rom(i2c[10], 0x50, 256, gb200nvl_bmc_fruid,
+                          gb200nvl_bmc_fruid_len);
+}
+
 static void fby35_i2c_init(AspeedMachineState *bmc)
 {
     AspeedSoCState *soc = bmc->soc;
@@ -1585,6 +1810,52 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc,
     aspeed_machine_class_init_cpus_defaults(mc);
 }
 
+static void aspeed_machine_catalina_class_init(ObjectClass *oc,
+                                               const void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+    mc->desc = "Facebook Catalina BMC (Cortex-A7)";
+    amc->soc_name = "ast2600-a3";
+    amc->hw_strap1 = CATALINA_BMC_HW_STRAP1;
+    amc->hw_strap2 = CATALINA_BMC_HW_STRAP2;
+    amc->fmc_model = "w25q01jvq";
+    amc->spi_model = NULL;
+    amc->num_cs = 2;
+    amc->macs_mask = ASPEED_MAC2_ON;
+    amc->i2c_init = catalina_bmc_i2c_init;
+    mc->auto_create_sdcard = true;
+    mc->default_ram_size = CATALINA_BMC_RAM_SIZE;
+    aspeed_machine_class_init_cpus_defaults(mc);
+    aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
+#define GB200NVL_BMC_RAM_SIZE ASPEED_RAM_SIZE(1 * GiB)
+
+static void aspeed_machine_gb200nvl_class_init(ObjectClass *oc,
+                                               const void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+    mc->desc = "Nvidia GB200NVL BMC (Cortex-A7)";
+    amc->soc_name = "ast2600-a3";
+    amc->hw_strap1 = GB200NVL_BMC_HW_STRAP1;
+    amc->hw_strap2 = GB200NVL_BMC_HW_STRAP2;
+    amc->fmc_model = "mx66u51235f";
+    amc->spi_model = "mx66u51235f";
+    amc->num_cs = 2;
+
+    amc->spi2_model = "mx66u51235f";
+    amc->num_cs2 = 1;
+    amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
+    amc->i2c_init = gb200nvl_bmc_i2c_init;
+    mc->default_ram_size = GB200NVL_BMC_RAM_SIZE;
+    aspeed_machine_class_init_cpus_defaults(mc);
+    aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
 static void fby35_reset(MachineState *state, ResetType type)
 {
     AspeedMachineState *bmc = ASPEED_MACHINE(state);
@@ -1878,6 +2149,14 @@ static const TypeInfo aspeed_machine_types[] = {
         .parent = TYPE_ASPEED_MACHINE,
         .class_init = aspeed_machine_bletchley_class_init,
     }, {
+        .name = MACHINE_TYPE_NAME("gb200nvl-bmc"),
+        .parent = TYPE_ASPEED_MACHINE,
+        .class_init = aspeed_machine_gb200nvl_class_init,
+    }, {
+        .name = MACHINE_TYPE_NAME("catalina-bmc"),
+        .parent = TYPE_ASPEED_MACHINE,
+        .class_init = aspeed_machine_catalina_class_init,
+    }, {
        .name = MACHINE_TYPE_NAME("fby35-bmc"),
        .parent = MACHINE_TYPE_NAME("ast2600-evb"),
        .class_init = aspeed_machine_fby35_class_init,
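The aspeed_eeprom.c hunk that follows seeds a 256-byte IPMI FRU blob for the GB200NVL BMC. FRU areas end in a two's-complement checksum byte, so a quick validity check is possible; a hedged sketch of that check (not part of the patch, standard IPMI FRU convention):

    /* Illustrative IPMI FRU area check: all bytes of an area sum to 0 mod 256. */
    #include <stdint.h>
    #include <stddef.h>
    #include <stdbool.h>

    static bool fru_area_valid(const uint8_t *area, size_t len)
    {
        uint8_t sum = 0;

        for (size_t i = 0; i < len; i++) {
            sum += area[i];   /* final byte is the stored checksum */
        }
        return sum == 0;      /* two's-complement checksum cancels  */
    }
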
diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c
index daa3d32..8bbbdec 100644
--- a/hw/arm/aspeed_eeprom.c
+++ b/hw/arm/aspeed_eeprom.c
@@ -162,6 +162,25 @@ const uint8_t rainier_bmc_fruid[] = {
     0x31, 0x50, 0x46, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
 };
 
+const uint8_t gb200nvl_bmc_fruid[] = {
+    0x01, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0xf3, 0x01, 0x0a, 0x19, 0x1f,
+    0x0f, 0xe6, 0xc6, 0x4e, 0x56, 0x49, 0x44, 0x49, 0x41, 0xc5, 0x50, 0x33,
+    0x38, 0x30, 0x39, 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38,
+    0x30, 0x30, 0x31, 0x35, 0x30, 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33,
+    0x38, 0x30, 0x39, 0x2d, 0x30, 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30,
+    0xc0, 0x01, 0x01, 0xd6, 0x4d, 0x41, 0x43, 0x3a, 0x20, 0x33, 0x43, 0x3a,
+    0x36, 0x44, 0x3a, 0x36, 0x36, 0x3a, 0x31, 0x34, 0x3a, 0x43, 0x38, 0x3a,
+    0x37, 0x41, 0xc1, 0x3b, 0x01, 0x09, 0x19, 0xc6, 0x4e, 0x56, 0x49, 0x44,
+    0x49, 0x41, 0xc9, 0x50, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x42, 0x4d, 0x43,
+    0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x30,
+    0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, 0xc4, 0x41, 0x45, 0x2e, 0x31,
+    0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, 0x30, 0x30, 0x31,
+    0x35, 0x30, 0xc0, 0xc4, 0x76, 0x30, 0x2e, 0x31, 0xc1, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+};
+
 const size_t tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid);
 const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid);
 const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid);
@@ -169,3 +188,5 @@ const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid);
 const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid);
 const size_t rainier_bb_fruid_len = sizeof(rainier_bb_fruid);
 const size_t rainier_bmc_fruid_len = sizeof(rainier_bmc_fruid);
+const size_t gb200nvl_bmc_fruid_len = sizeof(gb200nvl_bmc_fruid);
+
diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h
index f08c16e..3ed9bc1 100644
--- a/hw/arm/aspeed_eeprom.h
+++ b/hw/arm/aspeed_eeprom.h
@@ -26,4 +26,7 @@ extern const size_t rainier_bb_fruid_len;
 extern const uint8_t rainier_bmc_fruid[];
 extern const size_t rainier_bmc_fruid_len;
 
+extern const uint8_t gb200nvl_bmc_fruid[];
+extern const size_t gb200nvl_bmc_fruid_len;
+
 #endif
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index becd827..d391cd0 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -15,6 +15,7 @@
 #include "hw/arm/boot.h"
 #include "hw/arm/linux-boot-if.h"
 #include "cpu.h"
+#include "exec/tswap.h"
 #include "exec/target_page.h"
 #include "system/kvm.h"
 #include "system/tcg.h"
@@ -29,6 +30,7 @@
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/units.h"
+#include "qemu/bswap.h"
 
 /* Kernel boot protocol is specified in the kernel docs
  * Documentation/arm/Booting and Documentation/arm64/booting.txt
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 23e662c..866f4d1 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -356,6 +356,10 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
                                qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
             sysbus_connect_irq(gicsbd, i + ms->smp.cpus,
                                qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+            sysbus_connect_irq(gicsbd, i + 2 * ms->smp.cpus,
+                               qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
+            sysbus_connect_irq(gicsbd, i + 3 * ms->smp.cpus,
+                               qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
         }
     }
 
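The fsl-imx8mp.c hunk above wires the virtual interrupt lines. QEMU's ARM GIC exposes its sysbus IRQ outputs in banks of smp.cpus, which is why VIRQ and VFIQ sit at offsets of 2 and 3 times the CPU count; a one-line sketch of that indexing convention (illustrative, not from the patch):

    /*
     * Illustrative only: GIC sysbus IRQ outputs are banked per CPU:
     *   [0 .. n-1]   IRQ,  [n .. 2n-1]  FIQ,
     *   [2n .. 3n-1] VIRQ, [3n .. 4n-1] VFIQ
     */
    static inline int gic_out_index(int cpu, int bank, int num_cpus)
    {
        return cpu + bank * num_cpus; /* bank: 0=IRQ, 1=FIQ, 2=VIRQ, 3=VFIQ */
    }
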
mc->ignore_memory_transaction_failures = true; mc->default_ram_id = "highbank.dram"; + mc->deprecation_reason = "no known users left for this machine"; } static const TypeInfo midway_type = { diff --git a/hw/arm/max78000_soc.c b/hw/arm/max78000_soc.c new file mode 100644 index 0000000..7f1856f --- /dev/null +++ b/hw/arm/max78000_soc.c @@ -0,0 +1,232 @@ +/* + * MAX78000 SOC + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Implementation based on stm32f205 and Max78000 user guide at + * https://www.analog.com/media/en/technical-documentation/user-guides/max78000-user-guide.pdf + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "system/address-spaces.h" +#include "system/system.h" +#include "hw/arm/max78000_soc.h" +#include "hw/qdev-clock.h" +#include "hw/misc/unimp.h" + +static const uint32_t max78000_icc_addr[] = {0x4002a000, 0x4002a800}; +static const uint32_t max78000_uart_addr[] = {0x40042000, 0x40043000, + 0x40044000}; + +static const int max78000_uart_irq[] = {14, 15, 34}; + +static void max78000_soc_initfn(Object *obj) +{ + MAX78000State *s = MAX78000_SOC(obj); + int i; + + object_initialize_child(obj, "armv7m", &s->armv7m, TYPE_ARMV7M); + + object_initialize_child(obj, "gcr", &s->gcr, TYPE_MAX78000_GCR); + + for (i = 0; i < MAX78000_NUM_ICC; i++) { + g_autofree char *name = g_strdup_printf("icc%d", i); + object_initialize_child(obj, name, &s->icc[i], TYPE_MAX78000_ICC); + } + + for (i = 0; i < MAX78000_NUM_UART; i++) { + g_autofree char *name = g_strdup_printf("uart%d", i); + object_initialize_child(obj, name, &s->uart[i], + TYPE_MAX78000_UART); + } + + object_initialize_child(obj, "trng", &s->trng, TYPE_MAX78000_TRNG); + + object_initialize_child(obj, "aes", &s->aes, TYPE_MAX78000_AES); + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); +} + +static void max78000_soc_realize(DeviceState *dev_soc, Error **errp) +{ + MAX78000State *s = MAX78000_SOC(dev_soc); + MemoryRegion *system_memory = get_system_memory(); + DeviceState *dev, *gcrdev, *armv7m; + SysBusDevice *busdev; + Error *err = NULL; + int i; + + if (!clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must be wired up by the board code"); + return; + } + + memory_region_init_rom(&s->flash, OBJECT(dev_soc), "MAX78000.flash", + FLASH_SIZE, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, &s->flash); + + memory_region_init_ram(&s->sram, NULL, "MAX78000.sram", SRAM_SIZE, + &err); + + gcrdev = DEVICE(&s->gcr); + object_property_set_link(OBJECT(gcrdev), "sram", OBJECT(&s->sram), + &err); + + if (err != NULL) { + error_propagate(errp, err); + return; + } + memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram); + + armv7m = DEVICE(&s->armv7m); + + /* + * The MAX78000 user guide's Interrupt Vector Table section + * suggests that there are 120 IRQs in the text, while only listing + * 104 in table 5-1. Implement the more generous of the two. + * This has not been tested in hardware. 
+ */ + qdev_prop_set_uint32(armv7m, "num-irq", 120); + qdev_prop_set_uint8(armv7m, "num-prio-bits", 3); + qdev_prop_set_string(armv7m, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m4")); + qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk); + object_property_set_link(OBJECT(&s->armv7m), "memory", + OBJECT(system_memory), &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { + return; + } + + for (i = 0; i < MAX78000_NUM_ICC; i++) { + dev = DEVICE(&(s->icc[i])); + sysbus_realize(SYS_BUS_DEVICE(dev), errp); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, max78000_icc_addr[i]); + } + + for (i = 0; i < MAX78000_NUM_UART; i++) { + g_autofree char *link = g_strdup_printf("uart%d", i); + dev = DEVICE(&(s->uart[i])); + qdev_prop_set_chr(dev, "chardev", serial_hd(i)); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->uart[i]), errp)) { + return; + } + + object_property_set_link(OBJECT(gcrdev), link, OBJECT(dev), + &err); + + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, max78000_uart_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, + max78000_uart_irq[i])); + } + + dev = DEVICE(&s->trng); + sysbus_realize(SYS_BUS_DEVICE(dev), errp); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x4004d000); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(armv7m, 4)); + + object_property_set_link(OBJECT(gcrdev), "trng", OBJECT(dev), &err); + + dev = DEVICE(&s->aes); + sysbus_realize(SYS_BUS_DEVICE(dev), errp); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x40007400); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(armv7m, 5)); + + object_property_set_link(OBJECT(gcrdev), "aes", OBJECT(dev), &err); + + dev = DEVICE(&s->gcr); + sysbus_realize(SYS_BUS_DEVICE(dev), errp); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x40000000); + + create_unimplemented_device("systemInterface", 0x40000400, 0x400); + create_unimplemented_device("functionControl", 0x40000800, 0x400); + create_unimplemented_device("watchdogTimer0", 0x40003000, 0x400); + create_unimplemented_device("dynamicVoltScale", 0x40003c00, 0x40); + create_unimplemented_device("SIMO", 0x40004400, 0x400); + create_unimplemented_device("trimSystemInit", 0x40005400, 0x400); + create_unimplemented_device("generalCtrlFunc", 0x40005800, 0x400); + create_unimplemented_device("wakeupTimer", 0x40006400, 0x400); + create_unimplemented_device("powerSequencer", 0x40006800, 0x400); + create_unimplemented_device("miscControl", 0x40006c00, 0x400); + + create_unimplemented_device("gpio0", 0x40008000, 0x1000); + create_unimplemented_device("gpio1", 0x40009000, 0x1000); + + create_unimplemented_device("parallelCamInterface", 0x4000e000, 0x1000); + create_unimplemented_device("CRC", 0x4000f000, 0x1000); + + create_unimplemented_device("timer0", 0x40010000, 0x1000); + create_unimplemented_device("timer1", 0x40011000, 0x1000); + create_unimplemented_device("timer2", 0x40012000, 0x1000); + create_unimplemented_device("timer3", 0x40013000, 0x1000); + + create_unimplemented_device("i2c0", 0x4001d000, 0x1000); + create_unimplemented_device("i2c1", 0x4001e000, 0x1000); + create_unimplemented_device("i2c2", 0x4001f000, 0x1000); + + create_unimplemented_device("standardDMA", 0x40028000, 0x1000); + create_unimplemented_device("flashController0", 0x40029000, 0x400); + + create_unimplemented_device("adc", 0x40034000, 0x1000); + create_unimplemented_device("pulseTrainEngine", 0x4003c000, 0xa0); + create_unimplemented_device("oneWireMaster", 0x4003d000, 0x1000); + 
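Editor's note: each create_unimplemented_device() call in this realize function (the list continues below) installs a low-priority background MMIO region, so guest accesses to peripherals that are not modelled yet are logged via LOG_UNIMP instead of faulting; reads return zero and writes are ignored. A sketch of roughly what the helper from hw/misc/unimp.h expands to, assuming the usual board-file includes:

    /* Map a TYPE_UNIMPLEMENTED_DEVICE stub behind any real devices. */
    static void map_unimp_stub(const char *name, hwaddr base, hwaddr size)
    {
        DeviceState *dev = qdev_new(TYPE_UNIMPLEMENTED_DEVICE);

        qdev_prop_set_string(dev, "name", name);
        qdev_prop_set_uint64(dev, "size", size);
        sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
        /* Negative priority keeps implemented devices on top of the stub. */
        sysbus_mmio_map_overlap(SYS_BUS_DEVICE(dev), 0, base, -1000);
    }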
create_unimplemented_device("semaphore", 0x4003e000, 0x1000); + + create_unimplemented_device("spi1", 0x40046000, 0x2000); + create_unimplemented_device("i2s", 0x40060000, 0x1000); + create_unimplemented_device("lowPowerControl", 0x40080000, 0x400); + create_unimplemented_device("gpio2", 0x40080400, 0x200); + create_unimplemented_device("lowPowerWatchdogTimer", 0x40080800, 0x400); + create_unimplemented_device("lowPowerTimer4", 0x40080c00, 0x400); + + create_unimplemented_device("lowPowerTimer5", 0x40081000, 0x400); + create_unimplemented_device("lowPowerUART0", 0x40081400, 0x400); + create_unimplemented_device("lowPowerComparator", 0x40088000, 0x400); + + create_unimplemented_device("spi0", 0x400be000, 0x400); + + /* + * The MAX78000 user guide's base address map lists the CNN TX FIFO as + * beginning at 0x400c0400 and ending at 0x400c0400. Given that CNN_FIFO + * is listed as having data accessible up to offset 0x1000, the user + * guide is likely incorrect. + */ + create_unimplemented_device("cnnTxFIFO", 0x400c0400, 0x2000); + + create_unimplemented_device("cnnGlobalControl", 0x50000000, 0x10000); + create_unimplemented_device("cnnx16quad0", 0x50100000, 0x40000); + create_unimplemented_device("cnnx16quad1", 0x50500000, 0x40000); + create_unimplemented_device("cnnx16quad2", 0x50900000, 0x40000); + create_unimplemented_device("cnnx16quad3", 0x50d00000, 0x40000); + +} + +static void max78000_soc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = max78000_soc_realize; +} + +static const TypeInfo max78000_soc_info = { + .name = TYPE_MAX78000_SOC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MAX78000State), + .instance_init = max78000_soc_initfn, + .class_init = max78000_soc_class_init, +}; + +static void max78000_soc_types(void) +{ + type_register_static(&max78000_soc_info); +} + +type_init(max78000_soc_types) diff --git a/hw/arm/max78000fthr.c b/hw/arm/max78000fthr.c new file mode 100644 index 0000000..c4f6b5b --- /dev/null +++ b/hw/arm/max78000fthr.c @@ -0,0 +1,50 @@ +/* + * MAX78000FTHR Evaluation Board + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" +#include "qemu/error-report.h" +#include "hw/arm/max78000_soc.h" +#include "hw/arm/boot.h" + +/* 60MHz is the default, but other clocks can be selected. 
*/ +#define SYSCLK_FRQ 60000000ULL +static void max78000_init(MachineState *machine) +{ + DeviceState *dev; + Clock *sysclk; + + sysclk = clock_new(OBJECT(machine), "SYSCLK"); + clock_set_hz(sysclk, SYSCLK_FRQ); + + dev = qdev_new(TYPE_MAX78000_SOC); + object_property_add_child(OBJECT(machine), "soc", OBJECT(dev)); + qdev_connect_clock_in(dev, "sysclk", sysclk); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + armv7m_load_kernel(ARM_CPU(first_cpu), + machine->kernel_filename, + 0x00000000, FLASH_SIZE); +} + +static void max78000_machine_init(MachineClass *mc) +{ + static const char * const valid_cpu_types[] = { + ARM_CPU_TYPE_NAME("cortex-m4"), + NULL + }; + + mc->desc = "MAX78000FTHR Board (Cortex-M4 / (Unimplemented) RISC-V)"; + mc->init = max78000_init; + mc->valid_cpu_types = valid_cpu_types; +} + +DEFINE_MACHINE("max78000fthr", max78000_machine_init) diff --git a/hw/arm/meson.build b/hw/arm/meson.build index d90be8f..dc683913 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -27,6 +27,7 @@ arm_common_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c')) arm_common_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: files('allwinner-a10.c', 'cubieboard.c')) arm_common_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3.c', 'orangepi.c')) arm_common_ss.add(when: 'CONFIG_ALLWINNER_R40', if_true: files('allwinner-r40.c', 'bananapi_m2u.c')) +arm_common_ss.add(when: 'CONFIG_MAX78000_SOC', if_true: files('max78000_soc.c')) arm_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2836.c', 'raspi.c')) arm_common_ss.add(when: ['CONFIG_RASPI', 'TARGET_AARCH64'], if_true: files('bcm2838.c', 'raspi4b.c')) arm_common_ss.add(when: 'CONFIG_STM32F100_SOC', if_true: files('stm32f100_soc.c')) @@ -71,6 +72,7 @@ arm_ss.add(when: 'CONFIG_XEN', if_true: files( arm_common_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmu-common.c')) arm_common_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c')) arm_common_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4_boards.c')) +arm_common_ss.add(when: 'CONFIG_MAX78000FTHR', if_true: files('max78000fthr.c')) arm_common_ss.add(when: 'CONFIG_NETDUINO2', if_true: files('netduino2.c')) arm_common_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_peripherals.c')) arm_common_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2838_peripherals.c')) diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c index 2f30c49..ecfae32 100644 --- a/hw/arm/npcm7xx.c +++ b/hw/arm/npcm7xx.c @@ -24,7 +24,7 @@ #include "hw/qdev-clock.h" #include "hw/qdev-properties.h" #include "qapi/error.h" -#include "qemu/bswap.h" +#include "exec/tswap.h" #include "qemu/units.h" #include "system/system.h" #include "target/arm/cpu-qom.h" diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index f39b99e..0dcaf2f 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -319,7 +319,7 @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid) g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid); } -inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid) +void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid) { trace_smmu_iotlb_inv_vmid_s1(vmid); g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid); diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index cd90c47..b01fc4f 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -34,15 +34,18 @@ #include "hw/core/cpu.h" #include "hw/acpi/acpi-defs.h" #include "hw/acpi/acpi.h" +#include "hw/acpi/pcihp.h" #include "hw/nvram/fw_cfg_acpi.h" #include 
"hw/acpi/bios-linker-loader.h" #include "hw/acpi/aml-build.h" #include "hw/acpi/utils.h" #include "hw/acpi/pci.h" +#include "hw/acpi/cxl.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/generic_event_device.h" #include "hw/acpi/tpm.h" #include "hw/acpi/hmat.h" +#include "hw/cxl/cxl.h" #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" @@ -119,16 +122,44 @@ static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap) aml_append(scope, dev); } +static void build_acpi0017(Aml *table) +{ + Aml *dev, *scope, *method; + + scope = aml_scope("_SB"); + dev = aml_device("CXLM"); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017"))); + + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(0x0B))); + aml_append(dev, method); + build_cxl_dsm_method(dev); + + aml_append(scope, dev); + aml_append(table, scope); +} + static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, uint32_t irq, VirtMachineState *vms) { int ecam_id = VIRT_ECAM_ID(vms->highmem_ecam); + bool cxl_present = false; + PCIBus *bus = vms->bus; + bool acpi_pcihp = false; + + if (vms->acpi_dev) { + acpi_pcihp = object_property_get_bool(OBJECT(vms->acpi_dev), + ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, + NULL); + } + struct GPEXConfig cfg = { .mmio32 = memmap[VIRT_PCIE_MMIO], .pio = memmap[VIRT_PCIE_PIO], .ecam = memmap[ecam_id], .irq = irq, .bus = vms->bus, + .pci_native_hotplug = !acpi_pcihp, }; if (vms->highmem_mmio) { @@ -136,6 +167,14 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, } acpi_dsdt_add_gpex(scope, &cfg); + QLIST_FOREACH(bus, &vms->bus->child, sibling) { + if (pci_bus_is_cxl(bus)) { + cxl_present = true; + } + } + if (cxl_present) { + build_acpi0017(scope); + } } static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap, @@ -329,12 +368,6 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) /* Sort the smmu idmap by input_base */ g_array_sort(rc_smmu_idmaps, iort_idmap_compare); - /* - * Knowing the ID ranges from the RC to the SMMU, it's possible to - * determine the ID ranges from RC that are directed to the ITS. 
- */ - create_rc_its_idmaps(rc_its_idmaps, rc_smmu_idmaps); - nb_nodes = 2; /* RC and SMMUv3 */ rc_mapping_count = rc_smmu_idmaps->len; @@ -874,6 +907,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) const int *irqmap = vms->irqmap; AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = vms->oem_id, .oem_table_id = vms->oem_table_id }; + Aml *pci0_scope; acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); @@ -927,6 +961,33 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) aml_append(dsdt, scope); + pci0_scope = aml_scope("\\_SB.PCI0"); + + aml_append(pci0_scope, build_pci_bridge_edsm()); + build_append_pci_bus_devices(pci0_scope, vms->bus); + if (object_property_find(OBJECT(vms->bus), ACPI_PCIHP_PROP_BSEL)) { + build_append_pcihp_slots(pci0_scope, vms->bus); + } + + if (vms->acpi_dev) { + bool acpi_pcihp; + + acpi_pcihp = object_property_get_bool(OBJECT(vms->acpi_dev), + ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, + NULL); + + if (acpi_pcihp) { + build_acpi_pci_hotplug(dsdt, AML_SYSTEM_MEMORY, + memmap[VIRT_ACPI_PCIHP].base); + build_append_pcihp_resources(pci0_scope, + memmap[VIRT_ACPI_PCIHP].base, + memmap[VIRT_ACPI_PCIHP].size); + + build_append_notification_callback(pci0_scope, vms->bus); + } + } + aml_append(dsdt, pci0_scope); + /* copy AML table into ACPI tables blob */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); @@ -1000,7 +1061,10 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) } acpi_add_table(table_offsets, tables_blob); - spcr_setup(tables_blob, tables->linker, vms); + + if (ms->acpi_spcr_enabled) { + spcr_setup(tables_blob, tables->linker, vms); + } acpi_add_table(table_offsets, tables_blob); build_dbg2(tables_blob, tables->linker, vms); @@ -1027,6 +1091,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) } } + if (vms->cxl_devices_state.is_enabled) { + cxl_build_cedt(table_offsets, tables_blob, tables->linker, + vms->oem_id, vms->oem_table_id, &vms->cxl_devices_state); + } + if (ms->nvdimms_state->is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, ms->nvdimms_state, ms->ram_slots, vms->oem_id, diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 3bcdf92..ef6be36 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -57,6 +57,7 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "hw/pci-host/gpex.h" +#include "hw/pci-bridge/pci_expander_bridge.h" #include "hw/virtio/virtio-pci.h" #include "hw/core/sysbus-fdt.h" #include "hw/platform-bus.h" @@ -75,6 +76,7 @@ #include "standard-headers/linux/input.h" #include "hw/arm/smmuv3.h" #include "hw/acpi/acpi.h" +#include "hw/acpi/pcihp.h" #include "target/arm/cpu-qom.h" #include "target/arm/internals.h" #include "target/arm/multiprocessing.h" @@ -86,6 +88,8 @@ #include "hw/virtio/virtio-md-pci.h" #include "hw/virtio/virtio-iommu.h" #include "hw/char/pl011.h" +#include "hw/cxl/cxl.h" +#include "hw/cxl/cxl_host.h" #include "qemu/guest-random.h" static GlobalProperty arm_virt_compat[] = { @@ -183,6 +187,7 @@ static const MemMapEntry base_memmap[] = { [VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN}, [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, + [VIRT_ACPI_PCIHP] = { 0x090c0000, ACPI_PCIHP_SIZE }, [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, @@ -220,9 +225,11 @@ static const MemMapEntry base_memmap[] = { 
static MemMapEntry extended_memmap[] = { /* Additional 64 MB redist region (can contain up to 512 redistributors) */ [VIRT_HIGH_GIC_REDIST2] = { 0x0, 64 * MiB }, + [VIRT_CXL_HOST] = { 0x0, 64 * KiB * 16 }, /* 16 UID */ [VIRT_HIGH_PCIE_ECAM] = { 0x0, 256 * MiB }, /* Second PCIe window */ [VIRT_HIGH_PCIE_MMIO] = { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE }, + /* Any CXL Fixed memory windows come here */ }; static const int a15irqmap[] = { @@ -681,8 +688,10 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) { DeviceState *dev; MachineState *ms = MACHINE(vms); + SysBusDevice *sbdev; int irq = vms->irqmap[VIRT_ACPI_GED]; uint32_t event = ACPI_GED_PWR_DOWN_EVT; + bool acpi_pcihp; if (ms->ram_slots) { event |= ACPI_GED_MEM_HOTPLUG_EVT; @@ -694,11 +703,26 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) dev = qdev_new(TYPE_ACPI_GED); qdev_prop_set_uint32(dev, "ged-event", event); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + object_property_set_link(OBJECT(dev), "bus", OBJECT(vms->bus), &error_abort); + sbdev = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sbdev, &error_fatal); + + sysbus_mmio_map_name(sbdev, TYPE_ACPI_GED, vms->memmap[VIRT_ACPI_GED].base); + sysbus_mmio_map_name(sbdev, ACPI_MEMHP_REGION_NAME, + vms->memmap[VIRT_PCDIMM_ACPI].base); + + acpi_pcihp = object_property_get_bool(OBJECT(dev), + ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, NULL); + + if (acpi_pcihp) { + int pcihp_region_index; + + pcihp_region_index = sysbus_mmio_map_name(sbdev, ACPI_PCIHP_REGION_NAME, + vms->memmap[VIRT_ACPI_PCIHP].base); + assert(pcihp_region_index >= 0); + } - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); + sysbus_connect_irq(sbdev, 0, qdev_get_gpio_in(vms->gic, irq)); return dev; } @@ -792,6 +816,13 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) default: g_assert_not_reached(); } + + if (kvm_enabled() && vms->virt && + (revision != 3 || !kvm_irqchip_in_kernel())) { + error_report("KVM EL2 is only supported with in-kernel GICv3"); + exit(1); + } + vms->gic = qdev_new(gictype); qdev_prop_set_uint32(vms->gic, "revision", revision); qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); @@ -828,6 +859,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) OBJECT(mem), &error_fatal); qdev_prop_set_bit(vms->gic, "has-lpi", true); } + } else if (vms->virt) { + qdev_prop_set_uint32(vms->gic, "maintenance-interrupt-id", + ARCH_GIC_MAINT_IRQ); } } else { if (!kvm_irqchip_in_kernel()) { @@ -1623,6 +1657,17 @@ static void create_pcie(VirtMachineState *vms) } } +static void create_cxl_host_reg_region(VirtMachineState *vms) +{ + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *mr = &vms->cxl_devices_state.host_mr; + + memory_region_init(mr, OBJECT(vms), "cxl_host_reg", + vms->memmap[VIRT_CXL_HOST].size); + memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr); + vms->highmem_cxl = true; +} + static void create_platform_bus(VirtMachineState *vms) { DeviceState *dev; @@ -1739,6 +1784,12 @@ void virt_machine_done(Notifier *notifier, void *data) struct arm_boot_info *info = &vms->bootinfo; AddressSpace *as = arm_boot_address_space(cpu, info); + cxl_hook_up_pxb_registers(vms->bus, &vms->cxl_devices_state, + &error_fatal); + + if (vms->cxl_devices_state.is_enabled) { + cxl_fmws_link_targets(&error_fatal); + } /* * If the user provided a 
dtb, we assume the dynamic sysbus nodes * already are integrated there. This corresponds to a use case where @@ -1785,6 +1836,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, { bool *enabled_array[] = { &vms->highmem_redists, + &vms->highmem_cxl, &vms->highmem_ecam, &vms->highmem_mmio, }; @@ -1892,6 +1944,9 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) if (device_memory_size > 0) { machine_memory_devices_init(ms, device_memory_base, device_memory_size); } + vms->highest_gpa = cxl_fmws_set_memmap(ROUND_UP(vms->highest_gpa + 1, + 256 * MiB), + BIT_ULL(pa_bits)) - 1; } static VirtGICType finalize_gic_version_do(const char *accel_name, @@ -2063,6 +2118,10 @@ static void virt_post_cpus_gic_realized(VirtMachineState *vms, memory_region_init_ram(pvtime, NULL, "pvtime", pvtime_size, NULL); memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime); } + if (!aarch64 && vms->virt) { + error_report("KVM does not support EL2 on an AArch32 vCPU"); + exit(1); + } CPU_FOREACH(cpu) { if (pmu) { @@ -2208,7 +2267,13 @@ static void machvirt_init(MachineState *machine) exit(1); } - if (vms->virt && !tcg_enabled() && !qtest_enabled()) { + if (vms->virt && kvm_enabled() && !kvm_arm_el2_supported()) { + error_report("mach-virt: host kernel KVM does not support providing " + "Virtualization extensions to the guest CPU"); + exit(1); + } + + if (vms->virt && !kvm_enabled() && !tcg_enabled() && !qtest_enabled()) { error_report("mach-virt: %s does not support providing " "Virtualization extensions to the guest CPU", current_accel_name()); @@ -2343,6 +2408,8 @@ static void machvirt_init(MachineState *machine) memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, machine->ram); + cxl_fmws_update_mmio(); + virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); create_gic(vms, sysmem); @@ -2398,6 +2465,7 @@ static void machvirt_init(MachineState *machine) create_rtc(vms); create_pcie(vms); + create_cxl_host_reg_region(vms); if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) { vms->acpi_dev = create_acpi_ged(vms); @@ -3364,6 +3432,7 @@ static void virt_instance_init(Object *obj) vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); + cxl_machine_init(obj, &vms->cxl_devices_state); } static const TypeInfo virt_machine_info = { diff --git a/hw/arm/xen-pvh.c b/hw/arm/xen-pvh.c index 4b26bcf..1a9eeb0 100644 --- a/hw/arm/xen-pvh.c +++ b/hw/arm/xen-pvh.c @@ -10,7 +10,6 @@ #include "hw/boards.h" #include "system/system.h" #include "hw/xen/xen-pvh-common.h" -#include "hw/xen/arch_hvm.h" #define TYPE_XEN_ARM MACHINE_TYPE_NAME("xenpvh") diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c index f3939e7..db22190 100644 --- a/hw/block/hd-geometry.c +++ b/hw/block/hd-geometry.c @@ -33,7 +33,6 @@ #include "qemu/osdep.h" #include "system/block-backend.h" #include "qapi/qapi-types-block.h" -#include "qemu/bswap.h" #include "hw/block/block.h" #include "trace.h" diff --git a/hw/block/meson.build b/hw/block/meson.build index 6557044..43ed296 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -13,7 +13,9 @@ system_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c')) system_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) system_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) -specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c')) -specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c', 
'virtio-blk-common.c')) +specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) +system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk-common.c')) +specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c')) +system_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('virtio-blk-common.c')) subdir('dataplane') diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 0eebbcd..c0cc5f6 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -210,6 +210,7 @@ static int vhost_user_blk_stop(VirtIODevice *vdev) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); int ret; + bool force_stop = false; if (!s->started_vu) { return 0; @@ -220,7 +221,11 @@ static int vhost_user_blk_stop(VirtIODevice *vdev) return 0; } - ret = vhost_dev_stop(&s->dev, vdev, true); + force_stop = s->skip_get_vring_base_on_force_shutdown && + qemu_force_shutdown_requested(); + + ret = force_stop ? vhost_dev_force_stop(&s->dev, vdev, true) : + vhost_dev_stop(&s->dev, vdev, true); if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); @@ -584,6 +589,8 @@ static const Property vhost_user_blk_properties[] = { VIRTIO_BLK_F_DISCARD, true), DEFINE_PROP_BIT64("write-zeroes", VHostUserBlk, parent_obj.host_features, VIRTIO_BLK_F_WRITE_ZEROES, true), + DEFINE_PROP_BOOL("skip-get-vring-base-on-force-shutdown", VHostUserBlk, + skip_get_vring_base_on_force_shutdown, false), }; static void vhost_user_blk_class_init(ObjectClass *klass, const void *data) diff --git a/hw/char/Kconfig b/hw/char/Kconfig index 9d517f3..020c0a8 100644 --- a/hw/char/Kconfig +++ b/hw/char/Kconfig @@ -48,6 +48,9 @@ config VIRTIO_SERIAL default y depends on VIRTIO +config MAX78000_UART + bool + config STM32F2XX_USART bool diff --git a/hw/char/max78000_uart.c b/hw/char/max78000_uart.c new file mode 100644 index 0000000..19506d5 --- /dev/null +++ b/hw/char/max78000_uart.c @@ -0,0 +1,285 @@ +/* + * MAX78000 UART + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/char/max78000_uart.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "migration/vmstate.h" +#include "trace.h" + + +static int max78000_uart_can_receive(void *opaque) +{ + Max78000UartState *s = opaque; + if (!(s->ctrl & UART_BCLKEN)) { + return 0; + } + return fifo8_num_free(&s->rx_fifo); +} + +static void max78000_update_irq(Max78000UartState *s) +{ + int interrupt_level; + + interrupt_level = s->int_fl & s->int_en; + qemu_set_irq(s->irq, interrupt_level); +} + +static void max78000_uart_receive(void *opaque, const uint8_t *buf, int size) +{ + Max78000UartState *s = opaque; + + assert(size <= fifo8_num_free(&s->rx_fifo)); + + fifo8_push_all(&s->rx_fifo, buf, size); + + uint32_t rx_threshold = s->ctrl & 0xf; + + if (fifo8_num_used(&s->rx_fifo) >= rx_threshold) { + s->int_fl |= UART_RX_THD; + } + + max78000_update_irq(s); +} + +static void max78000_uart_reset_hold(Object *obj, ResetType type) +{ + Max78000UartState *s = MAX78000_UART(obj); + + s->ctrl = 0; + s->status = UART_TX_EM | UART_RX_EM; + s->int_en = 0; + s->int_fl = 0; + s->osr = 0; + s->txpeek = 0; + s->pnr = UART_RTS; + s->fifo = 0; + s->dma = 0; + s->wken = 0; + s->wkfl = 0; + fifo8_reset(&s->rx_fifo); +} + +static uint64_t 
max78000_uart_read(void *opaque, hwaddr addr, + unsigned int size) +{ + Max78000UartState *s = opaque; + uint64_t retvalue = 0; + switch (addr) { + case UART_CTRL: + retvalue = s->ctrl; + break; + case UART_STATUS: + retvalue = (fifo8_num_used(&s->rx_fifo) << UART_RX_LVL) | + UART_TX_EM | + (fifo8_is_empty(&s->rx_fifo) ? UART_RX_EM : 0); + break; + case UART_INT_EN: + retvalue = s->int_en; + break; + case UART_INT_FL: + retvalue = s->int_fl; + break; + case UART_CLKDIV: + retvalue = s->clkdiv; + break; + case UART_OSR: + retvalue = s->osr; + break; + case UART_TXPEEK: + if (!fifo8_is_empty(&s->rx_fifo)) { + retvalue = fifo8_peek(&s->rx_fifo); + } + break; + case UART_PNR: + retvalue = s->pnr; + break; + case UART_FIFO: + if (!fifo8_is_empty(&s->rx_fifo)) { + retvalue = fifo8_pop(&s->rx_fifo); + max78000_update_irq(s); + } + break; + case UART_DMA: + /* DMA not implemented */ + retvalue = s->dma; + break; + case UART_WKEN: + retvalue = s->wken; + break; + case UART_WKFL: + retvalue = s->wkfl; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + break; + } + + return retvalue; +} + +static void max78000_uart_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000UartState *s = opaque; + + uint32_t value = val64; + uint8_t data; + + switch (addr) { + case UART_CTRL: + if (value & UART_FLUSH_RX) { + fifo8_reset(&s->rx_fifo); + } + if (value & UART_BCLKEN) { + value = value | UART_BCLKRDY; + } + s->ctrl = value & ~(UART_FLUSH_RX | UART_FLUSH_TX); + + /* + * Software can manage UART flow control manually by setting hfc_en + * in UART_CTRL. This would require emulating uart at a lower level, + * and is currently unimplemented. + */ + + return; + case UART_STATUS: + /* UART_STATUS is read only */ + return; + case UART_INT_EN: + s->int_en = value; + return; + case UART_INT_FL: + s->int_fl = s->int_fl & ~(value); + max78000_update_irq(s); + return; + case UART_CLKDIV: + s->clkdiv = value; + return; + case UART_OSR: + s->osr = value; + return; + case UART_PNR: + s->pnr = value; + return; + case UART_FIFO: + data = value & 0xff; + /* + * XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks + */ + qemu_chr_fe_write_all(&s->chr, &data, 1); + + /* TX is always empty */ + s->int_fl |= UART_TX_HE; + max78000_update_irq(s); + + return; + case UART_DMA: + /* DMA not implemented */ + s->dma = value; + return; + case UART_WKEN: + s->wken = value; + return; + case UART_WKFL: + s->wkfl = value; + return; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + } +} + +static const MemoryRegionOps max78000_uart_ops = { + .read = max78000_uart_read, + .write = max78000_uart_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const Property max78000_uart_properties[] = { + DEFINE_PROP_CHR("chardev", Max78000UartState, chr), +}; + +static const VMStateDescription max78000_uart_vmstate = { + .name = TYPE_MAX78000_UART, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(ctrl, Max78000UartState), + VMSTATE_UINT32(status, Max78000UartState), + VMSTATE_UINT32(int_en, Max78000UartState), + VMSTATE_UINT32(int_fl, Max78000UartState), + VMSTATE_UINT32(clkdiv, Max78000UartState), + VMSTATE_UINT32(osr, Max78000UartState), + VMSTATE_UINT32(txpeek, Max78000UartState), + VMSTATE_UINT32(pnr, Max78000UartState), + VMSTATE_UINT32(fifo, Max78000UartState), + VMSTATE_UINT32(dma, Max78000UartState), + VMSTATE_UINT32(wken, Max78000UartState), + VMSTATE_UINT32(wkfl, Max78000UartState), + VMSTATE_FIFO8(rx_fifo, Max78000UartState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_uart_init(Object *obj) +{ + Max78000UartState *s = MAX78000_UART(obj); + fifo8_create(&s->rx_fifo, 8); + + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &max78000_uart_ops, s, + TYPE_MAX78000_UART, 0x400); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void max78000_uart_realize(DeviceState *dev, Error **errp) +{ + Max78000UartState *s = MAX78000_UART(dev); + + qemu_chr_fe_set_handlers(&s->chr, max78000_uart_can_receive, + max78000_uart_receive, NULL, NULL, + s, NULL, true); +} + +static void max78000_uart_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + rc->phases.hold = max78000_uart_reset_hold; + + device_class_set_props(dc, max78000_uart_properties); + dc->realize = max78000_uart_realize; + + dc->vmsd = &max78000_uart_vmstate; +} + +static const TypeInfo max78000_uart_info = { + .name = TYPE_MAX78000_UART, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000UartState), + .instance_init = max78000_uart_init, + .class_init = max78000_uart_class_init, +}; + +static void max78000_uart_register_types(void) +{ + type_register_static(&max78000_uart_info); +} + +type_init(max78000_uart_register_types) diff --git a/hw/char/meson.build b/hw/char/meson.build index 4e439da..a9e1dc2 100644 --- a/hw/char/meson.build +++ b/hw/char/meson.build @@ -26,6 +26,7 @@ system_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c')) system_ss.add(when: 'CONFIG_COLDFIRE', if_true: files('mcf_uart.c')) system_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic-uart.c')) system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210_uart.c')) +system_ss.add(when: 'CONFIG_MAX78000_UART', if_true: files('max78000_uart.c')) system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_uart.c')) system_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_aux.c')) 
system_ss.add(when: 'CONFIG_RENESAS_SCI', if_true: files('renesas_sci.c')) diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index c884be5..a78ea9b 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -29,7 +29,6 @@ #include "qemu/timer.h" #include "qemu/error-report.h" #include "system/address-spaces.h" -#include "exec/tswap.h" #include "system/dma.h" #include "system/runstate.h" #include "trace.h" diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c index 0fc89e7..9bc697a 100644 --- a/hw/char/sifive_uart.c +++ b/hw/char/sifive_uart.c @@ -128,8 +128,10 @@ static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf, s->txfifo |= SIFIVE_UART_TXFIFO_FULL; } - timer_mod(s->fifo_trigger_handle, current_time + - TX_INTERRUPT_TRIGGER_DELAY_NS); + if (!timer_pending(s->fifo_trigger_handle)) { + timer_mod(s->fifo_trigger_handle, current_time + + TX_INTERRUPT_TRIGGER_DELAY_NS); + } } static uint64_t diff --git a/hw/core/cpu-system.c b/hw/core/cpu-system.c index 3c84176..a975405 100644 --- a/hw/core/cpu-system.c +++ b/hw/core/cpu-system.c @@ -24,7 +24,7 @@ #include "exec/cputlb.h" #include "system/memory.h" #include "exec/tb-flush.h" -#include "exec/tswap.h" +#include "qemu/target-info.h" #include "hw/qdev-core.h" #include "hw/qdev-properties.h" #include "hw/core/sysemu-cpu-ops.h" diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index c6325cd..3a612e2 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -18,6 +18,7 @@ #include "monitor/monitor.h" #include "qapi/error.h" #include "qapi/qapi-builtin-visit.h" +#include "qapi/qapi-commands-accelerator.h" #include "qapi/qapi-commands-machine.h" #include "qobject/qdict.h" #include "qapi/string-output-visitor.h" @@ -32,6 +33,7 @@ void hmp_info_cpus(Monitor *mon, const QDict *qdict) cpu_list = qmp_query_cpus_fast(NULL); for (cpu = cpu_list; cpu; cpu = cpu->next) { + g_autofree char *cpu_model = cpu_model_from_type(cpu->value->qom_type); int active = ' '; if (cpu->value->cpu_index == monitor_get_cpu_index(mon)) { @@ -40,7 +42,8 @@ void hmp_info_cpus(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%c CPU #%" PRId64 ":", active, cpu->value->cpu_index); - monitor_printf(mon, " thread_id=%" PRId64 "\n", cpu->value->thread_id); + monitor_printf(mon, " thread_id=%" PRId64 " model=%s\n", + cpu->value->thread_id, cpu_model); } qapi_free_CpuInfoFastList(cpu_list); diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c index d82043e..6aca1a6 100644 --- a/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c @@ -14,12 +14,13 @@ #include "hw/mem/memory-device.h" #include "qapi/error.h" #include "qapi/qapi-builtin-visit.h" +#include "qapi/qapi-commands-accelerator.h" #include "qapi/qapi-commands-machine.h" #include "qobject/qobject.h" #include "qapi/qobject-input-visitor.h" #include "qapi/type-helpers.h" #include "qemu/uuid.h" -#include "qemu/target-info.h" +#include "qemu/target-info-qapi.h" #include "qom/qom-qobject.h" #include "system/hostmem.h" #include "system/hw_accel.h" @@ -37,8 +38,7 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp) MachineState *ms = MACHINE(qdev_get_machine()); MachineClass *mc = MACHINE_GET_CLASS(ms); CpuInfoFastList *head = NULL, **tail = &head; - SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, target_name(), - -1, &error_abort); + SysEmuTarget target = target_arch(); CPUState *cpu; CPU_FOREACH(cpu) { @@ -47,6 +47,7 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp) value->cpu_index = cpu->cpu_index; 
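Editor's note on the machine-hmp-cmds.c hunk above: cpu_model_from_type() maps a QOM type name back to the user-facing CPU model by stripping the per-architecture suffix, which is why 'info cpus' can now print a model next to the thread id. A sketch of the idea, with the "-arm-cpu" suffix hard-coded purely as an assumption (the real helper derives the suffix from the target's resolving type):

    /* "cortex-a57-arm-cpu" -> "cortex-a57"; caller frees the result. */
    static char *model_from_type(const char *typename)
    {
        const char *suffix = "-arm-cpu";
        size_t len = strlen(typename), slen = strlen(suffix);

        if (len > slen && g_str_has_suffix(typename, suffix)) {
            return g_strndup(typename, len - slen);
        }
        return g_strdup(typename);
    }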
value->qom_path = object_get_canonical_path(OBJECT(cpu)); value->thread_id = cpu->thread_id; + value->qom_type = g_strdup(object_get_typename(OBJECT(cpu))); if (mc->cpu_index_to_instance_props) { CpuInstanceProperties *props; @@ -139,8 +140,7 @@ QemuTargetInfo *qmp_query_target(Error **errp) { QemuTargetInfo *info = g_malloc0(sizeof(*info)); - info->arch = qapi_enum_parse(&SysEmuTarget_lookup, target_name(), -1, - &error_abort); + info->arch = target_arch(); return info; } diff --git a/hw/core/machine.c b/hw/core/machine.c index e869821..bd47527 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -39,6 +39,9 @@ GlobalProperty hw_compat_10_0[] = { { "scsi-hd", "dpofua", "off" }, + { "vfio-pci", "x-migration-load-config-after-iter", "off" }, + { "ramfb", "use-legacy-x86-rom", "true"}, + { "vfio-pci-nohotplug", "use-legacy-x86-rom", "true" }, }; const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0); @@ -577,6 +580,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp) ms->nvdimms_state->is_enabled = value; } +static bool machine_get_spcr(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->acpi_spcr_enabled; +} + +static void machine_set_spcr(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->acpi_spcr_enabled = value; +} + static bool machine_get_hmat(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -1281,6 +1298,14 @@ static void machine_initfn(Object *obj) "Table (HMAT)"); } + /* SPCR */ + ms->acpi_spcr_enabled = true; + object_property_add_bool(obj, "spcr", machine_get_spcr, machine_set_spcr); + object_property_set_description(obj, "spcr", + "Set on/off to enable/disable " + "ACPI Serial Port Console Redirection " + "Table (spcr)"); + /* default to mc->default_cpus */ ms->smp.cpus = mc->default_cpus; ms->smp.max_cpus = mc->default_cpus; diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 24e145d..1f810b7 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -1299,3 +1299,47 @@ const PropertyInfo qdev_prop_vmapple_virtio_blk_variant = { .set = qdev_propinfo_set_enum, .set_default_value = qdev_propinfo_set_default_value_enum, }; + +/* --- VirtIOGPUOutputList --- */ + +static void get_virtio_gpu_output_list(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + VirtIOGPUOutputList **prop_ptr = + object_field_prop_ptr(obj, opaque); + + visit_type_VirtIOGPUOutputList(v, name, prop_ptr, errp); +} + +static void set_virtio_gpu_output_list(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + VirtIOGPUOutputList **prop_ptr = + object_field_prop_ptr(obj, opaque); + VirtIOGPUOutputList *list; + + if (!visit_type_VirtIOGPUOutputList(v, name, &list, errp)) { + return; + } + + qapi_free_VirtIOGPUOutputList(*prop_ptr); + *prop_ptr = list; +} + +static void release_virtio_gpu_output_list(Object *obj, + const char *name, void *opaque) +{ + VirtIOGPUOutputList **prop_ptr = + object_field_prop_ptr(obj, opaque); + + qapi_free_VirtIOGPUOutputList(*prop_ptr); + *prop_ptr = NULL; +} + +const PropertyInfo qdev_prop_virtio_gpu_output_list = { + .type = "VirtIOGPUOutputList", + .description = "VirtIO GPU output list [{\"name\":\"<name>\"},...]", + .get = get_virtio_gpu_output_list, + .set = set_virtio_gpu_output_list, + .release = release_virtio_gpu_output_list, +}; diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 147b3ff..b7e8a89 100644 --- 
a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -2,6 +2,7 @@ #include "hw/qdev-properties.h" #include "qapi/error.h" #include "qapi/qapi-types-misc.h" +#include "qapi/qapi-visit-common.h" #include "qobject/qlist.h" #include "qemu/ctype.h" #include "qemu/error-report.h" @@ -180,7 +181,8 @@ const PropertyInfo qdev_prop_bit = { static uint64_t qdev_get_prop_mask64(const Property *prop) { - assert(prop->info == &qdev_prop_bit64); + assert(prop->info == &qdev_prop_bit64 || + prop->info == &qdev_prop_on_off_auto_bit64); return 0x1ull << prop->bitnr; } @@ -225,6 +227,69 @@ const PropertyInfo qdev_prop_bit64 = { .set_default_value = set_default_value_bool, }; +static void prop_get_on_off_auto_bit64(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + Property *prop = opaque; + OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop); + OnOffAuto value; + uint64_t mask = qdev_get_prop_mask64(prop); + + if (p->auto_bits & mask) { + value = ON_OFF_AUTO_AUTO; + } else if (p->on_bits & mask) { + value = ON_OFF_AUTO_ON; + } else { + value = ON_OFF_AUTO_OFF; + } + + visit_type_OnOffAuto(v, name, &value, errp); +} + +static void prop_set_on_off_auto_bit64(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + Property *prop = opaque; + OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop); + OnOffAuto value; + uint64_t mask = qdev_get_prop_mask64(prop); + + if (!visit_type_OnOffAuto(v, name, &value, errp)) { + return; + } + + switch (value) { + case ON_OFF_AUTO_AUTO: + p->on_bits &= ~mask; + p->auto_bits |= mask; + break; + + case ON_OFF_AUTO_ON: + p->on_bits |= mask; + p->auto_bits &= ~mask; + break; + + case ON_OFF_AUTO_OFF: + p->on_bits &= ~mask; + p->auto_bits &= ~mask; + break; + + case ON_OFF_AUTO__MAX: + g_assert_not_reached(); + } +} + +const PropertyInfo qdev_prop_on_off_auto_bit64 = { + .type = "OnOffAuto", + .description = "on/off/auto", + .enum_table = &OnOffAuto_lookup, + .get = prop_get_on_off_auto_bit64, + .set = prop_set_on_off_auto_bit64, + .set_default_value = qdev_propinfo_set_default_value_enum, +}; + /* --- bool --- */ static void get_bool(Object *obj, Visitor *v, const char *name, void *opaque, diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index e71367a..ec69e87 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -151,6 +151,17 @@ void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr) sysbus_mmio_map_common(dev, n, addr, false, 0); } +int sysbus_mmio_map_name(SysBusDevice *dev, const char *name, hwaddr addr) +{ + for (int i = 0; i < dev->num_mmio; i++) { + if (!strcmp(dev->mmio[i].memory->name, name)) { + sysbus_mmio_map(dev, i, addr); + return i; + } + } + return -1; +} + void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr, int priority) { diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c index 12dee2e..7583dd9 100644 --- a/hw/cxl/cxl-events.c +++ b/hw/cxl/cxl-events.c @@ -8,8 +8,6 @@ */ #include "qemu/osdep.h" - -#include "qemu/bswap.h" #include "qemu/error-report.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -260,3 +258,41 @@ void cxl_event_irq_assert(CXLType3Dev *ct3d) } } } + +void cxl_create_dc_event_records_for_extents(CXLType3Dev *ct3d, + CXLDCEventType type, + CXLDCExtentRaw extents[], + uint32_t ext_count) +{ + CXLEventDynamicCapacity event_rec = {}; + int i; + + cxl_assign_event_header(&event_rec.hdr, + &dynamic_capacity_uuid, + (1 << CXL_EVENT_TYPE_INFO), + sizeof(event_rec), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + event_rec.type = type; + 
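Editor's note: the loop below in cxl_create_dc_event_records_for_extents() queues one event record per extent and sets bit 0 of flags (the "More" bit) on every record except the last, so the host knows they all belong to one add/release request and keeps draining the log until a record arrives with "More" clear. A hedged sketch of the consuming side, with read_next_dc_event() and collect_extent() as purely hypothetical host helpers:

    /* Gather every extent of one request from the Dynamic Capacity log. */
    bool more;
    do {
        struct dc_event_record rec = read_next_dc_event(dev);

        collect_extent(&rec.extent);
        more = rec.flags & 0x1;     /* bit 0: further records follow */
    } while (more);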
event_rec.validity_flags = 1; + event_rec.host_id = 0; + event_rec.updated_region_id = 0; + event_rec.extents_avail = CXL_NUM_EXTENTS_SUPPORTED - + ct3d->dc.total_extent_count; + + for (i = 0; i < ext_count; i++) { + memcpy(&event_rec.dynamic_capacity_extent, + &extents[i], + sizeof(CXLDCExtentRaw)); + event_rec.flags = 0; + if (i < ext_count - 1) { + /* Set "More" flag */ + event_rec.flags |= BIT(0); + } + + if (cxl_event_insert(&ct3d->cxl_dstate, + CXL_EVENT_TYPE_DYNAMIC_CAP, + (CXLEventRecordRaw *)&event_rec)) { + cxl_event_irq_assert(ct3d); + } + } +} diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c index cae4afc..c015baa 100644 --- a/hw/cxl/cxl-host-stubs.c +++ b/hw/cxl/cxl-host-stubs.c @@ -8,8 +8,13 @@ #include "hw/cxl/cxl.h" #include "hw/cxl/cxl_host.h" -void cxl_fmws_link_targets(CXLState *stat, Error **errp) {}; +void cxl_fmws_link_targets(Error **errp) {}; void cxl_machine_init(Object *obj, CXLState *state) {}; void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp) {}; +hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr) +{ + return base; +}; +void cxl_fmws_update_mmio(void) {}; const MemoryRegionOps cfmws_ops; diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index e010163..5c2ce25 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -22,15 +22,17 @@ #include "hw/pci/pcie_port.h" #include "hw/pci-bridge/pci_expander_bridge.h" -static void cxl_fixed_memory_window_config(CXLState *cxl_state, - CXLFixedMemoryWindowOptions *object, - Error **errp) +static void cxl_fixed_memory_window_config(CXLFixedMemoryWindowOptions *object, + int index, Error **errp) { ERRP_GUARD(); - g_autofree CXLFixedWindow *fw = g_malloc0(sizeof(*fw)); + DeviceState *dev = qdev_new(TYPE_CXL_FMW); + CXLFixedWindow *fw = CXL_FMW(dev); strList *target; int i; + fw->index = index; + for (target = object->targets; target; target = target->next) { fw->num_targets++; } @@ -65,35 +67,39 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state, fw->targets[i] = g_strdup(target->value); } - cxl_state->fixed_windows = g_list_append(cxl_state->fixed_windows, - g_steal_pointer(&fw)); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), errp); } -void cxl_fmws_link_targets(CXLState *cxl_state, Error **errp) +static int cxl_fmws_link(Object *obj, void *opaque) { - if (cxl_state && cxl_state->fixed_windows) { - GList *it; - - for (it = cxl_state->fixed_windows; it; it = it->next) { - CXLFixedWindow *fw = it->data; - int i; - - for (i = 0; i < fw->num_targets; i++) { - Object *o; - bool ambig; - - o = object_resolve_path_type(fw->targets[i], - TYPE_PXB_CXL_DEV, - &ambig); - if (!o) { - error_setg(errp, "Could not resolve CXLFM target %s", - fw->targets[i]); - return; - } - fw->target_hbs[i] = PXB_CXL_DEV(o); - } + struct CXLFixedWindow *fw; + int i; + + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { + return 0; + } + fw = CXL_FMW(obj); + + for (i = 0; i < fw->num_targets; i++) { + Object *o; + bool ambig; + + o = object_resolve_path_type(fw->targets[i], TYPE_PXB_CXL_DEV, + &ambig); + if (!o) { + error_setg(&error_fatal, "Could not resolve CXLFM target %s", + fw->targets[i]); + return 1; } + fw->target_hbs[i] = PXB_CXL_DEV(o); } + return 0; +} + +void cxl_fmws_link_targets(Error **errp) +{ + /* Order doesn't matter for this, so no need to build list */ + object_child_foreach_recursive(object_get_root(), cxl_fmws_link, NULL); } static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, @@ -325,14 +331,15 @@ static void machine_set_cfmw(Object *obj, Visitor *v, const char 
*name, CXLState *state = opaque; CXLFixedMemoryWindowOptionsList *cfmw_list = NULL; CXLFixedMemoryWindowOptionsList *it; + int index; visit_type_CXLFixedMemoryWindowOptionsList(v, name, &cfmw_list, errp); if (!cfmw_list) { return; } - for (it = cfmw_list; it; it = it->next) { - cxl_fixed_memory_window_config(state, it->value, errp); + for (it = cfmw_list, index = 0; it; it = it->next, index++) { + cxl_fixed_memory_window_config(it->value, index, errp); } state->cfmw_list = cfmw_list; } @@ -370,3 +377,110 @@ void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp) } } } + +static int cxl_fmws_find(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { + return 0; + } + *list = g_slist_prepend(*list, obj); + + return 0; +} + +static GSList *cxl_fmws_get_all(void) +{ + GSList *list = NULL; + + object_child_foreach_recursive(object_get_root(), cxl_fmws_find, &list); + + return list; +} + +static gint cfmws_cmp(gconstpointer a, gconstpointer b, gpointer d) +{ + const struct CXLFixedWindow *ap = a; + const struct CXLFixedWindow *bp = b; + + return ap->index > bp->index; +} + +GSList *cxl_fmws_get_all_sorted(void) +{ + return g_slist_sort_with_data(cxl_fmws_get_all(), cfmws_cmp, NULL); +} + +static int cxl_fmws_mmio_map(Object *obj, void *opaque) +{ + struct CXLFixedWindow *fw; + + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { + return 0; + } + fw = CXL_FMW(obj); + sysbus_mmio_map(SYS_BUS_DEVICE(fw), 0, fw->base); + + return 0; +} + +void cxl_fmws_update_mmio(void) +{ + /* Ordering is not required for this */ + object_child_foreach_recursive(object_get_root(), cxl_fmws_mmio_map, NULL); +} + +hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr) +{ + GSList *cfmws_list, *iter; + CXLFixedWindow *fw; + + cfmws_list = cxl_fmws_get_all_sorted(); + for (iter = cfmws_list; iter; iter = iter->next) { + fw = CXL_FMW(iter->data); + if (base + fw->size <= max_addr) { + fw->base = base; + base += fw->size; + } + } + g_slist_free(cfmws_list); + + return base; +} + +static void cxl_fmw_realize(DeviceState *dev, Error **errp) +{ + CXLFixedWindow *fw = CXL_FMW(dev); + + memory_region_init_io(&fw->mr, OBJECT(dev), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &fw->mr); +} + +/* + * Note: Fixed memory windows represent fixed address decoders on the host and + * as such have no dynamic state to reset or migrate + */ +static void cxl_fmw_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "CXL Fixed Memory Window"; + dc->realize = cxl_fmw_realize; + /* Reason - created by machines as tightly coupled to machine memory map */ + dc->user_creatable = false; +} + +static const TypeInfo cxl_fmw_info = { + .name = TYPE_CXL_FMW, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(CXLFixedWindow), + .class_init = cxl_fmw_class_init, +}; + +static void cxl_host_register_types(void) +{ + type_register_static(&cxl_fmw_info); +} +type_init(cxl_host_register_types) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 299f232..68c7cc9 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -18,15 +18,16 @@ #include "hw/pci/pci.h" #include "hw/pci-bridge/cxl_upstream_port.h" #include "qemu/cutils.h" +#include "qemu/host-utils.h" #include "qemu/log.h" #include "qemu/units.h" #include "qemu/uuid.h" #include "system/hostmem.h" #include "qemu/range.h" +#include "qapi/qapi-types-cxl.h" #define 
CXL_CAPACITY_MULTIPLIER (256 * MiB) #define CXL_DC_EVENT_LOG_SIZE 8 -#define CXL_NUM_EXTENTS_SUPPORTED 512 #define CXL_NUM_TAGS_SUPPORTED 0 #define CXL_ALERTS_LIFE_USED_WARN_THRESH (1 << 0) #define CXL_ALERTS_OVER_TEMP_WARN_THRESH (1 << 1) @@ -117,6 +118,13 @@ enum { #define GET_PHYSICAL_PORT_STATE 0x1 TUNNEL = 0x53, #define MANAGEMENT_COMMAND 0x0 + FMAPI_DCD_MGMT = 0x56, + #define GET_DCD_INFO 0x0 + #define GET_HOST_DC_REGION_CONFIG 0x1 + #define SET_DC_REGION_CONFIG 0x2 + #define GET_DC_REGION_EXTENT_LIST 0x3 + #define INITIATE_DC_ADD 0x4 + #define INITIATE_DC_RELEASE 0x5 }; /* CCI Message Format CXL r3.1 Figure 7-19 */ @@ -2750,7 +2758,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd, uint16_t out_pl_len, size; CXLDCExtent *ent; - if (start_extent_id > ct3d->dc.total_extent_count) { + if (start_extent_id > ct3d->dc.nr_extents_accepted) { return CXL_MBOX_INVALID_INPUT; } @@ -2761,7 +2769,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd, out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); stl_le_p(&out->count, record_count); - stl_le_p(&out->total_extents, ct3d->dc.total_extent_count); + stl_le_p(&out->total_extents, ct3d->dc.nr_extents_accepted); stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq); if (record_count > 0) { @@ -2883,16 +2891,20 @@ void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list, QTAILQ_INSERT_TAIL(list, group, node); } -void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list) +uint32_t cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list) { CXLDCExtent *ent, *ent_next; CXLDCExtentGroup *group = QTAILQ_FIRST(list); + uint32_t extents_deleted = 0; QTAILQ_REMOVE(list, group, node); QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) { cxl_remove_extent_from_extent_list(&group->list, ent); + extents_deleted++; } g_free(group); + + return extents_deleted; } /* @@ -3011,7 +3023,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, CXLUpdateDCExtentListInPl *in = (void *)payload_in; CXLType3Dev *ct3d = CXL_TYPE3(cci->d); CXLDCExtentList *extent_list = &ct3d->dc.extents; - uint32_t i; + uint32_t i, num; uint64_t dpa, len; CXLRetCode ret; @@ -3020,7 +3032,8 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, } if (in->num_entries_updated == 0) { - cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); + num = cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); + ct3d->dc.total_extent_count -= num; return CXL_MBOX_SUCCESS; } @@ -3051,10 +3064,12 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd, cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0); ct3d->dc.total_extent_count += 1; + ct3d->dc.nr_extents_accepted += 1; ct3_set_region_block_backed(ct3d, dpa, len); } /* Remove the first extent group in the pending list */ - cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); + num = cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending); + ct3d->dc.total_extent_count -= num; return CXL_MBOX_SUCCESS; } @@ -3160,7 +3175,7 @@ free_and_exit: } *updated_list_size = 0; } else { - *updated_list_size = ct3d->dc.total_extent_count + cnt_delta; + *updated_list_size = ct3d->dc.nr_extents_accepted + cnt_delta; } return ret; @@ -3222,11 +3237,495 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd, ct3_set_region_block_backed(ct3d, ent->start_dpa, ent->len); cxl_remove_extent_from_extent_list(&updated_list, ent); } - ct3d->dc.total_extent_count = 
updated_list_size; + ct3d->dc.total_extent_count += (updated_list_size - + ct3d->dc.nr_extents_accepted); + + ct3d->dc.nr_extents_accepted = updated_list_size; + + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.2 section 7.6.7.6.1: Get DCD Info (Opcode 5600h) */ +static CXLRetCode cmd_fm_get_dcd_info(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t num_hosts; + uint8_t num_regions_supported; + uint8_t rsvd1[2]; + uint16_t supported_add_sel_policy_bitmask; + uint8_t rsvd2[2]; + uint16_t supported_removal_policy_bitmask; + uint8_t sanitize_on_release_bitmask; + uint8_t rsvd3; + uint64_t total_dynamic_capacity; + uint64_t region_blk_size_bitmasks[8]; + } QEMU_PACKED *out = (void *)payload_out; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDCRegion *region; + int i; + + out->num_hosts = 1; + out->num_regions_supported = ct3d->dc.num_regions; + stw_le_p(&out->supported_add_sel_policy_bitmask, + BIT(CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE)); + stw_le_p(&out->supported_removal_policy_bitmask, + BIT(CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE)); + out->sanitize_on_release_bitmask = 0; + + stq_le_p(&out->total_dynamic_capacity, + ct3d->dc.total_capacity / CXL_CAPACITY_MULTIPLIER); + + for (i = 0; i < ct3d->dc.num_regions; i++) { + region = &ct3d->dc.regions[i]; + memcpy(&out->region_blk_size_bitmasks[i], + &region->supported_blk_size_bitmask, + sizeof(out->region_blk_size_bitmasks[i])); + } + + *len_out = sizeof(*out); + return CXL_MBOX_SUCCESS; +} + +static void build_dsmas_flags(uint8_t *flags, CXLDCRegion *region) +{ + *flags = 0; + + if (region->nonvolatile) { + *flags |= BIT(CXL_DSMAS_FLAGS_NONVOLATILE); + } + if (region->sharable) { + *flags |= BIT(CXL_DSMAS_FLAGS_SHARABLE); + } + if (region->hw_managed_coherency) { + *flags |= BIT(CXL_DSMAS_FLAGS_HW_MANAGED_COHERENCY); + } + if (region->ic_specific_dc_management) { + *flags |= BIT(CXL_DSMAS_FLAGS_IC_SPECIFIC_DC_MANAGEMENT); + } + if (region->rdonly) { + *flags |= BIT(CXL_DSMAS_FLAGS_RDONLY); + } +} + +/* + * CXL r3.2 section 7.6.7.6.2: + * Get Host DC Region Configuration (Opcode 5601h) + */ +static CXLRetCode cmd_fm_get_host_dc_region_config(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint16_t host_id; + uint8_t region_cnt; + uint8_t start_rid; + } QEMU_PACKED *in = (void *)payload_in; + struct { + uint16_t host_id; + uint8_t num_regions; + uint8_t regions_returned; + struct { + uint64_t base; + uint64_t decode_len; + uint64_t region_len; + uint64_t block_size; + uint8_t flags; + uint8_t rsvd1[3]; + uint8_t sanitize; + uint8_t rsvd2[3]; + } QEMU_PACKED records[]; + } QEMU_PACKED *out = (void *)payload_out; + struct { + uint32_t num_extents_supported; + uint32_t num_extents_available; + uint32_t num_tags_supported; + uint32_t num_tags_available; + } QEMU_PACKED *extra_out; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + uint16_t record_count, out_pl_len, i; + + if (in->start_rid >= ct3d->dc.num_regions) { + return CXL_MBOX_INVALID_INPUT; + } + record_count = MIN(ct3d->dc.num_regions - in->start_rid, in->region_cnt); + + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + extra_out = (void *)out + out_pl_len; + out_pl_len += sizeof(*extra_out); + + assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE); + + stw_le_p(&out->host_id, 0); + out->num_regions = ct3d->dc.num_regions; + out->regions_returned = record_count; + + for (i = 0; i < record_count;
i++) { + stq_le_p(&out->records[i].base, + ct3d->dc.regions[in->start_rid + i].base); + stq_le_p(&out->records[i].decode_len, + ct3d->dc.regions[in->start_rid + i].decode_len / + CXL_CAPACITY_MULTIPLIER); + stq_le_p(&out->records[i].region_len, + ct3d->dc.regions[in->start_rid + i].len); + stq_le_p(&out->records[i].block_size, + ct3d->dc.regions[in->start_rid + i].block_size); + build_dsmas_flags(&out->records[i].flags, + &ct3d->dc.regions[in->start_rid + i]); + /* Sanitize is bit 0 of flags. */ + out->records[i].sanitize = + ct3d->dc.regions[in->start_rid + i].flags & BIT(0); + } + + stl_le_p(&extra_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED); + stl_le_p(&extra_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED - + ct3d->dc.total_extent_count); + stl_le_p(&extra_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED); + stl_le_p(&extra_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED); + + *len_out = out_pl_len; + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.2 section 7.6.7.6.3: Set Host DC Region Configuration (Opcode 5602h) */ +static CXLRetCode cmd_fm_set_dc_region_config(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint8_t reg_id; + uint8_t rsvd[3]; + uint64_t block_sz; + uint8_t flags; + uint8_t rsvd2[3]; + } QEMU_PACKED *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLEventDynamicCapacity dcEvent = {}; + CXLDCRegion *region; + + /* Reject out-of-range region ids before the regions array is indexed. */ + if (in->reg_id >= DCD_MAX_NUM_REGION) { + return CXL_MBOX_UNSUPPORTED; + } + region = &ct3d->dc.regions[in->reg_id]; + + /* + * CXL r3.2 7.6.7.6.3: Set DC Region Configuration + * This command shall fail with Unsupported when the Sanitize on Release + * field does not match the region’s configuration... and the device + * does not support reconfiguration of the Sanitize on Release setting. + * + * Currently not reconfigurable, so always fail if sanitize bit (bit 0) + * doesn't match. + */ + if ((in->flags & 0x1) != (region->flags & 0x1)) { + return CXL_MBOX_UNSUPPORTED; + } + + /* Check that no extents are in the region being reconfigured */ + if (!bitmap_empty(region->blk_bitmap, region->len / region->block_size)) { + return CXL_MBOX_UNSUPPORTED; + } + + /* Check that new block size is supported */ + if (!is_power_of_2(in->block_sz) || + !(in->block_sz & region->supported_blk_size_bitmask)) { + return CXL_MBOX_INVALID_INPUT; + } + + /* Return success if new block size == current block size */ + if (in->block_sz == region->block_size) { + return CXL_MBOX_SUCCESS; + } + + /* Free bitmap and create new one for new block size.
*/ + qemu_mutex_lock(&region->bitmap_lock); + g_free(region->blk_bitmap); + region->blk_bitmap = bitmap_new(region->len / in->block_sz); + qemu_mutex_unlock(&region->bitmap_lock); + region->block_size = in->block_sz; + + /* Create event record and insert into event log */ + cxl_assign_event_header(&dcEvent.hdr, + &dynamic_capacity_uuid, + (1 << CXL_EVENT_TYPE_INFO), + sizeof(dcEvent), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + dcEvent.type = DC_EVENT_REGION_CONFIG_UPDATED; + dcEvent.validity_flags = 1; + dcEvent.host_id = 0; + dcEvent.updated_region_id = in->reg_id; + + if (cxl_event_insert(&ct3d->cxl_dstate, + CXL_EVENT_TYPE_DYNAMIC_CAP, + (CXLEventRecordRaw *)&dcEvent)) { + cxl_event_irq_assert(ct3d); + } + return CXL_MBOX_SUCCESS; +} + +/* CXL r3.2 section 7.6.7.6.4: Get DC Region Extent Lists (Opcode 5603h) */ +static CXLRetCode cmd_fm_get_dc_region_extent_list(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint16_t host_id; + uint8_t rsvd[2]; + uint32_t extent_cnt; + uint32_t start_extent_id; + } QEMU_PACKED *in = (void *)payload_in; + struct { + uint16_t host_id; + uint8_t rsvd[2]; + uint32_t start_extent_id; + uint32_t extents_returned; + uint32_t total_extents; + uint32_t list_generation_num; + uint8_t rsvd2[4]; + CXLDCExtentRaw records[]; + } QEMU_PACKED *out = (void *)payload_out; + QEMU_BUILD_BUG_ON(sizeof(*in) != 0xc); + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + CXLDCExtent *ent; + CXLDCExtentRaw *out_rec; + uint16_t record_count = 0, record_done = 0, i = 0; + uint16_t out_pl_len, max_size; + + if (in->host_id != 0) { + return CXL_MBOX_INVALID_INPUT; + } + + if (in->start_extent_id > ct3d->dc.nr_extents_accepted) { + return CXL_MBOX_INVALID_INPUT; + } + record_count = MIN(in->extent_cnt, + ct3d->dc.nr_extents_accepted - in->start_extent_id); + max_size = CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out); + record_count = MIN(record_count, max_size / sizeof(out->records[0])); + out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]); + + stw_le_p(&out->host_id, in->host_id); + stl_le_p(&out->start_extent_id, in->start_extent_id); + stl_le_p(&out->extents_returned, record_count); + stl_le_p(&out->total_extents, ct3d->dc.nr_extents_accepted); + stl_le_p(&out->list_generation_num, ct3d->dc.ext_list_gen_seq); + + if (record_count > 0) { + QTAILQ_FOREACH(ent, &ct3d->dc.extents, node) { + if (i++ < in->start_extent_id) { + continue; + } + out_rec = &out->records[record_done]; + stq_le_p(&out_rec->start_dpa, ent->start_dpa); + stq_le_p(&out_rec->len, ent->len); + memcpy(&out_rec->tag, ent->tag, 0x10); + stw_le_p(&out_rec->shared_seq, ent->shared_seq); + + record_done++; + if (record_done == record_count) { + break; + } + } + } + + *len_out = out_pl_len; return CXL_MBOX_SUCCESS; } +/* + * Helper function to convert CXLDCExtentRaw to CXLUpdateDCExtentListInPl + * in order to reuse cxl_detect_malformed_extent_list() function which accepts + * CXLUpdateDCExtentListInPl as a parameter.
+ */ +static void convert_raw_extents(CXLDCExtentRaw raw_extents[], + CXLUpdateDCExtentListInPl *extent_list, + int count) +{ + int i; + + extent_list->num_entries_updated = count; + + for (i = 0; i < count; i++) { + extent_list->updated_entries[i].start_dpa = raw_extents[i].start_dpa; + extent_list->updated_entries[i].len = raw_extents[i].len; + } +} + +/* CXL r3.2 Section 7.6.7.6.5: Initiate Dynamic Capacity Add (Opcode 5604h) */ +static CXLRetCode cmd_fm_initiate_dc_add(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint16_t host_id; + uint8_t selection_policy; + uint8_t reg_num; + uint64_t length; + uint8_t tag[0x10]; + uint32_t ext_count; + CXLDCExtentRaw extents[]; + } QEMU_PACKED *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + int i, rc; + + switch (in->selection_policy) { + case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE: { + /* Adding extents exceeds device's extent tracking ability. */ + if (in->ext_count + ct3d->dc.total_extent_count > + CXL_NUM_EXTENTS_SUPPORTED) { + return CXL_MBOX_RESOURCES_EXHAUSTED; + } + + g_autofree CXLUpdateDCExtentListInPl *list = + g_malloc0(sizeof(*list) + + in->ext_count * sizeof(*list->updated_entries)); + + convert_raw_extents(in->extents, list, in->ext_count); + rc = cxl_detect_malformed_extent_list(ct3d, list); + + for (i = 0; i < in->ext_count; i++) { + CXLDCExtentRaw *ext = &in->extents[i]; + + /* Check requested extents do not overlap with pending ones. */ + if (cxl_extent_groups_overlaps_dpa_range(&ct3d->dc.extents_pending, + ext->start_dpa, + ext->len)) { + return CXL_MBOX_INVALID_EXTENT_LIST; + } + /* Check requested extents do not overlap with existing ones. */ + if (cxl_extents_overlaps_dpa_range(&ct3d->dc.extents, + ext->start_dpa, + ext->len)) { + return CXL_MBOX_INVALID_EXTENT_LIST; + } + } + + if (rc) { + return rc; + } + + CXLDCExtentGroup *group = NULL; + for (i = 0; i < in->ext_count; i++) { + CXLDCExtentRaw *ext = &in->extents[i]; + + group = cxl_insert_extent_to_extent_group(group, ext->start_dpa, + ext->len, ext->tag, + ext->shared_seq); + } + + cxl_extent_group_list_insert_tail(&ct3d->dc.extents_pending, group); + ct3d->dc.total_extent_count += in->ext_count; + cxl_create_dc_event_records_for_extents(ct3d, + DC_EVENT_ADD_CAPACITY, + in->extents, + in->ext_count); + + return CXL_MBOX_SUCCESS; + } + default: { + qemu_log_mask(LOG_UNIMP, + "CXL extent selection policy not supported.\n"); + return CXL_MBOX_INVALID_INPUT; + } + } +} + +#define CXL_EXTENT_REMOVAL_POLICY_MASK 0x0F +#define CXL_FORCED_REMOVAL_MASK (1 << 4) +/* + * CXL r3.2 Section 7.6.7.6.6: + * Initiate Dynamic Capacity Release (Opcode 5605h) + */ +static CXLRetCode cmd_fm_initiate_dc_release(const struct cxl_cmd *cmd, + uint8_t *payload_in, + size_t len_in, + uint8_t *payload_out, + size_t *len_out, + CXLCCI *cci) +{ + struct { + uint16_t host_id; + uint8_t flags; + uint8_t reg_num; + uint64_t length; + uint8_t tag[0x10]; + uint32_t ext_count; + CXLDCExtentRaw extents[]; + } QEMU_PACKED *in = (void *)payload_in; + CXLType3Dev *ct3d = CXL_TYPE3(cci->d); + int i, rc; + + switch (in->flags & CXL_EXTENT_REMOVAL_POLICY_MASK) { + case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE: { + CXLDCExtentList updated_list; + uint32_t updated_list_size; + g_autofree CXLUpdateDCExtentListInPl *list = + g_malloc0(sizeof(*list) + + in->ext_count * sizeof(*list->updated_entries)); + + convert_raw_extents(in->extents, list, in->ext_count); + rc = 
cxl_detect_malformed_extent_list(ct3d, list); + if (rc) { + return rc; + } + + /* + * Fail with Invalid PA if an extent is pending and Forced Removal + * flag not set. + */ + if (!(in->flags & CXL_FORCED_REMOVAL_MASK)) { + for (i = 0; i < in->ext_count; i++) { + CXLDCExtentRaw ext = in->extents[i]; + /* + * Check requested extents don't overlap with pending + * extents. + */ + if (cxl_extent_groups_overlaps_dpa_range( + &ct3d->dc.extents_pending, + ext.start_dpa, + ext.len)) { + return CXL_MBOX_INVALID_PA; + } + } + } + + rc = cxl_dc_extent_release_dry_run(ct3d, + list, + &updated_list, + &updated_list_size); + if (rc) { + return rc; + } + cxl_create_dc_event_records_for_extents(ct3d, + DC_EVENT_RELEASE_CAPACITY, + in->extents, + in->ext_count); + return CXL_MBOX_SUCCESS; + } + default: { + qemu_log_mask(LOG_UNIMP, + "CXL extent removal policy not supported.\n"); + return CXL_MBOX_INVALID_INPUT; + } + } +} + static const struct cxl_cmd cxl_cmd_set[256][256] = { [INFOSTAT][BACKGROUND_OPERATION_ABORT] = { "BACKGROUND_OPERATION_ABORT", cmd_infostat_bg_op_abort, 0, 0 }, @@ -3340,6 +3839,36 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = { cmd_tunnel_management_cmd, ~0, 0 }, }; +static const struct cxl_cmd cxl_cmd_set_fm_dcd[256][256] = { + [FMAPI_DCD_MGMT][GET_DCD_INFO] = { "GET_DCD_INFO", + cmd_fm_get_dcd_info, 0, 0 }, + [FMAPI_DCD_MGMT][GET_HOST_DC_REGION_CONFIG] = { "GET_HOST_DC_REGION_CONFIG", + cmd_fm_get_host_dc_region_config, 4, 0 }, + [FMAPI_DCD_MGMT][SET_DC_REGION_CONFIG] = { "SET_DC_REGION_CONFIG", + cmd_fm_set_dc_region_config, 16, + (CXL_MBOX_CONFIG_CHANGE_COLD_RESET | + CXL_MBOX_CONFIG_CHANGE_CONV_RESET | + CXL_MBOX_CONFIG_CHANGE_CXL_RESET | + CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | + CXL_MBOX_IMMEDIATE_DATA_CHANGE) }, + [FMAPI_DCD_MGMT][GET_DC_REGION_EXTENT_LIST] = { "GET_DC_REGION_EXTENT_LIST", + cmd_fm_get_dc_region_extent_list, 12, 0 }, + [FMAPI_DCD_MGMT][INITIATE_DC_ADD] = { "INIT_DC_ADD", + cmd_fm_initiate_dc_add, ~0, + (CXL_MBOX_CONFIG_CHANGE_COLD_RESET | + CXL_MBOX_CONFIG_CHANGE_CONV_RESET | + CXL_MBOX_CONFIG_CHANGE_CXL_RESET | + CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | + CXL_MBOX_IMMEDIATE_DATA_CHANGE) }, + [FMAPI_DCD_MGMT][INITIATE_DC_RELEASE] = { "INIT_DC_RELEASE", + cmd_fm_initiate_dc_release, ~0, + (CXL_MBOX_CONFIG_CHANGE_COLD_RESET | + CXL_MBOX_CONFIG_CHANGE_CONV_RESET | + CXL_MBOX_CONFIG_CHANGE_CXL_RESET | + CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | + CXL_MBOX_IMMEDIATE_DATA_CHANGE) }, +}; + /* * While the command is executing in the background, the device should * update the percentage complete in the Background Command Status Register @@ -3614,7 +4143,12 @@ void cxl_initialize_t3_fm_owned_ld_mctpcci(CXLCCI *cci, DeviceState *d, DeviceState *intf, size_t payload_max) { + CXLType3Dev *ct3d = CXL_TYPE3(d); + cxl_copy_cci_commands(cci, cxl_cmd_set_t3_fm_owned_ld_mctp); + if (ct3d->dc.num_regions) { + cxl_copy_cci_commands(cci, cxl_cmd_set_fm_dcd); + } cci->d = d; cci->intf = intf; cxl_init_cci(cci, payload_max); diff --git a/hw/display/artist.c b/hw/display/artist.c index 3fafc8a..3c884c9 100644 --- a/hw/display/artist.c +++ b/hw/display/artist.c @@ -12,6 +12,7 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qemu/units.h" +#include "qemu/bswap.h" #include "qapi/error.h" #include "hw/sysbus.h" #include "hw/loader.h" diff --git a/hw/display/ati.c b/hw/display/ati.c index 7de2773..f7c0006 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -22,6 +22,7 @@ #include "vga-access.h" #include "hw/qdev-properties.h" #include "vga_regs.h" +#include "qemu/bswap.h" 
#include "qemu/log.h" #include "qemu/module.h" #include "qemu/error-report.h" diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c index eda6d3d..c6a9ac1 100644 --- a/hw/display/qxl-render.c +++ b/hw/display/qxl-render.c @@ -222,6 +222,7 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl, uint32_t max_chunks = 32; size_t offset = 0; size_t bytes; + QXLPHYSICAL next_chunk_phys = 0; for (;;) { bytes = MIN(size - offset, chunk->data_size); @@ -230,7 +231,15 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl, if (offset == size) { return; } - chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id, + next_chunk_phys = chunk->next_chunk; + /* first time, only get the next chunk's data size */ + chunk = qxl_phys2virt(qxl, next_chunk_phys, group_id, + sizeof(QXLDataChunk)); + if (!chunk) { + return; + } + /* second time, check data size and get data */ + chunk = qxl_phys2virt(qxl, next_chunk_phys, group_id, sizeof(QXLDataChunk) + chunk->data_size); if (!chunk) { return; diff --git a/hw/display/ramfb-standalone.c b/hw/display/ramfb-standalone.c index 08f2d5d..72b2071 100644 --- a/hw/display/ramfb-standalone.c +++ b/hw/display/ramfb-standalone.c @@ -17,6 +17,7 @@ struct RAMFBStandaloneState { QemuConsole *con; RAMFBState *state; bool migrate; + bool use_legacy_x86_rom; }; static void display_update_wrapper(void *dev) @@ -39,7 +40,7 @@ static void ramfb_realizefn(DeviceState *dev, Error **errp) RAMFBStandaloneState *ramfb = RAMFB(dev); ramfb->con = graphic_console_init(dev, 0, &wrapper_ops, dev); - ramfb->state = ramfb_setup(errp); + ramfb->state = ramfb_setup(ramfb->use_legacy_x86_rom, errp); } static bool migrate_needed(void *opaque) @@ -62,6 +63,8 @@ static const VMStateDescription ramfb_dev_vmstate = { static const Property ramfb_properties[] = { DEFINE_PROP_BOOL("x-migrate", RAMFBStandaloneState, migrate, true), + DEFINE_PROP_BOOL("use-legacy-x86-rom", RAMFBStandaloneState, + use_legacy_x86_rom, false), }; static void ramfb_class_initfn(ObjectClass *klass, const void *data) diff --git a/hw/display/ramfb-stubs.c b/hw/display/ramfb-stubs.c index cf64733..b835513 100644 --- a/hw/display/ramfb-stubs.c +++ b/hw/display/ramfb-stubs.c @@ -8,7 +8,7 @@ void ramfb_display_update(QemuConsole *con, RAMFBState *s) { } -RAMFBState *ramfb_setup(Error **errp) +RAMFBState *ramfb_setup(bool romfile, Error **errp) { error_setg(errp, "ramfb support not available"); return NULL; diff --git a/hw/display/ramfb.c b/hw/display/ramfb.c index 8c0f907..9a17d97 100644 --- a/hw/display/ramfb.c +++ b/hw/display/ramfb.c @@ -135,7 +135,7 @@ const VMStateDescription ramfb_vmstate = { } }; -RAMFBState *ramfb_setup(Error **errp) +RAMFBState *ramfb_setup(bool romfile, Error **errp) { FWCfgState *fw_cfg = fw_cfg_find(); RAMFBState *s; @@ -147,7 +147,9 @@ RAMFBState *ramfb_setup(Error **errp) s = g_new0(RAMFBState, 1); - rom_add_vga("vgabios-ramfb.bin"); + if (romfile) { + rom_add_vga("vgabios-ramfb.bin"); + } fw_cfg_add_file_callback(fw_cfg, "etc/ramfb", NULL, ramfb_fw_cfg_write, s, &s->cfg, sizeof(s->cfg), false); diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 6d2f186..bc091b3 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/module.h" #include "hw/usb/hcd-ohci.h" diff --git a/hw/display/vga.c b/hw/display/vga.c index 20475eb..90b89cf 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -26,7
+26,7 @@ #include "qemu/units.h" #include "system/reset.h" #include "qapi/error.h" -#include "exec/tswap.h" +#include "qemu/target-info.h" #include "hw/display/vga.h" #include "hw/i386/x86.h" #include "hw/pci/pci.h" diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 9eb806b..7269477 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -19,6 +19,7 @@ #include "qemu/error-report.h" #include "hw/display/edid.h" #include "trace.h" +#include "qapi/qapi-types-virtio.h" void virtio_gpu_base_reset(VirtIOGPUBase *g) @@ -56,6 +57,8 @@ void virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout, struct virtio_gpu_resp_edid *edid) { + size_t output_idx; + VirtIOGPUOutputList *node; qemu_edid_info info = { .width_mm = g->req_state[scanout].width_mm, .height_mm = g->req_state[scanout].height_mm, @@ -64,6 +67,14 @@ virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout, .refresh_rate = g->req_state[scanout].refresh_rate, }; + for (output_idx = 0, node = g->conf.outputs; + output_idx <= scanout && node; output_idx++, node = node->next) { + if (output_idx == scanout && node->value && node->value->name) { + info.name = node->value->name; + break; + } + } + edid->size = cpu_to_le32(sizeof(edid->edid)); qemu_edid_generate(edid->edid, sizeof(edid->edid), &info); } @@ -172,6 +183,8 @@ virtio_gpu_base_device_realize(DeviceState *qdev, VirtIOHandleOutput cursor_cb, Error **errp) { + size_t output_idx; + VirtIOGPUOutputList *node; VirtIODevice *vdev = VIRTIO_DEVICE(qdev); VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev); int i; @@ -181,6 +194,20 @@ virtio_gpu_base_device_realize(DeviceState *qdev, return false; } + for (output_idx = 0, node = g->conf.outputs; + node; output_idx++, node = node->next) { + if (output_idx == g->conf.max_outputs) { + error_setg(errp, "invalid outputs > %d", g->conf.max_outputs); + return false; + } + if (node->value && node->value->name && + strlen(node->value->name) > EDID_NAME_MAX_LENGTH) { + error_setg(errp, "invalid output name '%s' > %d", + node->value->name, EDID_NAME_MAX_LENGTH); + return false; + } + } + if (virtio_gpu_virgl_enabled(g->conf)) { error_setg(&g->migration_blocker, "virgl is not yet migratable"); if (migrate_add_blocker(&g->migration_blocker, errp) < 0) { diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 145a0b3..94ddc01 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -970,6 +970,15 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, } trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); +#if VIRGL_VERSION_MAJOR >= 1 + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { + virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, + VIRGL_RENDERER_FENCE_FLAG_MERGEABLE, + cmd->cmd_hdr.ring_idx, + cmd->cmd_hdr.fence_id); + return; + } +#endif virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); } @@ -983,6 +992,11 @@ static void virgl_write_fence(void *opaque, uint32_t fence) * the guest can end up emitting fences out of order * so we should check all fenced cmds not just the first one. 
*/ +#if VIRGL_VERSION_MAJOR >= 1 + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { + continue; + } +#endif if (cmd->cmd_hdr.fence_id > fence) { continue; } @@ -997,6 +1011,29 @@ static void virgl_write_fence(void *opaque, uint32_t fence) } } +#if VIRGL_VERSION_MAJOR >= 1 +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id, + uint32_t ring_idx, uint64_t fence_id) { + VirtIOGPU *g = opaque; + struct virtio_gpu_ctrl_command *cmd, *tmp; + + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX && + cmd->cmd_hdr.ctx_id == ctx_id && cmd->cmd_hdr.ring_idx == ring_idx && + cmd->cmd_hdr.fence_id <= fence_id) { + trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + g_free(cmd); + g->inflight--; + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + trace_virtio_gpu_dec_inflight_fences(g->inflight); + } + } + } +} +#endif + static virgl_renderer_gl_context virgl_create_context(void *opaque, int scanout_idx, struct virgl_renderer_gl_ctx_param *params) @@ -1031,11 +1068,18 @@ static int virgl_make_context_current(void *opaque, int scanout_idx, } static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = { +#if VIRGL_VERSION_MAJOR >= 1 + .version = 3, +#else .version = 1, +#endif .write_fence = virgl_write_fence, .create_gl_context = virgl_create_context, .destroy_gl_context = virgl_destroy_context, .make_current = virgl_make_context_current, +#if VIRGL_VERSION_MAJOR >= 1 + .write_context_fence = virgl_write_context_fence, +#endif }; static void virtio_gpu_print_stats(void *opaque) diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index eb65bda..5139d23 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -4,7 +4,7 @@ config X86_FW_OVMF config SEV bool select X86_FW_OVMF - depends on KVM + depends on KVM && X86_64 config SGX bool @@ -13,7 +13,7 @@ config SGX config TDX bool select X86_FW_OVMF - depends on KVM + depends on KVM && X86_64 config PC bool diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 61851cc..423c495 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -338,405 +338,6 @@ build_facs(GArray *table_data) g_array_append_vals(table_data, reserved, 40); /* Reserved */ } -Aml *aml_pci_device_dsm(void) -{ - Aml *method; - - method = aml_method("_DSM", 4, AML_SERIALIZED); - { - Aml *params = aml_local(0); - Aml *pkg = aml_package(2); - aml_append(pkg, aml_int(0)); - aml_append(pkg, aml_int(0)); - aml_append(method, aml_store(pkg, params)); - aml_append(method, - aml_store(aml_name("BSEL"), aml_index(params, aml_int(0)))); - aml_append(method, - aml_store(aml_name("ASUN"), aml_index(params, aml_int(1)))); - aml_append(method, - aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1), - aml_arg(2), aml_arg(3), params)) - ); - } - return method; -} - -static void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar) -{ - Aml *UUID, *ifctx1; - uint8_t byte_list[1] = { 0 }; /* nothing supported yet */ - - aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar)); - /* - * PCI Firmware Specification 3.1 - * 4.6. 
_DSM Definitions for PCI - */ - UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); - ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID))); - { - /* call is for unsupported UUID, bail out */ - aml_append(ifctx1, aml_return(retvar)); - } - aml_append(ctx, ifctx1); - - ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2))); - { - /* call is for unsupported REV, bail out */ - aml_append(ifctx1, aml_return(retvar)); - } - aml_append(ctx, ifctx1); -} - -static Aml *aml_pci_edsm(void) -{ - Aml *method, *ifctx; - Aml *zero = aml_int(0); - Aml *func = aml_arg(2); - Aml *ret = aml_local(0); - Aml *aidx = aml_local(1); - Aml *params = aml_arg(4); - - method = aml_method("EDSM", 5, AML_SERIALIZED); - - /* get supported functions */ - ifctx = aml_if(aml_equal(func, zero)); - { - /* 1: have supported functions */ - /* 7: support for function 7 */ - const uint8_t caps = 1 | BIT(7); - build_append_pci_dsm_func0_common(ifctx, ret); - aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero))); - aml_append(ifctx, aml_return(ret)); - } - aml_append(method, ifctx); - - /* handle specific functions requests */ - /* - * PCI Firmware Specification 3.1 - * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under - * Operating Systems - */ - ifctx = aml_if(aml_equal(func, aml_int(7))); - { - Aml *pkg = aml_package(2); - aml_append(pkg, zero); - /* optional, if not impl. should return null string */ - aml_append(pkg, aml_string("%s", "")); - aml_append(ifctx, aml_store(pkg, ret)); - - /* - * IASL is fine when initializing Package with computational data, - * however it makes guest unhappy /it fails to process such AML/. - * So use runtime assignment to set acpi-index after initializer - * to make OSPM happy. - */ - aml_append(ifctx, - aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx)); - aml_append(ifctx, aml_store(aidx, aml_index(ret, zero))); - aml_append(ifctx, aml_return(ret)); - } - aml_append(method, ifctx); - - return method; -} - -static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev) -{ - Aml *method; - - g_assert(pdev->acpi_index != 0); - method = aml_method("_DSM", 4, AML_SERIALIZED); - { - Aml *params = aml_local(0); - Aml *pkg = aml_package(1); - aml_append(pkg, aml_int(pdev->acpi_index)); - aml_append(method, aml_store(pkg, params)); - aml_append(method, - aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1), - aml_arg(2), aml_arg(3), params)) - ); - } - return method; -} - -static void build_append_pcihp_notify_entry(Aml *method, int slot) -{ - Aml *if_ctx; - int32_t devfn = PCI_DEVFN(slot, 0); - - if_ctx = aml_if(aml_and(aml_arg(0), aml_int(0x1U << slot), NULL)); - aml_append(if_ctx, aml_notify(aml_name("S%.02X", devfn), aml_arg(1))); - aml_append(method, if_ctx); -} - -static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus) -{ - const PCIDevice *pdev = bus->devices[devfn]; - - if (PCI_FUNC(devfn)) { - if (IS_PCI_BRIDGE(pdev)) { - /* - * Ignore only hotplugged PCI bridges on !0 functions, but - * allow describing cold plugged bridges on all functions - */ - if (DEVICE(pdev)->hotplugged) { - return true; - } - } - } - return false; -} - -static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus) -{ - PCIDevice *pdev = bus->devices[devfn]; - if (pdev) { - return is_devfn_ignored_generic(devfn, bus) || - !DEVICE_GET_CLASS(pdev)->hotpluggable || - /* Cold plugged bridges aren't themselves hot-pluggable */ - (IS_PCI_BRIDGE(pdev) && !DEVICE(pdev)->hotplugged); - } else { /* non populated slots */ - /* - * hotplug is supported only for 
non-multifunction device - * so generate device description only for function 0 - */ - if (PCI_FUNC(devfn) || - (pci_bus_is_express(bus) && PCI_SLOT(devfn) > 0)) { - return true; - } - } - return false; -} - -void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus) -{ - int devfn; - Aml *dev, *notify_method = NULL, *method; - QObject *bsel = object_property_get_qobject(OBJECT(bus), - ACPI_PCIHP_PROP_BSEL, NULL); - uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel)); - qobject_unref(bsel); - - aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val))); - notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED); - - for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { - int slot = PCI_SLOT(devfn); - int adr = slot << 16 | PCI_FUNC(devfn); - - if (is_devfn_ignored_hotplug(devfn, bus)) { - continue; - } - - if (bus->devices[devfn]) { - dev = aml_scope("S%.02X", devfn); - } else { - dev = aml_device("S%.02X", devfn); - aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); - } - - /* - * Can't declare _SUN here for every device as it changes 'slot' - * enumeration order in linux kernel, so use another variable for it - */ - aml_append(dev, aml_name_decl("ASUN", aml_int(slot))); - aml_append(dev, aml_pci_device_dsm()); - - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - /* add _EJ0 to make slot hotpluggable */ - method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); - aml_append(method, - aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) - ); - aml_append(dev, method); - - build_append_pcihp_notify_entry(notify_method, slot); - - /* device descriptor has been composed, add it into parent context */ - aml_append(parent_scope, dev); - } - aml_append(parent_scope, notify_method); -} - -void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus) -{ - int devfn; - Aml *dev; - - for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { - /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */ - int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn); - PCIDevice *pdev = bus->devices[devfn]; - - if (!pdev || is_devfn_ignored_generic(devfn, bus)) { - continue; - } - - /* start to compose PCI device descriptor */ - dev = aml_device("S%.02X", devfn); - aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); - - call_dev_aml_func(DEVICE(bus->devices[devfn]), dev); - /* add _DSM if device has acpi-index set */ - if (pdev->acpi_index && - !object_property_get_bool(OBJECT(pdev), "hotpluggable", - &error_abort)) { - aml_append(dev, aml_pci_static_endpoint_dsm(pdev)); - } - - /* device descriptor has been composed, add it into parent context */ - aml_append(parent_scope, dev); - } -} - -static bool build_append_notification_callback(Aml *parent_scope, - const PCIBus *bus) -{ - Aml *method; - PCIBus *sec; - QObject *bsel; - int nr_notifiers = 0; - GQueue *pcnt_bus_list = g_queue_new(); - - QLIST_FOREACH(sec, &bus->child, sibling) { - Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn); - if (pci_bus_is_root(sec)) { - continue; - } - nr_notifiers = nr_notifiers + - build_append_notification_callback(br_scope, sec); - /* - * add new child scope to parent - * and keep track of bus that have PCNT, - * bus list is used later to call children PCNTs from this level PCNT - */ - if (nr_notifiers) { - g_queue_push_tail(pcnt_bus_list, sec); - aml_append(parent_scope, br_scope); - } - } - - /* - * Append PCNT method to notify about events on local and child buses. - * ps: hostbridge might not have hotplug (bsel) enabled but might have - * child bridges that do have bsel. 
- */ - method = aml_method("PCNT", 0, AML_NOTSERIALIZED); - - /* If bus supports hotplug select it and notify about local events */ - bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL); - if (bsel) { - uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel)); - - aml_append(method, aml_store(aml_int(bsel_val), aml_name("BNUM"))); - aml_append(method, aml_call2("DVNT", aml_name("PCIU"), - aml_int(1))); /* Device Check */ - aml_append(method, aml_call2("DVNT", aml_name("PCID"), - aml_int(3))); /* Eject Request */ - nr_notifiers++; - } - - /* Notify about child bus events in any case */ - while ((sec = g_queue_pop_head(pcnt_bus_list))) { - aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn)); - } - - aml_append(parent_scope, method); - qobject_unref(bsel); - g_queue_free(pcnt_bus_list); - return !!nr_notifiers; -} - -static Aml *aml_pci_pdsm(void) -{ - Aml *method, *ifctx, *ifctx1; - Aml *ret = aml_local(0); - Aml *caps = aml_local(1); - Aml *acpi_index = aml_local(2); - Aml *zero = aml_int(0); - Aml *one = aml_int(1); - Aml *not_supp = aml_int(0xFFFFFFFF); - Aml *func = aml_arg(2); - Aml *params = aml_arg(4); - Aml *bnum = aml_derefof(aml_index(params, aml_int(0))); - Aml *sunum = aml_derefof(aml_index(params, aml_int(1))); - - method = aml_method("PDSM", 5, AML_SERIALIZED); - - /* get supported functions */ - ifctx = aml_if(aml_equal(func, zero)); - { - build_append_pci_dsm_func0_common(ifctx, ret); - - aml_append(ifctx, aml_store(zero, caps)); - aml_append(ifctx, - aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); - /* - * advertise function 7 if device has acpi-index - * acpi_index values: - * 0: not present (default value) - * FFFFFFFF: not supported (old QEMU without PIDX reg) - * other: device's acpi-index - */ - ifctx1 = aml_if(aml_lnot( - aml_or(aml_equal(acpi_index, zero), - aml_equal(acpi_index, not_supp), NULL) - )); - { - /* have supported functions */ - aml_append(ifctx1, aml_or(caps, one, caps)); - /* support for function 7 */ - aml_append(ifctx1, - aml_or(caps, aml_shiftleft(one, aml_int(7)), caps)); - } - aml_append(ifctx, ifctx1); - - aml_append(ifctx, aml_store(caps, aml_index(ret, zero))); - aml_append(ifctx, aml_return(ret)); - } - aml_append(method, ifctx); - - /* handle specific functions requests */ - /* - * PCI Firmware Specification 3.1 - * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under - * Operating Systems - */ - ifctx = aml_if(aml_equal(func, aml_int(7))); - { - Aml *pkg = aml_package(2); - - aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); - aml_append(ifctx, aml_store(pkg, ret)); - /* - * Windows calls func=7 without checking if it's available, - * as workaround Microsoft has suggested to return invalid for func7 - * Package, so return 2 elements package but only initialize elements - * when acpi_index is supported and leave them uninitialized, which - * leads elements to being Uninitialized ObjectType and should trip - * Windows into discarding result as an unexpected and prevent setting - * bogus 'PCI Label' on the device. - */ - ifctx1 = aml_if(aml_lnot(aml_lor( - aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp) - ))); - { - aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero))); - /* - * optional, if not impl. 
should return null string - */ - aml_append(ifctx1, aml_store(aml_string("%s", ""), - aml_index(ret, one))); - } - aml_append(ifctx, ifctx1); - - aml_append(ifctx, aml_return(ret)); - } - - aml_append(method, ifctx); - return method; -} - /* * build_prt - Define interrupt routing rules * @@ -1227,112 +828,6 @@ static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg) return dev; } -static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) -{ - Aml *scope; - Aml *field; - Aml *method; - - scope = aml_scope("_SB.PCI0"); - - aml_append(scope, - aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(pcihp_addr), 0x08)); - field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("PCIU", 32)); - aml_append(field, aml_named_field("PCID", 32)); - aml_append(scope, field); - - aml_append(scope, - aml_operation_region("SEJ", AML_SYSTEM_IO, - aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04)); - field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("B0EJ", 32)); - aml_append(scope, field); - - aml_append(scope, - aml_operation_region("BNMR", AML_SYSTEM_IO, - aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08)); - field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("BNUM", 32)); - aml_append(field, aml_named_field("PIDX", 32)); - aml_append(scope, field); - - aml_append(scope, aml_mutex("BLCK", 0)); - - method = aml_method("PCEJ", 2, AML_NOTSERIALIZED); - aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF)); - aml_append(method, aml_store(aml_arg(0), aml_name("BNUM"))); - aml_append(method, - aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("B0EJ"))); - aml_append(method, aml_release(aml_name("BLCK"))); - aml_append(method, aml_return(aml_int(0))); - aml_append(scope, method); - - method = aml_method("AIDX", 2, AML_NOTSERIALIZED); - aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF)); - aml_append(method, aml_store(aml_arg(0), aml_name("BNUM"))); - aml_append(method, - aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("PIDX"))); - aml_append(method, aml_store(aml_name("PIDX"), aml_local(0))); - aml_append(method, aml_release(aml_name("BLCK"))); - aml_append(method, aml_return(aml_local(0))); - aml_append(scope, method); - - aml_append(scope, aml_pci_pdsm()); - - aml_append(table, scope); -} - -static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) -{ - Aml *if_ctx; - Aml *if_ctx2; - Aml *else_ctx; - Aml *method; - Aml *a_cwd1 = aml_name("CDW1"); - Aml *a_ctrl = aml_local(0); - - method = aml_method("_OSC", 4, AML_NOTSERIALIZED); - aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); - - if_ctx = aml_if(aml_equal( - aml_arg(0), aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766"))); - aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); - aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); - - aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl)); - - /* - * Always allow native PME, AER (no dependencies) - * Allow SHPC (PCI bridges can have SHPC controller) - * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled. - */ - aml_append(if_ctx, aml_and(a_ctrl, - aml_int(0x1E | (enable_native_pcie_hotplug ? 
0x1 : 0x0)), a_ctrl)); - - if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); - /* Unknown revision */ - aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x08), a_cwd1)); - aml_append(if_ctx, if_ctx2); - - if_ctx2 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl))); - /* Capabilities bits were masked */ - aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x10), a_cwd1)); - aml_append(if_ctx, if_ctx2); - - /* Update DWORD3 in the buffer */ - aml_append(if_ctx, aml_store(a_ctrl, aml_name("CDW3"))); - aml_append(method, if_ctx); - - else_ctx = aml_else(); - /* Unrecognized UUID */ - aml_append(else_ctx, aml_or(a_cwd1, aml_int(4), a_cwd1)); - aml_append(method, else_ctx); - - aml_append(method, aml_return(aml_arg(3))); - return method; -} - static void build_acpi0017(Aml *table) { Aml *dev, *scope, *method; @@ -1389,12 +884,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, aml_pci_edsm()); + aml_append(dev, build_pci_bridge_edsm()); aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { - build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); + build_acpi_pci_hotplug(dsdt, AML_SYSTEM_IO, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); } else if (q35) { @@ -1403,8 +898,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en)); - aml_append(dev, aml_pci_edsm()); + aml_append(dev, build_pci_host_bridge_osc_method(!pm->pcihp_bridge_en)); + aml_append(dev, build_pci_bridge_edsm()); aml_append(sb_scope, dev); if (mcfg_valid) { aml_append(sb_scope, build_q35_dram_controller(&mcfg)); @@ -1438,7 +933,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); if (pm->pcihp_bridge_en) { - build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); + build_acpi_pci_hotplug(dsdt, AML_SYSTEM_IO, pm->pcihp_io_base); } build_q35_pci0_int(dsdt); } @@ -1525,7 +1020,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); /* Expander bridges do not have ACPI PCI Hot-plug enabled */ - aml_append(dev, build_q35_osc_method(true)); + aml_append(dev, build_pci_host_bridge_osc_method(true)); } else { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } @@ -1654,19 +1149,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, /* reserve PCIHP resources */ if (pm->pcihp_io_len && (pm->pcihp_bridge_en || pm->pcihp_root_en)) { - dev = aml_device("PHPR"); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06"))); - aml_append(dev, - aml_name_decl("_UID", aml_string("PCI Hotplug resources"))); - /* device present, functioning, decoding, not shown in UI */ - aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, pm->pcihp_io_base, pm->pcihp_io_base, 1, - pm->pcihp_io_len) - ); - aml_append(dev, aml_name_decl("_CRS", crs)); - aml_append(scope, dev); + build_append_pcihp_resources(scope, + pm->pcihp_io_base, pm->pcihp_io_len); } aml_append(dsdt, scope); diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h index 275ec05..8ba3c33 100644 --- a/hw/i386/acpi-build.h +++ 
b/hw/i386/acpi-build.h @@ -5,10 +5,6 @@ extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio; -/* PCI Hot-plug registers' base. See docs/specs/acpi_pci_hotplug.rst */ -#define ACPI_PCIHP_SEJ_BASE 0x8 -#define ACPI_PCIHP_BNMR_BASE 0x10 - void acpi_setup(void); Object *acpi_get_i386_pci_host(void); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 963aa24..26be69b 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -123,8 +123,13 @@ static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val) uint16_t romask = lduw_le_p(&s->romask[addr]); uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]); uint16_t oldval = lduw_le_p(&s->mmior[addr]); + + uint16_t oldval_preserved = oldval & (romask | w1cmask); + uint16_t newval_write = val & ~romask; + uint16_t newval_w1c_set = val & w1cmask; + stw_le_p(&s->mmior[addr], - ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); + (oldval_preserved | newval_write) & ~newval_w1c_set); } static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val) @@ -132,23 +137,33 @@ static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val) uint32_t romask = ldl_le_p(&s->romask[addr]); uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); uint32_t oldval = ldl_le_p(&s->mmior[addr]); + + uint32_t oldval_preserved = oldval & (romask | w1cmask); + uint32_t newval_write = val & ~romask; + uint32_t newval_w1c_set = val & w1cmask; + stl_le_p(&s->mmior[addr], - ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); + (oldval_preserved | newval_write) & ~newval_w1c_set); } static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val) { uint64_t romask = ldq_le_p(&s->romask[addr]); uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); - uint32_t oldval = ldq_le_p(&s->mmior[addr]); + uint64_t oldval = ldq_le_p(&s->mmior[addr]); + + uint64_t oldval_preserved = oldval & (romask | w1cmask); + uint64_t newval_write = val & ~romask; + uint64_t newval_w1c_set = val & w1cmask; + stq_le_p(&s->mmior[addr], - ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); + (oldval_preserved | newval_write) & ~newval_w1c_set); } -/* OR a 64-bit register with a 64-bit value */ +/* AND a 64-bit register with a 64-bit value */ static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val) { - return amdvi_readq(s, addr) | val; + return amdvi_readq(s, addr) & val; } /* OR a 64-bit register with a 64-bit value storing result in the register */ @@ -177,19 +192,31 @@ static void amdvi_generate_msi_interrupt(AMDVIState *s) } } +static uint32_t get_next_eventlog_entry(AMDVIState *s) +{ + uint32_t evtlog_size = s->evtlog_len * AMDVI_EVENT_LEN; + return (s->evtlog_tail + AMDVI_EVENT_LEN) % evtlog_size; +} + static void amdvi_log_event(AMDVIState *s, uint64_t *evt) { + uint32_t evtlog_tail_next; + /* event logging not enabled */ if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF)) { return; } + evtlog_tail_next = get_next_eventlog_entry(s); + /* event log buffer full */ - if (s->evtlog_tail >= s->evtlog_len) { - amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF); - /* generate interrupt */ - amdvi_generate_msi_interrupt(s); + if (evtlog_tail_next == s->evtlog_head) { + /* generate overflow interrupt */ + if (s->evtlog_intr) { + amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF); + amdvi_generate_msi_interrupt(s); + } return; } @@ -198,9 +225,13 @@ static void amdvi_log_event(AMDVIState *s, uint64_t *evt) trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail); } - s->evtlog_tail += 
AMDVI_EVENT_LEN; - amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); - amdvi_generate_msi_interrupt(s); + s->evtlog_tail = evtlog_tail_next; + amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail); + + if (s->evtlog_intr) { + amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVENT_INT); + amdvi_generate_msi_interrupt(s); + } } static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, @@ -508,7 +539,7 @@ static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd) static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd) { - uint16_t devid = extract64(cmd[0], 0, 16); + uint16_t devid = cpu_to_le16(extract64(cmd[0], 0, 16)); if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) || extract64(cmd[1], 6, 6)) { amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), @@ -521,7 +552,7 @@ static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd) &devid); } else { amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12, - cpu_to_le16(extract64(cmd[1], 0, 16))); + devid); } trace_amdvi_iotlb_inval(); } @@ -592,18 +623,31 @@ static void amdvi_cmdbuf_run(AMDVIState *s) } } -static void amdvi_mmio_trace(hwaddr addr, unsigned size) +static inline uint8_t amdvi_mmio_get_index(hwaddr addr) { uint8_t index = (addr & ~0x2000) / 8; if ((addr & 0x2000)) { /* high table */ index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index; - trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07); } else { index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index; - trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07); } + + return index; +} + +static void amdvi_mmio_trace_read(hwaddr addr, unsigned size) +{ + uint8_t index = amdvi_mmio_get_index(addr); + trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07); +} + +static void amdvi_mmio_trace_write(hwaddr addr, unsigned size, uint64_t val) +{ + uint8_t index = amdvi_mmio_get_index(addr); + trace_amdvi_mmio_write(amdvi_mmio_low[index], addr, size, val, + addr & ~0x07); } static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size) @@ -623,7 +667,7 @@ static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size) } else if (size == 8) { val = amdvi_readq(s, addr); } - amdvi_mmio_trace(addr, size); + amdvi_mmio_trace_read(addr, size); return val; } @@ -633,7 +677,6 @@ static void amdvi_handle_control_write(AMDVIState *s) unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL); s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN); - s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN); s->evtlog_enabled = s->enabled && !!(control & AMDVI_MMIO_CONTROL_EVENTLOGEN); @@ -665,8 +708,8 @@ static inline void amdvi_handle_devtab_write(AMDVIState *s) uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE); s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK); - /* set device table length */ - s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 * + /* set device table length (i.e. 
number of entries table can hold) */ + s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) * (AMDVI_MMIO_DEVTAB_SIZE_UNIT / AMDVI_MMIO_DEVTAB_ENTRY_SIZE)); } @@ -704,9 +747,19 @@ static inline void amdvi_handle_excllim_write(AMDVIState *s) static inline void amdvi_handle_evtbase_write(AMDVIState *s) { uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE); + + if (amdvi_readq(s, AMDVI_MMIO_STATUS) & AMDVI_MMIO_STATUS_EVENT_INT) + /* Do not reset if eventlog interrupt bit is set */ + return; + s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK; s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE) & AMDVI_MMIO_EVTLOG_SIZE_MASK); + + /* clear tail and head pointers to 0 when event base is updated */ + s->evtlog_tail = s->evtlog_head = 0; + amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_HEAD, s->evtlog_head); + amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail); } static inline void amdvi_handle_evttail_write(AMDVIState *s) @@ -770,7 +823,7 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val, return; } - amdvi_mmio_trace(addr, size); + amdvi_mmio_trace_write(addr, size, val); switch (addr & ~0x07) { case AMDVI_MMIO_CONTROL: amdvi_mmio_reg_write(s, size, val, addr); @@ -835,6 +888,9 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val, amdvi_mmio_reg_write(s, size, val, addr); amdvi_handle_pprtail_write(s); break; + case AMDVI_MMIO_STATUS: + amdvi_mmio_reg_write(s, size, val, addr); + break; } } @@ -848,9 +904,10 @@ static inline uint64_t amdvi_get_perms(uint64_t entry) static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid, uint64_t *dte) { - if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED) - || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED) - || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) { + if ((dte[0] & AMDVI_DTE_QUAD0_RESERVED) || + (dte[1] & AMDVI_DTE_QUAD1_RESERVED) || + (dte[2] & AMDVI_DTE_QUAD2_RESERVED) || + (dte[3] & AMDVI_DTE_QUAD3_RESERVED)) { amdvi_log_illegaldevtab_error(s, devid, s->devtab + devid * AMDVI_DEVTAB_ENTRY_SIZE, 0); @@ -1541,7 +1598,6 @@ static void amdvi_init(AMDVIState *s) s->excl_allow = false; s->mmio_enabled = false; s->enabled = false; - s->ats_enabled = false; s->cmdbuf_enabled = false; /* reset MMIO */ @@ -1612,7 +1668,8 @@ static const VMStateDescription vmstate_amdvi_sysbus_migratable = { /* Updated in amdvi_handle_control_write() */ VMSTATE_BOOL(enabled, AMDVIState), VMSTATE_BOOL(ga_enabled, AMDVIState), - VMSTATE_BOOL(ats_enabled, AMDVIState), + /* bool ats_enabled is obsolete */ + VMSTATE_UNUSED(1), /* was ats_enabled */ VMSTATE_BOOL(cmdbuf_enabled, AMDVIState), VMSTATE_BOOL(completion_wait_intr, AMDVIState), VMSTATE_BOOL(evtlog_enabled, AMDVIState), @@ -1685,9 +1742,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, amdvi_uint64_equal, g_free, g_free); - /* Pseudo address space under root PCI bus. */ - x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); - /* set up MMIO */ memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", AMDVI_MMIO_SIZE); @@ -1710,6 +1764,9 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST, &s->mr_ir, 1); + /* Pseudo address space under root PCI bus.
*/ + x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); + if (kvm_enabled() && x86ms->apic_id_limit > 255 && !s->xtsup) { error_report("AMD IOMMU with x2APIC configuration requires xtsup=on"); exit(EXIT_FAILURE); diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 5672bde..2476296 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -25,6 +25,8 @@ #include "hw/i386/x86-iommu.h" #include "qom/object.h" +#define GENMASK64(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l)) + /* Capability registers */ #define AMDVI_CAPAB_BAR_LOW 0x04 #define AMDVI_CAPAB_BAR_HIGH 0x08 @@ -66,34 +68,34 @@ #define AMDVI_MMIO_SIZE 0x4000 -#define AMDVI_MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1) -#define AMDVI_MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~ \ - AMDVI_MMIO_DEVTAB_SIZE_MASK) +#define AMDVI_MMIO_DEVTAB_SIZE_MASK GENMASK64(8, 0) +#define AMDVI_MMIO_DEVTAB_BASE_MASK GENMASK64(51, 12) + #define AMDVI_MMIO_DEVTAB_ENTRY_SIZE 32 #define AMDVI_MMIO_DEVTAB_SIZE_UNIT 4096 /* some of this are similar but just for readability */ #define AMDVI_MMIO_CMDBUF_SIZE_BYTE (AMDVI_MMIO_COMMAND_BASE + 7) #define AMDVI_MMIO_CMDBUF_SIZE_MASK 0x0f -#define AMDVI_MMIO_CMDBUF_BASE_MASK AMDVI_MMIO_DEVTAB_BASE_MASK -#define AMDVI_MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f) -#define AMDVI_MMIO_CMDBUF_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK +#define AMDVI_MMIO_CMDBUF_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_CMDBUF_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_CMDBUF_TAIL_MASK GENMASK64(18, 4) #define AMDVI_MMIO_EVTLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7) -#define AMDVI_MMIO_EVTLOG_SIZE_MASK AMDVI_MMIO_CMDBUF_SIZE_MASK -#define AMDVI_MMIO_EVTLOG_BASE_MASK AMDVI_MMIO_CMDBUF_BASE_MASK -#define AMDVI_MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f) -#define AMDVI_MMIO_EVTLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK +#define AMDVI_MMIO_EVTLOG_SIZE_MASK 0x0f +#define AMDVI_MMIO_EVTLOG_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_EVTLOG_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_EVTLOG_TAIL_MASK GENMASK64(18, 4) -#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7) -#define AMDVI_MMIO_PPRLOG_HEAD_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK -#define AMDVI_MMIO_PPRLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK -#define AMDVI_MMIO_PPRLOG_BASE_MASK AMDVI_MMIO_EVTLOG_BASE_MASK -#define AMDVI_MMIO_PPRLOG_SIZE_MASK AMDVI_MMIO_EVTLOG_SIZE_MASK +#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_PPR_BASE + 7) +#define AMDVI_MMIO_PPRLOG_SIZE_MASK 0x0f +#define AMDVI_MMIO_PPRLOG_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_PPRLOG_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_PPRLOG_TAIL_MASK GENMASK64(18, 4) #define AMDVI_MMIO_EXCL_ENABLED_MASK (1ULL << 0) #define AMDVI_MMIO_EXCL_ALLOW_MASK (1ULL << 1) -#define AMDVI_MMIO_EXCL_LIMIT_MASK AMDVI_MMIO_DEVTAB_BASE_MASK +#define AMDVI_MMIO_EXCL_LIMIT_MASK GENMASK64(51, 12) #define AMDVI_MMIO_EXCL_LIMIT_LOW 0xfff /* mmio control register flags */ @@ -109,6 +111,7 @@ #define AMDVI_MMIO_STATUS_CMDBUF_RUN (1 << 4) #define AMDVI_MMIO_STATUS_EVT_RUN (1 << 3) #define AMDVI_MMIO_STATUS_COMP_INT (1 << 2) +#define AMDVI_MMIO_STATUS_EVENT_INT (1 << 1) #define AMDVI_MMIO_STATUS_EVT_OVF (1 << 0) #define AMDVI_CMDBUF_ID_BYTE 0x07 @@ -130,14 +133,14 @@ #define AMDVI_DEV_TRANSLATION_VALID (1ULL << 1) #define AMDVI_DEV_MODE_MASK 0x7 #define AMDVI_DEV_MODE_RSHIFT 9 -#define AMDVI_DEV_PT_ROOT_MASK 0xffffffffff000 +#define AMDVI_DEV_PT_ROOT_MASK GENMASK64(51, 12) #define AMDVI_DEV_PT_ROOT_RSHIFT 12 #define AMDVI_DEV_PERM_SHIFT 61 #define AMDVI_DEV_PERM_READ (1ULL << 61) #define 
AMDVI_DEV_PERM_WRITE (1ULL << 62) /* Device table entry bits 64:127 */ -#define AMDVI_DEV_DOMID_ID_MASK ((1ULL << 16) - 1) +#define AMDVI_DEV_DOMID_ID_MASK GENMASK64(15, 0) /* Event codes and flags, as stored in the info field */ #define AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 12) @@ -162,9 +165,10 @@ #define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */ /* reserved DTE bits */ -#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x80300000000000fc -#define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100 -#define AMDVI_DTE_UPPER_QUAD_RESERVED 0x08f0000000000000 +#define AMDVI_DTE_QUAD0_RESERVED (GENMASK64(6, 2) | GENMASK64(63, 63)) +#define AMDVI_DTE_QUAD1_RESERVED 0 +#define AMDVI_DTE_QUAD2_RESERVED GENMASK64(53, 52) +#define AMDVI_DTE_QUAD3_RESERVED (GENMASK64(14, 0) | GENMASK64(53, 48)) /* AMDVI paging mode */ #define AMDVI_GATS_MODE (2ULL << 12) @@ -194,16 +198,12 @@ #define AMDVI_PAGE_SIZE (1ULL << AMDVI_PAGE_SHIFT) #define AMDVI_PAGE_SHIFT_4K 12 -#define AMDVI_PAGE_MASK_4K (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1)) - -#define AMDVI_MAX_VA_ADDR (48UL << 5) -#define AMDVI_MAX_PH_ADDR (40UL << 8) -#define AMDVI_MAX_GVA_ADDR (48UL << 15) +#define AMDVI_PAGE_MASK_4K GENMASK64(63, 12) -/* Completion Wait data size */ -#define AMDVI_COMPLETION_DATA_SIZE 8 +#define AMDVI_MAX_GVA_ADDR (2UL << 5) +#define AMDVI_MAX_PH_ADDR (40UL << 8) +#define AMDVI_MAX_VA_ADDR (48UL << 15) -#define AMDVI_COMMAND_SIZE 16 /* Completion Wait data size */ #define AMDVI_COMPLETION_DATA_SIZE 8 @@ -228,7 +228,7 @@ #define AMDVI_IR_INTCTL_PASS 1 #define AMDVI_IR_INTCTL_REMAP 2 -#define AMDVI_IR_PHYS_ADDR_MASK (((1ULL << 45) - 1) << 6) +#define AMDVI_IR_PHYS_ADDR_MASK GENMASK64(51, 6) /* MSI data 10:0 bits (section 2.2.5.1 Fig 14) */ #define AMDVI_IRTE_OFFSET 0x7ff @@ -323,7 +323,6 @@ struct AMDVIState { uint64_t mmio_addr; bool enabled; /* IOMMU enabled */ - bool ats_enabled; /* address translation enabled */ bool cmdbuf_enabled; /* command buffer enabled */ bool evtlog_enabled; /* event log enabled */ bool excl_enabled; diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 69d72ad..83c5e44 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1987,9 +1987,9 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, uint32_t pasid) { dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); - uint32_t level = vtd_get_iova_level(s, ce, pasid); uint32_t offset; uint64_t flpte, flag_ad = VTD_FL_A; + *flpte_level = vtd_get_iova_level(s, ce, pasid); if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) { error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 "," @@ -1998,11 +1998,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, } while (true) { - offset = vtd_iova_level_offset(iova, level); + offset = vtd_iova_level_offset(iova, *flpte_level); flpte = vtd_get_pte(addr, offset); if (flpte == (uint64_t)-1) { - if (level == vtd_get_iova_level(s, ce, pasid)) { + if (*flpte_level == vtd_get_iova_level(s, ce, pasid)) { /* Invalid programming of pasid-entry */ return -VTD_FR_PASID_ENTRY_FSPTPTR_INV; } else { @@ -2028,15 +2028,15 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, if (is_write && !(flpte & VTD_FL_RW)) { return -VTD_FR_SM_WRITE; } - if (vtd_flpte_nonzero_rsvd(flpte, level)) { + if (vtd_flpte_nonzero_rsvd(flpte, *flpte_level)) { error_report_once("%s: detected flpte reserved non-zero " "iova=0x%" PRIx64 ", level=0x%" PRIx32 "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")", - __func__, iova, level, flpte, pasid); + __func__, iova, 
*flpte_level, flpte, pasid); return -VTD_FR_FS_PAGING_ENTRY_RSVD; } - if (vtd_is_last_pte(flpte, level) && is_write) { + if (vtd_is_last_pte(flpte, *flpte_level) && is_write) { flag_ad |= VTD_FL_D; } @@ -2044,14 +2044,13 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, return -VTD_FR_FS_BIT_UPDATE_FAILED; } - if (vtd_is_last_pte(flpte, level)) { + if (vtd_is_last_pte(flpte, *flpte_level)) { *flptep = flpte; - *flpte_level = level; return 0; } addr = vtd_get_pte_addr(flpte, aw_bits); - level--; + (*flpte_level)--; } } @@ -2092,7 +2091,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, uint8_t bus_num = pci_bus_num(bus); VTDContextCacheEntry *cc_entry; uint64_t pte, page_mask; - uint32_t level, pasid = vtd_as->pasid; + uint32_t level = UINT32_MAX; + uint32_t pasid = vtd_as->pasid; uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; bool is_fpd_set = false; @@ -2251,14 +2251,19 @@ out: entry->iova = addr & page_mask; entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask; entry->addr_mask = ~page_mask; - entry->perm = access_flags; + entry->perm = (is_write ? access_flags : (access_flags & (~IOMMU_WO))); return true; error: vtd_iommu_unlock(s); entry->iova = 0; entry->translated_addr = 0; - entry->addr_mask = 0; + /* + * Set the mask for ATS (the range must be present even when the + * translation fails : PCIe rev 5 10.2.3.5) + */ + entry->addr_mask = (level != UINT32_MAX) ? + (~vtd_pt_level_page_mask(level)) : (~VTD_PAGE_MASK_4K); entry->perm = IOMMU_NONE; return false; } @@ -2503,6 +2508,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, .translated_addr = 0, .addr_mask = size - 1, .perm = IOMMU_NONE, + .pasid = vtd_as->pasid, }, }; memory_region_notify_iommu(&vtd_as->iommu, 0, event); @@ -2822,6 +2828,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { uint64_t mask[4] = {VTD_INV_DESC_WAIT_RSVD_LO, VTD_INV_DESC_WAIT_RSVD_HI, VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + bool ret = true; if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false, __func__, "wait")) { @@ -2833,8 +2840,6 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) uint32_t status_data = (uint32_t)(inv_desc->lo >> VTD_INV_DESC_WAIT_DATA_SHIFT); - assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF)); - /* FIXME: need to be masked with HAW? 
*/ dma_addr_t status_addr = inv_desc->hi; trace_vtd_inv_desc_wait_sw(status_addr, status_data); @@ -2843,18 +2848,22 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) &status_data, sizeof(status_data), MEMTXATTRS_UNSPECIFIED)) { trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo); - return false; + ret = false; } - } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) { + } + + if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) { /* Interrupt flag */ vtd_generate_completion_event(s); - } else { + } + + if (!(inv_desc->lo & (VTD_INV_DESC_WAIT_IF | VTD_INV_DESC_WAIT_SW))) { error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64 " (unknown type)", __func__, inv_desc->hi, inv_desc->lo); return false; } - return true; + return ret; } static bool vtd_process_context_cache_desc(IntelIOMMUState *s, @@ -3090,6 +3099,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as, event.entry.iova = addr; event.entry.perm = IOMMU_NONE; event.entry.translated_addr = 0; + event.entry.pasid = vtd_dev_as->pasid; memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); } @@ -3672,6 +3682,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr, IOMMUTLBEntry iotlb = { /* We'll fill in the rest later. */ .target_as = &address_space_memory, + .pasid = vtd_as->pasid, }; bool success; @@ -4587,7 +4598,7 @@ static void vtd_cap_init(IntelIOMMUState *s) } if (s->pasid) { - s->ecap |= VTD_ECAP_PASID; + s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS; } } @@ -4730,10 +4741,118 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &vtd_as->as; } +static IOMMUTLBEntry vtd_iommu_ats_do_translate(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flags) +{ + IOMMUTLBEntry entry; + VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); + + if (vtd_is_interrupt_addr(addr)) { + vtd_report_ir_illegal_access(vtd_as, addr, flags & IOMMU_WO); + entry.target_as = &address_space_memory; + entry.iova = 0; + entry.translated_addr = 0; + entry.addr_mask = ~VTD_PAGE_MASK_4K; + entry.perm = IOMMU_NONE; + entry.pasid = PCI_NO_PASID; + } else { + entry = vtd_iommu_translate(iommu, addr, flags, 0); + } + + return entry; +} + +static ssize_t vtd_ats_request_translation(PCIBus *bus, void *opaque, + int devfn, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count) +{ + IntelIOMMUState *s = opaque; + VTDAddressSpace *vtd_as; + IOMMUAccessFlags flags = IOMMU_ACCESS_FLAG_FULL(true, !no_write, exec_req, + priv_req, false, false); + ssize_t res_index = 0; + hwaddr target_address = addr + length; + IOMMUTLBEntry entry; + + vtd_as = vtd_find_add_as(s, bus, devfn, pasid); + *err_count = 0; + + while ((addr < target_address) && (res_index < result_length)) { + entry = vtd_iommu_ats_do_translate(&vtd_as->iommu, addr, flags); + entry.perm &= ~IOMMU_GLOBAL; /* Spec 4.1.2: Global Mapping never set */ + + if ((entry.perm & flags) != flags) { + *err_count += 1; /* Less than expected */ + } + + result[res_index] = entry; + res_index += 1; + addr = (addr & (~entry.addr_mask)) + (entry.addr_mask + 1); + } + + /* Buffer too small */ + if (addr < target_address) { + return -ENOMEM; + } + + return res_index; +} + +static void vtd_init_iotlb_notifier(PCIBus *bus, void *opaque, int devfn, + IOMMUNotifier *n, IOMMUNotify fn, + void *user_opaque) +{ + n->opaque = user_opaque; + iommu_notifier_init(n, fn, 
IOMMU_NOTIFIER_DEVIOTLB_EVENTS, 0, + HWADDR_MAX, 0); +} + +static void vtd_get_iotlb_info(void *opaque, uint8_t *addr_width, + uint32_t *min_page_size) +{ + IntelIOMMUState *s = opaque; + + *addr_width = s->aw_bits; + *min_page_size = VTD_PAGE_SIZE; +} + +static void vtd_register_iotlb_notifier(PCIBus *bus, void *opaque, + int devfn, uint32_t pasid, + IOMMUNotifier *n) +{ + IntelIOMMUState *s = opaque; + VTDAddressSpace *vtd_as; + + vtd_as = vtd_find_add_as(s, bus, devfn, pasid); + memory_region_register_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n, + &error_fatal); +} + +static void vtd_unregister_iotlb_notifier(PCIBus *bus, void *opaque, + int devfn, uint32_t pasid, + IOMMUNotifier *n) +{ + IntelIOMMUState *s = opaque; + VTDAddressSpace *vtd_as; + + vtd_as = vtd_find_add_as(s, bus, devfn, pasid); + memory_region_unregister_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n); +} + static PCIIOMMUOps vtd_iommu_ops = { .get_address_space = vtd_host_dma_iommu, .set_iommu_device = vtd_dev_set_iommu_device, .unset_iommu_device = vtd_dev_unset_iommu_device, + .get_iotlb_info = vtd_get_iotlb_info, + .init_iotlb_notifier = vtd_init_iotlb_notifier, + .register_iotlb_notifier = vtd_register_iotlb_notifier, + .unregister_iotlb_notifier = vtd_unregister_iotlb_notifier, + .ats_request_translation = vtd_ats_request_translation, }; static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index e8b211e..360e937 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -192,6 +192,7 @@ #define VTD_ECAP_SC (1ULL << 7) #define VTD_ECAP_MHMV (15ULL << 20) #define VTD_ECAP_SRS (1ULL << 31) +#define VTD_ECAP_PSS (7ULL << 35) /* limit: MemTxAttrs::pid */ #define VTD_ECAP_PASID (1ULL << 40) #define VTD_ECAP_SMTS (1ULL << 43) #define VTD_ECAP_SLTS (1ULL << 46) diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index e0daf0d..94d22a2 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -49,6 +49,7 @@ #include "hw/acpi/generic_event_device.h" #include "hw/pci-host/gpex.h" #include "hw/usb/xhci.h" +#include "hw/vfio/types.h" #include "elf.h" #include "kvm/kvm_i386.h" @@ -633,6 +634,8 @@ GlobalProperty microvm_properties[] = { * so reserving io space is not going to work. Turn it off. 
*/ { "pcie-root-port", "io-reserve", "0" }, + { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" }, + { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" }, }; static void microvm_class_init(ObjectClass *oc, const void *data) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index b211633..2f58e73 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -81,7 +81,10 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, -GlobalProperty pc_compat_10_0[] = {}; +GlobalProperty pc_compat_10_0[] = { + { TYPE_X86_CPU, "x-consistent-cache", "false" }, + { TYPE_X86_CPU, "x-vendor-cpuid-only-v2", "false" }, +}; const size_t pc_compat_10_0_len = G_N_ELEMENTS(pc_compat_10_0); GlobalProperty pc_compat_9_2[] = {}; @@ -609,7 +612,7 @@ void pc_machine_done(Notifier *notifier, void *data) &error_fatal); if (pcms->cxl_devices_state.is_enabled) { - cxl_fmws_link_targets(&pcms->cxl_devices_state, &error_fatal); + cxl_fmws_link_targets(&error_fatal); } /* set the number of CPUs */ @@ -718,20 +721,28 @@ static uint64_t pc_get_cxl_range_start(PCMachineState *pcms) return cxl_base; } -static uint64_t pc_get_cxl_range_end(PCMachineState *pcms) +static int cxl_get_fmw_end(Object *obj, void *opaque) { - uint64_t start = pc_get_cxl_range_start(pcms) + MiB; + struct CXLFixedWindow *fw; + uint64_t *start = opaque; - if (pcms->cxl_devices_state.fixed_windows) { - GList *it; - - start = ROUND_UP(start, 256 * MiB); - for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) { - CXLFixedWindow *fw = it->data; - start += fw->size; - } + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { + return 0; } + fw = CXL_FMW(obj); + + *start += fw->size; + + return 0; +} +static uint64_t pc_get_cxl_range_end(PCMachineState *pcms) +{ + uint64_t start = pc_get_cxl_range_start(pcms) + MiB; + + /* Ordering doesn't matter so no need to build a sorted list */ + object_child_foreach_recursive(object_get_root(), cxl_get_fmw_end, + &start); return start; } @@ -933,23 +944,9 @@ void pc_memory_init(PCMachineState *pcms, cxl_base = pc_get_cxl_range_start(pcms); memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); memory_region_add_subregion(system_memory, cxl_base, mr); - cxl_resv_end = cxl_base + cxl_size; - if (pcms->cxl_devices_state.fixed_windows) { - hwaddr cxl_fmw_base; - GList *it; - - cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB); - for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) { - CXLFixedWindow *fw = it->data; - - fw->base = cxl_fmw_base; - memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, - "cxl-fixed-memory-region", fw->size); - memory_region_add_subregion(system_memory, fw->base, &fw->mr); - cxl_fmw_base += fw->size; - cxl_resv_end = cxl_fmw_base; - } - } + cxl_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB); + cxl_resv_end = cxl_fmws_set_memmap(cxl_base, maxphysaddr); + cxl_fmws_update_mmio(); } /* Initialize PC system firmware */ @@ -1833,6 +1830,18 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data) object_class_property_add_bool(oc, "fd-bootchk", pc_machine_get_fd_bootchk, pc_machine_set_fd_bootchk); + +#if defined(CONFIG_IGVM) + object_class_property_add_link(oc, "igvm-cfg", + TYPE_IGVM_CFG, + offsetof(X86MachineState, igvm), + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG); + object_class_property_set_description(oc, "igvm-cfg", + "Set IGVM configuration"); +#endif + + } static const TypeInfo pc_machine_info = { diff --git a/hw/i386/pc_piix.c 
b/hw/i386/pc_piix.c index ea7572e..c033242 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -49,6 +49,7 @@ #include "hw/i2c/smbus_eeprom.h" #include "system/memory.h" #include "hw/acpi/acpi.h" +#include "hw/vfio/types.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "system/xen.h" @@ -77,6 +78,13 @@ static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 }; static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; #endif +static GlobalProperty pc_piix_compat_defaults[] = { + { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" }, + { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" }, +}; +static const size_t pc_piix_compat_defaults_len = + G_N_ELEMENTS(pc_piix_compat_defaults); + /* * Return the global irq number corresponding to a given device irq * pin. We could also use the bus number to have a more precise mapping. @@ -366,6 +374,16 @@ static void pc_init1(MachineState *machine, const char *pci_type) x86_nvdimm_acpi_dsmio, x86ms->fw_cfg, OBJECT(pcms)); } + +#if defined(CONFIG_IGVM) + /* Apply guest state from IGVM if supplied */ + if (x86ms->igvm) { + if (IGVM_CFG_GET_CLASS(x86ms->igvm) + ->process(x86ms->igvm, machine->cgs, false, &error_fatal) < 0) { + g_assert_not_reached(); + } + } +#endif } typedef enum PCSouthBridgeOption { @@ -482,6 +500,8 @@ static void pc_i440fx_machine_options(MachineClass *m) pc_set_south_bridge); object_class_property_set_description(oc, "x-south-bridge", "Use a different south bridge than PIIX3"); + compat_props_add(m->compat_props, + pc_piix_compat_defaults, pc_piix_compat_defaults_len); } static void pc_i440fx_machine_10_1_options(MachineClass *m) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 33211b1..b309b2b 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -45,6 +45,7 @@ #include "hw/i386/pc.h" #include "hw/i386/amd_iommu.h" #include "hw/i386/intel_iommu.h" +#include "hw/vfio/types.h" #include "hw/virtio/virtio-iommu.h" #include "hw/display/ramfb.h" #include "hw/ide/pci.h" @@ -67,6 +68,8 @@ static GlobalProperty pc_q35_compat_defaults[] = { { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "39" }, + { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" }, + { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" }, }; static const size_t pc_q35_compat_defaults_len = G_N_ELEMENTS(pc_q35_compat_defaults); @@ -325,6 +328,16 @@ static void pc_q35_init(MachineState *machine) x86_nvdimm_acpi_dsmio, x86ms->fw_cfg, OBJECT(pcms)); } + +#if defined(CONFIG_IGVM) + /* Apply guest state from IGVM if supplied */ + if (x86ms->igvm) { + if (IGVM_CFG_GET_CLASS(x86ms->igvm) + ->process(x86ms->igvm, machine->cgs, false, &error_fatal) < 0) { + g_assert_not_reached(); + } + } +#endif } #define DEFINE_Q35_MACHINE(major, minor) \ diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 821396c..1a12b63 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -220,7 +220,13 @@ void pc_system_firmware_init(PCMachineState *pcms, BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; if (!pcmc->pci_enabled) { - x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); + /* + * If an IGVM file is specified then the firmware must be provided + * in the IGVM file. 
+ */ + if (!X86_MACHINE(pcms)->igvm) { + x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); + } return; } @@ -240,8 +246,13 @@ void pc_system_firmware_init(PCMachineState *pcms, } if (!pflash_blk[0]) { - /* Machine property pflash0 not set, use ROM mode */ - x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); + /* + * Machine property pflash0 not set, use ROM mode unless using IGVM, + * in which case the firmware must be provided by the IGVM file. + */ + if (!X86_MACHINE(pcms)->igvm) { + x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); + } } else { if (kvm_enabled() && !kvm_readonly_mem_enabled()) { /* @@ -257,6 +268,20 @@ void pc_system_firmware_init(PCMachineState *pcms, } pc_system_flash_cleanup_unused(pcms); + + /* + * The user should not have specified any pflash devices when using IGVM + * to configure the guest. + */ + if (X86_MACHINE(pcms)->igvm) { + for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { + if (pcms->flash[i]) { + error_report("pflash devices cannot be configured when " + "using IGVM"); + exit(1); + } + } + } } void x86_firmware_configure(hwaddr gpa, void *ptr, int size) diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 1cee681..e438d8c 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -612,6 +612,7 @@ static const Property arm_gicv3_common_properties[] = { DEFINE_PROP_BOOL("has-lpi", GICv3State, lpi_enable, 0), DEFINE_PROP_BOOL("has-nmi", GICv3State, nmi_support, 0), DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0), + DEFINE_PROP_UINT32("maintenance-interrupt-id", GICv3State, maint_irq, 0), /* * Compatibility property: force 8 bits of physical priority, even * if the CPU being emulated should have fewer. diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 3be3bf6..8ed88e7 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -22,6 +22,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/intc/arm_gicv3_common.h" +#include "hw/arm/virt.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "system/kvm.h" @@ -825,6 +826,34 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) return; } + if (s->maint_irq) { + Error *kvm_nv_migration_blocker = NULL; + int ret; + + error_setg(&kvm_nv_migration_blocker, + "Live migration disabled because KVM nested virt is enabled"); + if (migrate_add_blocker(&kvm_nv_migration_blocker, errp)) { + error_free(kvm_nv_migration_blocker); + return; + } + + ret = kvm_device_check_attr(s->dev_fd, + KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ, 0); + if (!ret) { + error_setg_errno(errp, errno, + "VGICv3 setting maintenance IRQ is not " + "supported by this host kernel"); + return; + } + + ret = kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ, 0, + &s->maint_irq, true, errp); + if (ret) { + error_setg_errno(errp, errno, "Failed to set VGIC maintenance IRQ"); + return; + } + } + multiple_redist_region_allowed = kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION); diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 6d85720..7c78961 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -1279,7 +1279,7 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; } - return cpu->id_afr0; + return GET_IDREG(isar, ID_AFR0); case 0xd50: /* MMFR0. 
*/ if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) { goto bad_offset; @@ -1331,7 +1331,7 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) } return GET_IDREG(&cpu->isar, ID_ISAR5); case 0xd78: /* CLIDR */ - return cpu->clidr; + return GET_IDREG(&cpu->isar, CLIDR); case 0xd7c: /* CTR */ return cpu->ctr; case 0xd80: /* CSSIDR */ diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c index 8b8ac6b..3e9c88d 100644 --- a/hw/intc/loongarch_extioi.c +++ b/hw/intc/loongarch_extioi.c @@ -377,13 +377,6 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp) } } -static void loongarch_extioi_unrealize(DeviceState *dev) -{ - LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(dev); - - g_free(s->cpu); -} - static void loongarch_extioi_reset_hold(Object *obj, ResetType type) { LoongArchExtIOIClass *lec = LOONGARCH_EXTIOI_GET_CLASS(obj); @@ -436,8 +429,6 @@ static void loongarch_extioi_class_init(ObjectClass *klass, const void *data) device_class_set_parent_realize(dc, loongarch_extioi_realize, &lec->parent_realize); - device_class_set_parent_unrealize(dc, loongarch_extioi_unrealize, - &lec->parent_unrealize); resettable_class_set_parent_phases(rc, NULL, loongarch_extioi_reset_hold, NULL, &lec->parent_phases); lecc->pre_save = vmstate_extioi_pre_save; diff --git a/hw/intc/loongarch_extioi_common.c b/hw/intc/loongarch_extioi_common.c index 4a904b3..ba03383 100644 --- a/hw/intc/loongarch_extioi_common.c +++ b/hw/intc/loongarch_extioi_common.c @@ -108,6 +108,13 @@ static void loongarch_extioi_common_realize(DeviceState *dev, Error **errp) } } +static void loongarch_extioi_common_unrealize(DeviceState *dev) +{ + LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(dev); + + g_free(s->cpu); +} + static void loongarch_extioi_common_reset_hold(Object *obj, ResetType type) { LoongArchExtIOICommonClass *lecc = LOONGARCH_EXTIOI_COMMON_GET_CLASS(obj); @@ -221,6 +228,8 @@ static void loongarch_extioi_common_class_init(ObjectClass *klass, device_class_set_parent_realize(dc, loongarch_extioi_common_realize, &lecc->parent_realize); + device_class_set_parent_unrealize(dc, loongarch_extioi_common_unrealize, + &lecc->parent_unrealize); resettable_class_set_parent_phases(rc, NULL, loongarch_extioi_common_reset_hold, NULL, &lecc->parent_phases); diff --git a/hw/intc/loongarch_extioi_kvm.c b/hw/intc/loongarch_extioi_kvm.c index 0133540..aa2e8c7 100644 --- a/hw/intc/loongarch_extioi_kvm.c +++ b/hw/intc/loongarch_extioi_kvm.c @@ -6,7 +6,6 @@ */ #include "qemu/osdep.h" -#include "qemu/typedefs.h" #include "hw/intc/loongarch_extioi.h" #include "linux/kvm.h" #include "qapi/error.h" diff --git a/hw/intc/loongarch_ipi_kvm.c b/hw/intc/loongarch_ipi_kvm.c index 4cb3acc..dd4c367 100644 --- a/hw/intc/loongarch_ipi_kvm.c +++ b/hw/intc/loongarch_ipi_kvm.c @@ -23,36 +23,41 @@ static void kvm_ipi_access_regs(void *opaque, bool write) LoongarchIPIState *lis = LOONGARCH_IPI(opaque); IPICore *core; uint64_t attr; - int cpu, fd = lis->dev_fd; + int i, cpu_index, fd = lis->dev_fd; if (fd == 0) { return; } - for (cpu = 0; cpu < ipi->num_cpu; cpu++) { - core = &ipi->cpu[cpu]; - attr = (cpu << 16) | CORE_STATUS_OFF; + for (i = 0; i < ipi->num_cpu; i++) { + core = &ipi->cpu[i]; + if (core->cpu == NULL) { + continue; + } + cpu_index = i; + + attr = (cpu_index << 16) | CORE_STATUS_OFF; kvm_ipi_access_reg(fd, attr, &core->status, write); - attr = (cpu << 16) | CORE_EN_OFF; + attr = (cpu_index << 16) | CORE_EN_OFF; kvm_ipi_access_reg(fd, attr, &core->en, write); - attr = (cpu << 16) | 
CORE_SET_OFF; + attr = (cpu_index << 16) | CORE_SET_OFF; kvm_ipi_access_reg(fd, attr, &core->set, write); - attr = (cpu << 16) | CORE_CLEAR_OFF; + attr = (cpu_index << 16) | CORE_CLEAR_OFF; kvm_ipi_access_reg(fd, attr, &core->clear, write); - attr = (cpu << 16) | CORE_BUF_20; + attr = (cpu_index << 16) | CORE_BUF_20; kvm_ipi_access_reg(fd, attr, &core->buf[0], write); - attr = (cpu << 16) | CORE_BUF_28; + attr = (cpu_index << 16) | CORE_BUF_28; kvm_ipi_access_reg(fd, attr, &core->buf[2], write); - attr = (cpu << 16) | CORE_BUF_30; + attr = (cpu_index << 16) | CORE_BUF_30; kvm_ipi_access_reg(fd, attr, &core->buf[4], write); - attr = (cpu << 16) | CORE_BUF_38; + attr = (cpu_index << 16) | CORE_BUF_38; kvm_ipi_access_reg(fd, attr, &core->buf[6], write); } } diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index 935c0e4..c2ca40b 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -470,14 +470,13 @@ static bool pnv_xive_is_cpu_enabled(PnvXive *xive, PowerPCCPU *cpu) return xive->regs[reg >> 3] & PPC_BIT(bit); } -static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, XiveTCTXMatch *match) +static bool pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) { PnvXive *xive = PNV_XIVE(xptr); PnvChip *chip = xive->chip; - int count = 0; int i, j; for (i = 0; i < chip->nr_cores; i++) { @@ -510,17 +509,18 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a " "thread context NVT %x/%x\n", nvt_blk, nvt_idx); - return -1; + match->count++; + continue; } match->ring = ring; match->tctx = tctx; - count++; + match->count++; } } } - return count; + return !!match->count; } static uint32_t pnv_xive_presenter_get_config(XivePresenter *xptr) diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index ec8b0c6..e019cad 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -101,12 +101,10 @@ static uint32_t pnv_xive2_block_id(PnvXive2 *xive) } /* - * Remote access to controllers. HW uses MMIOs. For now, a simple scan - * of the chips is good enough. - * - * TODO: Block scope support + * Remote access to INT controllers. HW uses MMIOs(?). For now, a simple + * scan of all the chips' INT controllers is good enough. 
*/ -static PnvXive2 *pnv_xive2_get_remote(uint8_t blk) +static PnvXive2 *pnv_xive2_get_remote(uint32_t vsd_type, hwaddr fwd_addr) { PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); int i; @@ -115,10 +113,23 @@ Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]); PnvXive2 *xive = &chip10->xive; - if (pnv_xive2_block_id(xive) == blk) { + /* + * Is this the XIVE whose MMIO range for this VSD type matches + * the forwarded VSD address? + */ + if ((vsd_type == VST_ESB && fwd_addr == xive->esb_base) || + (vsd_type == VST_END && fwd_addr == xive->end_base) || + ((vsd_type == VST_NVP || + vsd_type == VST_NVG) && fwd_addr == xive->nvpg_base) || + (vsd_type == VST_NVC && fwd_addr == xive->nvc_base)) { return xive; } } + + qemu_log_mask(LOG_GUEST_ERROR, + "XIVE: >>>>> %s vsd_type %u fwd_addr 0x%"HWADDR_PRIx + " NOT FOUND\n", + __func__, vsd_type, fwd_addr); return NULL; } @@ -251,8 +262,7 @@ static uint64_t pnv_xive2_vst_addr(PnvXive2 *xive, uint32_t type, uint8_t blk, /* Remote VST access */ if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) { - xive = pnv_xive2_get_remote(blk); - + xive = pnv_xive2_get_remote(type, (vsd & VSD_ADDRESS_MASK)); return xive ? pnv_xive2_vst_addr(xive, type, blk, idx) : 0; } @@ -595,20 +605,28 @@ static uint32_t pnv_xive2_get_config(Xive2Router *xrtr) { PnvXive2 *xive = PNV_XIVE2(xrtr); uint32_t cfg = 0; + uint64_t reg = xive->cq_regs[CQ_XIVE_CFG >> 3]; - if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS) { + if (reg & CQ_XIVE_CFG_GEN1_TIMA_OS) { cfg |= XIVE2_GEN1_TIMA_OS; } - if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_EN_VP_SAVE_RESTORE) { + if (reg & CQ_XIVE_CFG_EN_VP_SAVE_RESTORE) { cfg |= XIVE2_VP_SAVE_RESTORE; } - if (GETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, - xive->cq_regs[CQ_XIVE_CFG >> 3]) == CQ_XIVE_CFG_THREADID_8BITS) { + if (GETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, reg) == + CQ_XIVE_CFG_THREADID_8BITS) { cfg |= XIVE2_THREADID_8BITS; } + if (reg & CQ_XIVE_CFG_EN_VP_GRP_PRIORITY) { + cfg |= XIVE2_EN_VP_GRP_PRIORITY; + } + + cfg = SETFIELD(XIVE2_VP_INT_PRIO, cfg, + GETFIELD(CQ_XIVE_CFG_VP_INT_PRIO, reg)); + return cfg; } @@ -622,24 +640,28 @@ static bool pnv_xive2_is_cpu_enabled(PnvXive2 *xive, PowerPCCPU *cpu) return xive->tctxt_regs[reg >> 3] & PPC_BIT(bit); } -static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, XiveTCTXMatch *match) +static bool pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) { PnvXive2 *xive = PNV_XIVE2(xptr); PnvChip *chip = xive->chip; - int count = 0; int i, j; bool gen1_tima_os = xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; + static int next_start_core; + static int next_start_thread; + int start_core = next_start_core; + int start_thread = next_start_thread; for (i = 0; i < chip->nr_cores; i++) { - PnvCore *pc = chip->cores[i]; + PnvCore *pc = chip->cores[(i + start_core) % chip->nr_cores]; CPUCore *cc = CPU_CORE(pc); for (j = 0; j < cc->nr_threads; j++) { - PowerPCCPU *cpu = pc->threads[j]; + /* Start search for match with different thread each call */ + PowerPCCPU *cpu = pc->threads[(j + start_thread) % cc->nr_threads]; XiveTCTX *tctx; int ring; @@ -669,7 +691,8 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, "thread context NVT %x/%x\n", nvt_blk, nvt_idx); /* Should set a FIR if we ever 
model it */ - return -1; + match->count++; + continue; } /* * For a group notification, we need to know if the @@ -684,14 +707,23 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, if (!match->tctx) { match->ring = ring; match->tctx = tctx; + + next_start_thread = j + start_thread + 1; + if (next_start_thread >= cc->nr_threads) { + next_start_thread = 0; + next_start_core = i + start_core + 1; + if (next_start_core >= chip->nr_cores) { + next_start_core = 0; + } + } } - count++; + match->count++; } } } } - return count; + return !!match->count; } static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr) @@ -1173,7 +1205,8 @@ static void pnv_xive2_ic_cq_write(void *opaque, hwaddr offset, case CQ_FIRMASK_OR: /* FIR error reporting */ break; default: - xive2_error(xive, "CQ: invalid write 0x%"HWADDR_PRIx, offset); + xive2_error(xive, "CQ: invalid write 0x%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); return; } @@ -1304,7 +1337,6 @@ static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset, case VC_ENDC_WATCH2_SPEC: case VC_ENDC_WATCH3_SPEC: watch_engine = (offset - VC_ENDC_WATCH0_SPEC) >> 6; - xive->vc_regs[reg] &= ~(VC_ENDC_WATCH_FULL | VC_ENDC_WATCH_CONFLICT); pnv_xive2_endc_cache_watch_release(xive, watch_engine); val = xive->vc_regs[reg]; break; @@ -1315,10 +1347,11 @@ static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset, case VC_ENDC_WATCH3_DATA0: /* * Load DATA registers from cache with data requested by the - * SPEC register + * SPEC register. Clear gen_flipped bit in word 1. */ watch_engine = (offset - VC_ENDC_WATCH0_DATA0) >> 6; pnv_xive2_end_cache_load(xive, watch_engine); + xive->vc_regs[reg] &= ~(uint64_t)END2_W1_GEN_FLIPPED; val = xive->vc_regs[reg]; break; @@ -1386,7 +1419,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, /* * ESB cache updates (not modeled) */ - /* case VC_ESBC_FLUSH_CTRL: */ + case VC_ESBC_FLUSH_CTRL: + if (val & VC_ESBC_FLUSH_CTRL_WANT_CACHE_DISABLE) { + xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx + " value 0x%"PRIx64" bit[2] poll_want_cache_disable", + offset, val); + return; + } + break; case VC_ESBC_FLUSH_POLL: xive->vc_regs[VC_ESBC_FLUSH_CTRL >> 3] |= VC_ESBC_FLUSH_CTRL_POLL_VALID; /* ESB update */ @@ -1402,7 +1442,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, /* * EAS cache updates (not modeled) */ - /* case VC_EASC_FLUSH_CTRL: */ + case VC_EASC_FLUSH_CTRL: + if (val & VC_EASC_FLUSH_CTRL_WANT_CACHE_DISABLE) { + xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx + " value 0x%"PRIx64" bit[2] poll_want_cache_disable", + offset, val); + return; + } + break; case VC_EASC_FLUSH_POLL: xive->vc_regs[VC_EASC_FLUSH_CTRL >> 3] |= VC_EASC_FLUSH_CTRL_POLL_VALID; /* EAS update */ @@ -1441,7 +1488,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, break; - /* case VC_ENDC_FLUSH_CTRL: */ + case VC_ENDC_FLUSH_CTRL: + if (val & VC_ENDC_FLUSH_CTRL_WANT_CACHE_DISABLE) { + xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx + " value 0x%"PRIx64" bit[2] poll_want_cache_disable", + offset, val); + return; + } + break; case VC_ENDC_FLUSH_POLL: xive->vc_regs[VC_ENDC_FLUSH_CTRL >> 3] |= VC_ENDC_FLUSH_CTRL_POLL_VALID; break; @@ -1470,7 +1524,8 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset, break; default: - xive2_error(xive, "VC: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "VC: invalid write @0x%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); return; } @@ -1661,7 +1716,14 @@ static void 
pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, pnv_xive2_nxc_update(xive, watch_engine); break; - /* case PC_NXC_FLUSH_CTRL: */ + case PC_NXC_FLUSH_CTRL: + if (val & PC_NXC_FLUSH_CTRL_WANT_CACHE_DISABLE) { + xive2_error(xive, "PC: unsupported write @0x%"HWADDR_PRIx + " value 0x%"PRIx64" bit[2] poll_want_cache_disable", + offset, val); + return; + } + break; case PC_NXC_FLUSH_POLL: xive->pc_regs[PC_NXC_FLUSH_CTRL >> 3] |= PC_NXC_FLUSH_CTRL_POLL_VALID; break; @@ -1678,7 +1740,8 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset, break; default: - xive2_error(xive, "PC: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "PC: invalid write @0x%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); return; } @@ -1765,7 +1828,8 @@ static void pnv_xive2_ic_tctxt_write(void *opaque, hwaddr offset, xive->tctxt_regs[reg] = val; break; default: - xive2_error(xive, "TCTXT: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "TCTXT: invalid write @0x%"HWADDR_PRIx + " data 0x%"PRIx64, offset, val); return; } } @@ -1836,7 +1900,8 @@ static void pnv_xive2_xscom_write(void *opaque, hwaddr offset, pnv_xive2_ic_tctxt_write(opaque, mmio_offset, val, size); break; default: - xive2_error(xive, "XSCOM: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "XSCOM: invalid write @%"HWADDR_PRIx + " value 0x%"PRIx64, offset, val); } } @@ -1904,7 +1969,8 @@ static void pnv_xive2_ic_notify_write(void *opaque, hwaddr offset, break; default: - xive2_error(xive, "NOTIFY: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "NOTIFY: invalid write @%"HWADDR_PRIx + " value 0x%"PRIx64, offset, val); } } @@ -1946,7 +2012,8 @@ static void pnv_xive2_ic_lsi_write(void *opaque, hwaddr offset, { PnvXive2 *xive = PNV_XIVE2(opaque); - xive2_error(xive, "LSI: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "LSI: invalid write @%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); } static const MemoryRegionOps pnv_xive2_ic_lsi_ops = { @@ -2049,7 +2116,8 @@ static void pnv_xive2_ic_sync_write(void *opaque, hwaddr offset, inject_type = PNV_XIVE2_QUEUE_NXC_ST_RMT_CI; break; default: - xive2_error(xive, "SYNC: invalid write @%"HWADDR_PRIx, offset); + xive2_error(xive, "SYNC: invalid write @%"HWADDR_PRIx" value 0x%"PRIx64, + offset, val); return; } diff --git a/hw/intc/pnv_xive2_regs.h b/hw/intc/pnv_xive2_regs.h index e8b87b3..d53300f 100644 --- a/hw/intc/pnv_xive2_regs.h +++ b/hw/intc/pnv_xive2_regs.h @@ -66,6 +66,7 @@ #define CQ_XIVE_CFG_GEN1_TIMA_HYP_BLK0 PPC_BIT(26) /* 0 if bit[25]=0 */ #define CQ_XIVE_CFG_GEN1_TIMA_CROWD_DIS PPC_BIT(27) /* 0 if bit[25]=0 */ #define CQ_XIVE_CFG_GEN1_END_ESX PPC_BIT(28) +#define CQ_XIVE_CFG_EN_VP_GRP_PRIORITY PPC_BIT(32) /* 0 if bit[25]=1 */ #define CQ_XIVE_CFG_EN_VP_SAVE_RESTORE PPC_BIT(38) /* 0 if bit[25]=1 */ #define CQ_XIVE_CFG_EN_VP_SAVE_REST_STRICT PPC_BIT(39) /* 0 if bit[25]=1 */ diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index b0139f0..4623cfa0 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -28,6 +28,7 @@ #include "qemu/module.h" #include "hw/sysbus.h" #include "target/riscv/cpu.h" +#include "target/riscv/time_helper.h" #include "hw/qdev-properties.h" #include "hw/intc/riscv_aclint.h" #include "qemu/timer.h" @@ -240,6 +241,10 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), mtimer->hartid_base + i, mtimer->timecmp[i]); + riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP); + riscv_timer_write_timecmp(env, env->vstimer, 
env->vstimecmp, + env->htimedelta, MIP_VSTIP); + } return; } diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 8bcd9f4..a1d9fa5 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -628,7 +628,7 @@ static void riscv_aplic_request(void *opaque, int irq, int level) static uint64_t riscv_aplic_read(void *opaque, hwaddr addr, unsigned size) { - uint32_t irq, word, idc; + uint32_t irq, word, idc, sm; RISCVAPLICState *aplic = opaque; /* Reads must be 4 byte words */ @@ -696,6 +696,10 @@ static uint64_t riscv_aplic_read(void *opaque, hwaddr addr, unsigned size) } else if ((APLIC_TARGET_BASE <= addr) && (addr < (APLIC_TARGET_BASE + (aplic->num_irqs - 1) * 4))) { irq = ((addr - APLIC_TARGET_BASE) >> 2) + 1; + sm = aplic->sourcecfg[irq] & APLIC_SOURCECFG_SM_MASK; + if (sm == APLIC_SOURCECFG_SM_INACTIVE) { + return 0; + } return aplic->target[irq]; } else if (!aplic->msimode && (APLIC_IDC_BASE <= addr) && (addr < (APLIC_IDC_BASE + aplic->num_harts * APLIC_IDC_SIZE))) { @@ -962,10 +966,18 @@ static const Property riscv_aplic_properties[] = { DEFINE_PROP_BOOL("mmode", RISCVAPLICState, mmode, 0), }; +static bool riscv_aplic_state_needed(void *opaque) +{ + RISCVAPLICState *aplic = opaque; + + return riscv_use_emulated_aplic(aplic->msimode); +} + static const VMStateDescription vmstate_riscv_aplic = { .name = "riscv_aplic", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, + .needed = riscv_aplic_state_needed, .fields = (const VMStateField[]) { VMSTATE_UINT32(domaincfg, RISCVAPLICState), VMSTATE_UINT32(mmsicfgaddr, RISCVAPLICState), diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index 2169988..6174e1a 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -398,10 +398,16 @@ static const Property riscv_imsic_properties[] = { DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0), }; +static bool riscv_imsic_state_needed(void *opaque) +{ + return !kvm_irqchip_in_kernel(); +} + static const VMStateDescription vmstate_riscv_imsic = { .name = "riscv_imsic", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, + .needed = riscv_imsic_state_needed, .fields = (const VMStateField[]) { VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState, num_pages, 0, diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 440edb9..e393f5d 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -428,14 +428,13 @@ static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, g_assert_not_reached(); } -static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, - uint8_t priority, - uint32_t logic_serv, XiveTCTXMatch *match) +static bool spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, + uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) { CPUState *cs; - int count = 0; CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -463,16 +462,17 @@ static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, if (match->tctx) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread " "context NVT %x/%x\n", nvt_blk, nvt_idx); - return -1; + match->count++; + continue; } match->ring = ring; match->tctx = tctx; - count++; + match->count++; } } - return count; + return !!match->count; } static uint32_t spapr_xive_presenter_get_config(XivePresenter *xptr) diff --git a/hw/intc/trace-events b/hw/intc/trace-events 
index 334aa6a..018c609 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -274,11 +274,13 @@ kvm_xive_cpu_connect(uint32_t id) "connect CPU%d to KVM device" kvm_xive_source_reset(uint32_t srcno) "IRQ 0x%x" # xive.c -xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x ACK" -xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x raise !" -xive_tctx_set_cppr(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x new CPPR=0x%02x NSR=0x%02x" +xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x ACK" +xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x raise !" +xive_tctx_set_cppr(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x new CPPR=0x%02x NSR=0x%02x" xive_source_esb_read(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64 xive_source_esb_write(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64 +xive_source_notify(uint32_t srcno) "Processing notification for queued IRQ 0x%x" +xive_source_blocked(uint32_t srcno) "No action needed for IRQ 0x%x currently" xive_router_end_notify(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "END 0x%02x/0x%04x -> enqueue 0x%08x" xive_router_end_escalate(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t end_data) "END 0x%02x/0x%04x -> escalate END 0x%02x/0x%04x data 0x%08x" xive_tctx_tm_write(uint32_t index, uint64_t offset, unsigned int size, uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64 @@ -289,6 +291,10 @@ xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x # xive2.c xive_nvp_backlog_op(uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint8_t rc) "NVP 0x%x/0x%x operation=%d priority=%d rc=%d" xive_nvgc_backlog_op(bool c, uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint32_t rc) "NVGC crowd=%d 0x%x/0x%x operation=%d priority=%d rc=%d" +xive_redistribute(uint32_t index, uint8_t ring, uint8_t end_blk, uint32_t end_idx) "Redistribute from target=%d ring=0x%x NVP 0x%x/0x%x" +xive_end_enqueue(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "Queue event for END 0x%x/0x%x data=0x%x" +xive_escalate_end(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t esc_data) "Escalate from END 0x%x/0x%x to END 0x%x/0x%x data=0x%x" +xive_escalate_esb(uint8_t end_blk, uint32_t end_idx, uint32_t lisn) "Escalate from END 0x%x/0x%x to LISN=0x%x" # pnv_xive.c pnv_xive_ic_hw_trigger(uint64_t addr, uint64_t val) "@0x%"PRIx64" val=0x%"PRIx64 diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 27b473e..e0ffcf8 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -25,6 +25,58 @@ /* * XIVE Thread Interrupt Management context */ +bool xive_ring_valid(XiveTCTX *tctx, uint8_t ring) +{ + uint8_t cur_ring; + + for (cur_ring = ring; cur_ring <= TM_QW3_HV_PHYS; + cur_ring += XIVE_TM_RING_SIZE) { + if (!(tctx->regs[cur_ring + TM_WORD2] & 0x80)) { + return false; + } + } + return true; +} + 
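
The xive_ring_valid() helper added above encodes the rule that a TIMA ring is usable only when its own valid (V) bit and the V bits of every more-privileged ring above it are set: the four rings sit at fixed XIVE_TM_RING_SIZE strides and carry the V bit in bit 7 of their WORD2 byte. The following standalone sketch shows the same walk; the constants, names, and demo harness are illustrative stand-ins, not the QEMU definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in TIMA layout constants (illustrative, not the QEMU values). */
enum {
    TM_RING_SIZE  = 0x10, /* stride between the four rings */
    TM_WORD2_OFF  = 0x08, /* WORD2 byte within a ring */
    RING_QW1_OS   = 0x10,
    RING_QW2_POOL = 0x20,
    RING_QW3_PHYS = 0x30, /* most privileged ring */
};

/* A ring is usable only if it and every ring above it have V (bit 7) set. */
static bool ring_valid(const uint8_t *tima, uint8_t ring)
{
    for (unsigned r = ring; r <= RING_QW3_PHYS; r += TM_RING_SIZE) {
        if (!(tima[r + TM_WORD2_OFF] & 0x80)) {
            return false;
        }
    }
    return true;
}

int main(void)
{
    uint8_t tima[0x40] = { 0 };

    tima[RING_QW3_PHYS + TM_WORD2_OFF] = 0x80;                /* PHYS only */
    printf("OS usable: %d\n", ring_valid(tima, RING_QW1_OS)); /* 0 */

    tima[RING_QW1_OS + TM_WORD2_OFF] = 0x80;                  /* OS valid */
    tima[RING_QW2_POOL + TM_WORD2_OFF] = 0x80;                /* POOL valid */
    printf("OS usable: %d\n", ring_valid(tima, RING_QW1_OS)); /* 1 */
    return 0;
}

This is why the TIMA access checks introduced later in this patch can derive "hw_owned" from the validity of the ring a given offset falls in.
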
+bool xive_nsr_indicates_exception(uint8_t ring, uint8_t nsr) +{ + switch (ring) { + case TM_QW1_OS: + return !!(nsr & TM_QW1_NSR_EO); + case TM_QW2_HV_POOL: + case TM_QW3_HV_PHYS: + return !!(nsr & TM_QW3_NSR_HE); + default: + g_assert_not_reached(); + } +} + +bool xive_nsr_indicates_group_exception(uint8_t ring, uint8_t nsr) +{ + if ((nsr & TM_NSR_GRP_LVL) > 0) { + g_assert(xive_nsr_indicates_exception(ring, nsr)); + return true; + } + return false; +} + +uint8_t xive_nsr_exception_ring(uint8_t ring, uint8_t nsr) +{ + /* NSR determines if pool/phys ring is for phys or pool interrupt */ + if ((ring == TM_QW3_HV_PHYS) || (ring == TM_QW2_HV_POOL)) { + uint8_t he = (nsr & TM_QW3_NSR_HE) >> 6; + + if (he == TM_QW3_NSR_HE_PHYS) { + return TM_QW3_HV_PHYS; + } else if (he == TM_QW3_NSR_HE_POOL) { + return TM_QW2_HV_POOL; + } else { + /* Don't support LSI mode */ + g_assert_not_reached(); + } + } + return ring; +} static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring) { @@ -41,74 +93,83 @@ static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring) } } -static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) +/* + * interrupt is accepted on the presentation ring, for PHYS ring the NSR + * directs it to the PHYS or POOL rings. + */ +uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t sig_ring) { - uint8_t *regs = &tctx->regs[ring]; - uint8_t nsr = regs[TM_NSR]; + uint8_t *sig_regs = &tctx->regs[sig_ring]; + uint8_t nsr = sig_regs[TM_NSR]; - qemu_irq_lower(xive_tctx_output(tctx, ring)); + g_assert(sig_ring == TM_QW1_OS || sig_ring == TM_QW3_HV_PHYS); - if (regs[TM_NSR] != 0) { - uint8_t cppr = regs[TM_PIPR]; - uint8_t alt_ring; - uint8_t *alt_regs; + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0); - /* POOL interrupt uses IPB in QW2, POOL ring */ - if ((ring == TM_QW3_HV_PHYS) && (nsr & (TM_QW3_NSR_HE_POOL << 6))) { - alt_ring = TM_QW2_HV_POOL; - } else { - alt_ring = ring; - } - alt_regs = &tctx->regs[alt_ring]; + if (xive_nsr_indicates_exception(sig_ring, nsr)) { + uint8_t cppr = sig_regs[TM_PIPR]; + uint8_t ring; + uint8_t *regs; + + ring = xive_nsr_exception_ring(sig_ring, nsr); + regs = &tctx->regs[ring]; - regs[TM_CPPR] = cppr; + sig_regs[TM_CPPR] = cppr; /* * If the interrupt was for a specific VP, reset the pending * buffer bit, otherwise clear the logical server indicator */ - if (regs[TM_NSR] & TM_NSR_GRP_LVL) { - regs[TM_NSR] &= ~TM_NSR_GRP_LVL; - } else { - alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); + if (!xive_nsr_indicates_group_exception(sig_ring, nsr)) { + regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); } - /* Drop the exception bit and any group/crowd */ - regs[TM_NSR] = 0; + /* Clear the exception from NSR */ + sig_regs[TM_NSR] = 0; + qemu_irq_lower(xive_tctx_output(tctx, sig_ring)); - trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring, - alt_regs[TM_IPB], regs[TM_PIPR], - regs[TM_CPPR], regs[TM_NSR]); + trace_xive_tctx_accept(tctx->cs->cpu_index, ring, + regs[TM_IPB], sig_regs[TM_PIPR], + sig_regs[TM_CPPR], sig_regs[TM_NSR]); } - return ((uint64_t)nsr << 8) | regs[TM_CPPR]; + return ((uint64_t)nsr << 8) | sig_regs[TM_CPPR]; } -void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level) +/* Change PIPR and calculate NSR and irq based on PIPR, CPPR, group */ +void xive_tctx_pipr_set(XiveTCTX *tctx, uint8_t ring, uint8_t pipr, + uint8_t group_level) { - /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ - uint8_t alt_ring = 
(ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring; - uint8_t *alt_regs = &tctx->regs[alt_ring]; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); uint8_t *regs = &tctx->regs[ring]; - if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) { + g_assert(!xive_nsr_indicates_group_exception(ring, sig_regs[TM_NSR])); + + sig_regs[TM_PIPR] = pipr; + + if (pipr < sig_regs[TM_CPPR]) { switch (ring) { case TM_QW1_OS: - regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F); + sig_regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F); break; case TM_QW2_HV_POOL: - alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F); + sig_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F); break; case TM_QW3_HV_PHYS: - regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F); + sig_regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F); break; default: g_assert_not_reached(); } trace_xive_tctx_notify(tctx->cs->cpu_index, ring, - regs[TM_IPB], alt_regs[TM_PIPR], - alt_regs[TM_CPPR], alt_regs[TM_NSR]); + regs[TM_IPB], pipr, + sig_regs[TM_CPPR], sig_regs[TM_NSR]); qemu_irq_raise(xive_tctx_output(tctx, ring)); + } else { + sig_regs[TM_NSR] = 0; + qemu_irq_lower(xive_tctx_output(tctx, ring)); } } @@ -124,25 +185,32 @@ void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring) static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) { - uint8_t *regs = &tctx->regs[ring]; + uint8_t *sig_regs = &tctx->regs[ring]; uint8_t pipr_min; uint8_t ring_min; + g_assert(ring == TM_QW1_OS || ring == TM_QW3_HV_PHYS); + + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0); + + /* XXX: should show pool IPB for PHYS ring */ trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring, - regs[TM_IPB], regs[TM_PIPR], - cppr, regs[TM_NSR]); + sig_regs[TM_IPB], sig_regs[TM_PIPR], + cppr, sig_regs[TM_NSR]); if (cppr > XIVE_PRIORITY_MAX) { cppr = 0xff; } - tctx->regs[ring + TM_CPPR] = cppr; + sig_regs[TM_CPPR] = cppr; /* * Recompute the PIPR based on local pending interrupts. The PHYS * ring must take the minimum of both the PHYS and POOL PIPR values. */ - pipr_min = xive_ipb_to_pipr(regs[TM_IPB]); + pipr_min = xive_ipb_to_pipr(sig_regs[TM_IPB]); ring_min = ring; /* PHYS updates also depend on POOL values */ @@ -151,7 +219,6 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) /* POOL values only matter if POOL ctx is valid */ if (pool_regs[TM_WORD2] & 0x80) { - uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]); /* @@ -165,30 +232,39 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) } } - regs[TM_PIPR] = pipr_min; + /* CPPR has changed, this may present or preclude a pending exception */ + xive_tctx_pipr_set(tctx, ring_min, pipr_min, 0); +} + +static void xive_tctx_pipr_recompute_from_ipb(XiveTCTX *tctx, uint8_t ring) +{ + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); + uint8_t *regs = &tctx->regs[ring]; - /* CPPR has changed, check if we need to raise a pending exception */ - xive_tctx_notify(tctx, ring_min, 0); + /* Does not support a presented group interrupt */ + g_assert(!xive_nsr_indicates_group_exception(ring, sig_regs[TM_NSR])); + + xive_tctx_pipr_set(tctx, ring, xive_ipb_to_pipr(regs[TM_IPB]), 0); } -void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority, - uint8_t group_level) - { - /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ - uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? 
TM_QW3_HV_PHYS : ring; - uint8_t *alt_regs = &tctx->regs[alt_ring]; +void xive_tctx_pipr_present(XiveTCTX *tctx, uint8_t ring, uint8_t priority, + uint8_t group_level) +{ + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); uint8_t *regs = &tctx->regs[ring]; + uint8_t pipr = xive_priority_to_pipr(priority); if (group_level == 0) { - /* VP-specific */ regs[TM_IPB] |= xive_priority_to_ipb(priority); - alt_regs[TM_PIPR] = xive_ipb_to_pipr(regs[TM_IPB]); - } else { - /* VP-group */ - alt_regs[TM_PIPR] = xive_priority_to_pipr(priority); + if (pipr >= sig_regs[TM_PIPR]) { + /* VP interrupts can come here with lower priority than PIPR */ + return; + } } - xive_tctx_notify(tctx, ring, group_level); - } + g_assert(pipr <= xive_ipb_to_pipr(regs[TM_IPB])); + g_assert(pipr < sig_regs[TM_PIPR]); + xive_tctx_pipr_set(tctx, ring, pipr, group_level); +} /* * XIVE Thread Interrupt Management Area (TIMA) @@ -206,25 +282,78 @@ static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx, return xive_tctx_accept(tctx, TM_QW3_HV_PHYS); } +static void xive_pool_cam_decode(uint32_t cam, uint8_t *nvt_blk, + uint32_t *nvt_idx, bool *vp) +{ + if (nvt_blk) { + *nvt_blk = xive_nvt_blk(cam); + } + if (nvt_idx) { + *nvt_idx = xive_nvt_idx(cam); + } + if (vp) { + *vp = !!(cam & TM_QW2W2_VP); + } +} + +static uint32_t xive_tctx_get_pool_cam(XiveTCTX *tctx, uint8_t *nvt_blk, + uint32_t *nvt_idx, bool *vp) +{ + uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]); + uint32_t cam = be32_to_cpu(qw2w2); + + xive_pool_cam_decode(cam, nvt_blk, nvt_idx, vp); + return qw2w2; +} + +static void xive_tctx_set_pool_cam(XiveTCTX *tctx, uint32_t qw2w2) +{ + memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4); +} + static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, unsigned size) { - uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]); uint32_t qw2w2; + uint32_t qw2w2_new; + uint8_t nvt_blk; + uint32_t nvt_idx; + bool vp; + + qw2w2 = xive_tctx_get_pool_cam(tctx, &nvt_blk, &nvt_idx, &vp); + + if (!vp) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pull invalid POOL NVT %x/%x !?\n", + nvt_blk, nvt_idx); + } + + /* Invalidate CAM line */ + qw2w2_new = xive_set_field32(TM_QW2W2_VP, qw2w2, 0); + xive_tctx_set_pool_cam(tctx, qw2w2_new); + + xive_tctx_reset_signal(tctx, TM_QW1_OS); + xive_tctx_reset_signal(tctx, TM_QW2_HV_POOL); + /* Re-check phys for interrupts if pool was disabled */ + xive_tctx_pipr_recompute_from_ipb(tctx, TM_QW3_HV_PHYS); - qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0); - memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4); return qw2w2; } static uint64_t xive_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, unsigned size) { - uint8_t qw3b8_prev = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2]; - uint8_t qw3b8; + uint8_t qw3b8 = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2]; + uint8_t qw3b8_new; + + qw3b8 = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2]; + if (!(qw3b8 & TM_QW3B8_VT)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid PHYS thread!?\n"); + } + qw3b8_new = qw3b8 & ~TM_QW3B8_VT; + tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8_new; - qw3b8 = qw3b8_prev & ~TM_QW3B8_VT; - tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8; + xive_tctx_reset_signal(tctx, TM_QW1_OS); + xive_tctx_reset_signal(tctx, TM_QW3_HV_PHYS); return qw3b8; } @@ -255,14 +384,14 @@ static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx, static const uint8_t xive_tm_hw_view[] = { 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-0 User */ - 3, 
3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-1 OS */ + 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 3, /* QW-1 OS */ 0, 0, 3, 3, 0, 3, 3, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-2 POOL */ 3, 3, 3, 3, 0, 3, 0, 2, 3, 0, 0, 3, 3, 3, 3, 0, /* QW-3 PHYS */ }; static const uint8_t xive_tm_hv_view[] = { 3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-0 User */ - 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-1 OS */ + 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 3, /* QW-1 OS */ 0, 0, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 0, 0, 0, 0, /* QW-2 POOL */ 3, 3, 3, 3, 0, 3, 0, 2, 3, 0, 0, 3, 0, 0, 0, 0, /* QW-3 PHYS */ }; @@ -326,7 +455,7 @@ static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value, */ if (size < 4 || !mask || ring_offset == TM_QW0_USER) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%" - HWADDR_PRIx"\n", offset); + HWADDR_PRIx" size %d\n", offset, size); return; } @@ -357,7 +486,7 @@ static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size) */ if (size < 4 || !mask || ring_offset == TM_QW0_USER) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%" - HWADDR_PRIx"\n", offset); + HWADDR_PRIx" size %d\n", offset, size); return -1; } @@ -403,6 +532,12 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx, xive_tctx_set_lgs(tctx, TM_QW1_OS, value & 0xff); } +static void xive_tm_set_pool_lgs(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive_tctx_set_lgs(tctx, TM_QW2_HV_POOL, value & 0xff); +} + /* * Adjust the PIPR to allow a CPU to process event queues of other * priorities during one physical interrupt cycle. @@ -410,7 +545,12 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx, static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size) { - xive_tctx_pipr_update(tctx, TM_QW1_OS, value & 0xff, 0); + uint8_t ring = TM_QW1_OS; + uint8_t *regs = &tctx->regs[ring]; + + /* XXX: how should this work exactly? */ + regs[TM_IPB] |= xive_priority_to_ipb(value & 0xff); + xive_tctx_pipr_recompute_from_ipb(tctx, ring); } static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk, @@ -454,7 +594,7 @@ static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, qw1w2 = xive_tctx_get_os_cam(tctx, &nvt_blk, &nvt_idx, &vo); if (!vo) { - qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVT %x/%x !?\n", + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pull invalid OS NVT %x/%x !?\n", nvt_blk, nvt_idx); } @@ -466,7 +606,7 @@ static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, return qw1w2; } -static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx, +static void xive_tctx_restore_nvp(XiveRouter *xrtr, XiveTCTX *tctx, uint8_t nvt_blk, uint32_t nvt_idx) { XiveNVT nvt; @@ -492,16 +632,6 @@ static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx, uint8_t *regs = &tctx->regs[TM_QW1_OS]; regs[TM_IPB] |= ipb; } - - /* - * Always call xive_tctx_pipr_update(). Even if there were no - * escalation triggered, there could be a pending interrupt which - * was saved when the context was pulled and that we need to take - * into account by recalculating the PIPR (which is not - * saved/restored). - * It will also raise the External interrupt signal if needed. 
- */ - xive_tctx_pipr_update(tctx, TM_QW1_OS, 0xFF, 0); /* fxb */ } /* @@ -523,7 +653,17 @@ static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, /* Check the interrupt pending bits */ if (vo) { - xive_tctx_need_resend(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx); + xive_tctx_restore_nvp(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx); + + /* + * Always call xive_tctx_pipr_recompute_from_ipb(). Even if there were + * no escalation triggered, there could be a pending interrupt which + * was saved when the context was pulled and that we need to take + * into account by recalculating the PIPR (which is not + * saved/restored). + * It will also raise the External interrupt signal if needed. + */ + xive_tctx_pipr_recompute_from_ipb(tctx, TM_QW1_OS); /* fxb */ } } @@ -542,6 +682,8 @@ typedef struct XiveTmOp { uint8_t page_offset; uint32_t op_offset; unsigned size; + bool hw_ok; + bool sw_ok; void (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size); @@ -554,34 +696,34 @@ static const XiveTmOp xive_tm_operations[] = { /* * MMIOs below 2K : raw values and special operations without side * effects */ - { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive_tm_push_os_ctx, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, - xive_tm_vt_poll }, + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, true, true, + xive_tm_set_os_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, true, true, + xive_tm_push_os_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, true, true, + xive_tm_set_hv_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, false, true, + xive_tm_vt_push, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, true, true, + NULL, xive_tm_vt_poll }, /* MMIOs above 2K : special operations with side effects */ - { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, - xive_tm_ack_os_reg }, - { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, - NULL }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL, - xive_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL, - xive_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL, - xive_tm_ack_hv_reg }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, NULL, - xive_tm_pull_phys_ctx }, + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, true, false, + NULL, xive_tm_ack_os_reg }, + { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, true, false, + xive_tm_set_os_pending, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, true, false, + NULL, xive_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, true, false, + NULL, xive_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, true, false, + NULL, xive_tm_ack_hv_reg }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, true, false, + NULL, xive_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, true, false, + NULL, xive_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, true, false, + NULL, xive_tm_pull_phys_ctx }, }; static const XiveTmOp xive2_tm_operations[] = { @@ -589,50 +731,58 @@ /* * MMIOs below 2K : raw values and special 
operations without side * effects */ - { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive2_tm_set_os_cppr, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive2_tm_push_os_ctx, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 8, xive2_tm_push_os_ctx, - NULL }, - { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS, 1, xive_tm_set_os_lgs, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive2_tm_set_hv_cppr, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, - NULL }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, - xive_tm_vt_poll }, - { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T, 1, xive2_tm_set_hv_target, - NULL }, + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, true, true, + xive2_tm_set_os_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, true, true, + xive2_tm_push_os_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 8, true, true, + xive2_tm_push_os_ctx, NULL }, + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS, 1, true, true, + xive_tm_set_os_lgs, NULL }, + { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_WORD2, 4, true, true, + xive2_tm_push_pool_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_WORD2, 8, true, true, + xive2_tm_push_pool_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_LGS, 1, true, true, + xive_tm_set_pool_lgs, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, true, true, + xive2_tm_set_hv_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, false, true, + xive2_tm_push_phys_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, true, true, + NULL, xive_tm_vt_poll }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T, 1, true, true, + xive2_tm_set_hv_target, NULL }, /* MMIOs above 2K : special operations with side effects */ - { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, - xive_tm_ack_os_reg }, - { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, - NULL }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2, 4, NULL, - xive2_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL, - xive2_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL, - xive2_tm_pull_os_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL, - xive_tm_ack_hv_reg }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2, 4, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, - xive_tm_pull_pool_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL, 1, xive2_tm_pull_os_ctx_ol, - NULL }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2, 4, NULL, - xive_tm_pull_phys_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, NULL, - xive_tm_pull_phys_ctx }, - { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL, 1, xive2_tm_pull_phys_ctx_ol, - NULL }, + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, true, false, + NULL, xive_tm_ack_os_reg }, + { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, true, false, + xive2_tm_set_os_pending, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2, 4, true, false, + NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, true, false, + NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, true, false, + NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, true, false, + NULL, xive_tm_ack_hv_reg }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2, 4, true, false, + NULL, xive2_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, true, false, + NULL, xive2_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, true, false, + 
NULL, xive2_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL, 1, true, false, + xive2_tm_pull_os_ctx_ol, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2, 4, true, false, + NULL, xive2_tm_pull_phys_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, true, false, + NULL, xive2_tm_pull_phys_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL, 1, true, false, + xive2_tm_pull_phys_ctx_ol, NULL }, + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_EL, 1, true, false, + xive2_tm_ack_os_el, NULL }, }; static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset, @@ -674,21 +824,31 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size) { const XiveTmOp *xto; + uint8_t ring = offset & TM_RING_OFFSET; + bool is_valid = xive_ring_valid(tctx, ring); + bool hw_owned = is_valid; trace_xive_tctx_tm_write(tctx->cs->cpu_index, offset, size, value); /* - * TODO: check V bit in Q[0-3]W2 - */ - - /* * First, check for special operations in the 2K region */ + xto = xive_tm_find_op(tctx->xptr, offset, size, true); + if (xto) { + if (hw_owned && !xto->hw_ok) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to HW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } + if (!hw_owned && !xto->sw_ok) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to SW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } + } + if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA " - "@%"HWADDR_PRIx"\n", offset); + "@%"HWADDR_PRIx" size %d\n", offset, size); } else { xto->write_handler(xptr, tctx, offset, value, size); } @@ -698,7 +858,6 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. 
*/ - xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (xto) { xto->write_handler(xptr, tctx, offset, value, size); return; @@ -707,6 +866,11 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Finish with raw access to the register values */ + if (hw_owned) { + /* Store context operations are dangerous when context is valid */ + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to HW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } xive_tm_raw_write(tctx, offset, value, size); } @@ -714,20 +878,30 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, unsigned size) { const XiveTmOp *xto; + uint8_t ring = offset & TM_RING_OFFSET; + bool is_valid = xive_ring_valid(tctx, ring); + bool hw_owned = is_valid; uint64_t ret; - /* - * TODO: check V bit in Q[0-3]W2 - */ + xto = xive_tm_find_op(tctx->xptr, offset, size, false); + if (xto) { + if (hw_owned && !xto->hw_ok) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined read to HW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } + if (!hw_owned && !xto->sw_ok) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined read to SW TIMA " + "@%"HWADDR_PRIx" size %d\n", offset, size); + } + } /* * First, check for special operations in the 2K region */ if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA" - "@%"HWADDR_PRIx"\n", offset); + "@%"HWADDR_PRIx" size %d\n", offset, size); return -1; } ret = xto->read_handler(xptr, tctx, offset, size); @@ -737,7 +911,6 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. */ - xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (xto) { ret = xto->read_handler(xptr, tctx, offset, size); goto out; @@ -1191,6 +1364,7 @@ static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size) /* Forward the source event notification for routing */ if (ret) { + trace_xive_source_notify(srcno); xive_source_notify(xsrc, srcno); } break; @@ -1286,6 +1460,8 @@ out: /* Forward the source event notification for routing */ if (notify) { xive_source_notify(xsrc, srcno); + } else { + trace_xive_source_blocked(srcno); } } @@ -1672,8 +1848,8 @@ uint32_t xive_get_vpgroup_size(uint32_t nvp_index) return 1U << (first_zero + 1); } -static uint8_t xive_get_group_level(bool crowd, bool ignore, - uint32_t nvp_blk, uint32_t nvp_index) +uint8_t xive_get_group_level(bool crowd, bool ignore, + uint32_t nvp_blk, uint32_t nvp_index) { int first_zero; uint8_t level; @@ -1791,15 +1967,14 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, * This is our simple Xive Presenter Engine model. It is merged in the * Router as it does not require an extra object. */ -bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, +bool xive_presenter_match(XiveFabric *xfb, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, bool *precluded) + uint32_t logic_serv, XiveTCTXMatch *match) { XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb); - XiveTCTXMatch match = { .tctx = NULL, .ring = 0, .precluded = false }; - uint8_t group_level; - int count; + + memset(match, 0, sizeof(*match)); /* * Ask the machine to scan the interrupt controllers for a match. 
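For reference, the ownership flags threaded through the TIMA op table at the top of this hunk might be modeled roughly as follows; the field names are inferred from the initializers above, and the authoritative definition lives in hw/intc/xive.c:

    typedef struct XiveTmOp {
        uint8_t  page_offset;        /* XIVE_TM_{OS,HV}_PAGE */
        uint32_t op_offset;          /* register offset within the TIMA */
        unsigned size;               /* access size in bytes */
        bool     hw_ok;              /* access defined while ring is HW-owned */
        bool     sw_ok;              /* access defined while ring is SW-owned */
        void     (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx,
                                  hwaddr offset, uint64_t value, unsigned size);
        uint64_t (*read_handler)(XivePresenter *xptr, XiveTCTX *tctx,
                                 hwaddr offset, unsigned size);
    } XiveTmOp;

Read this way, the entry { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, false, true, xive2_tm_push_phys_ctx, NULL } says that a 1-byte PHYS push is only defined while the context is still SW-owned, which is exactly what the hw_owned checks in xive_tctx_tm_write() and xive_tctx_tm_read() above enforce.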
@@ -1824,22 +1999,8 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, * a new command to the presenters (the equivalent of the "assign" * power bus command in the documented full notify sequence. */ - count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore, - priority, logic_serv, &match); - if (count < 0) { - return false; - } - - /* handle CPU exception delivery */ - if (count) { - group_level = xive_get_group_level(crowd, cam_ignore, nvt_blk, nvt_idx); - trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, group_level); - xive_tctx_pipr_update(match.tctx, match.ring, priority, group_level); - } else { - *precluded = match.precluded; - } - - return !!count; + return xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore, + priority, logic_serv, match); } /* @@ -1876,7 +2037,7 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas) uint8_t nvt_blk; uint32_t nvt_idx; XiveNVT nvt; - bool found, precluded; + XiveTCTXMatch match; uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); @@ -1956,16 +2117,16 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas) return; } - found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx, - false /* crowd */, - xive_get_field32(END_W7_F0_IGNORE, end.w7), - priority, - xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7), - &precluded); - /* we don't support VP-group notification on P9, so precluded is not used */ /* TODO: Auto EOI. */ - - if (found) { + /* we don't support VP-group notification on P9, so precluded is not used */ + if (xive_presenter_match(xrtr->xfb, format, nvt_blk, nvt_idx, + false /* crowd */, + xive_get_field32(END_W7_F0_IGNORE, end.w7), + priority, + xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7), + &match)) { + trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, 0); + xive_tctx_pipr_present(match.tctx, match.ring, priority, 0); return; } diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c index a08cf90..ee5fa26 100644 --- a/hw/intc/xive2.c +++ b/hw/intc/xive2.c @@ -19,6 +19,13 @@ #include "hw/ppc/xive2_regs.h" #include "trace.h" +static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, + uint32_t end_idx, uint32_t end_data, + bool redistribute); + +static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring, + uint8_t *nvp_blk, uint32_t *nvp_idx); + uint32_t xive2_router_get_config(Xive2Router *xrtr) { Xive2RouterClass *xrc = XIVE2_ROUTER_GET_CLASS(xrtr); @@ -188,12 +195,27 @@ void xive2_eas_pic_print_info(Xive2Eas *eas, uint32_t lisn, GString *buf) (uint32_t) xive_get_field64(EAS2_END_DATA, eas->w)); } +#define XIVE2_QSIZE_CHUNK_CL 128 +#define XIVE2_QSIZE_CHUNK_4k 4096 +/* Calculate max number of queue entries for an END */ +static uint32_t xive2_end_get_qentries(Xive2End *end) +{ + uint32_t w3 = end->w3; + uint32_t qsize = xive_get_field32(END2_W3_QSIZE, w3); + if (xive_get_field32(END2_W3_CL, w3)) { + g_assert(qsize <= 4); + return (XIVE2_QSIZE_CHUNK_CL << qsize) / sizeof(uint32_t); + } else { + g_assert(qsize <= 12); + return (XIVE2_QSIZE_CHUNK_4k << qsize) / sizeof(uint32_t); + } +} + void xive2_end_queue_pic_print_info(Xive2End *end, uint32_t width, GString *buf) { uint64_t qaddr_base = xive2_end_qaddr(end); - uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3); uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1); - uint32_t qentries = 1 << (qsize + 10); + uint32_t qentries = xive2_end_get_qentries(end); int i; /* @@ -223,8 +245,7 @@ void 
xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf) uint64_t qaddr_base = xive2_end_qaddr(end); uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1); uint32_t qgen = xive_get_field32(END2_W1_GENERATION, end->w1); - uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3); - uint32_t qentries = 1 << (qsize + 10); + uint32_t qentries = xive2_end_get_qentries(end); uint32_t nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end->w6); uint32_t nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end->w6); @@ -341,13 +362,12 @@ void xive2_nvgc_pic_print_info(Xive2Nvgc *nvgc, uint32_t nvgc_idx, GString *buf) static void xive2_end_enqueue(Xive2End *end, uint32_t data) { uint64_t qaddr_base = xive2_end_qaddr(end); - uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3); uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1); uint32_t qgen = xive_get_field32(END2_W1_GENERATION, end->w1); uint64_t qaddr = qaddr_base + (qindex << 2); uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff)); - uint32_t qentries = 1 << (qsize + 10); + uint32_t qentries = xive2_end_get_qentries(end); if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata), MEMTXATTRS_UNSPECIFIED)) { @@ -361,8 +381,8 @@ static void xive2_end_enqueue(Xive2End *end, uint32_t data) qgen ^= 1; end->w1 = xive_set_field32(END2_W1_GENERATION, end->w1, qgen); - /* TODO(PowerNV): reset GF bit on a cache watch operation */ - end->w1 = xive_set_field32(END2_W1_GEN_FLIPPED, end->w1, qgen); + /* Set gen flipped to 1, it gets reset on a cache watch operation */ + end->w1 = xive_set_field32(END2_W1_GEN_FLIPPED, end->w1, 1); } end->w1 = xive_set_field32(END2_W1_PAGE_OFF, end->w1, qindex); } @@ -492,12 +512,13 @@ static void xive2_presenter_backlog_decr(XivePresenter *xptr, */ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx, - uint8_t nvp_blk, uint32_t nvp_idx, - uint8_t ring) + uint8_t ring, + uint8_t nvp_blk, uint32_t nvp_idx) { CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env; uint32_t pir = env->spr_cb[SPR_PIR].default_value; Xive2Nvp nvp; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); uint8_t *regs = &tctx->regs[ring]; if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) { @@ -533,7 +554,14 @@ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx, } nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, regs[TM_IPB]); - nvp.w2 = xive_set_field32(NVP2_W2_CPPR, nvp.w2, regs[TM_CPPR]); + + if ((nvp.w0 & NVP2_W0_P) || ring != TM_QW2_HV_POOL) { + /* + * Non-pool contexts always save CPPR (ignore p bit). XXX: Clarify + * whether that is the correct behaviour. + */ + nvp.w2 = xive_set_field32(NVP2_W2_CPPR, nvp.w2, sig_regs[TM_CPPR]); + } if (nvp.w0 & NVP2_W0_L) { /* * Typically not used. 
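For reference, the xive2_end_get_qentries() helper introduced above replaces the old fixed computation; a quick check of both encodings with 4-byte queue entries:

    /*
     * Cache-line backed ENDs (qsize <= 4):
     *   (128 << 0) / 4  =   32 entries     (128 << 4) / 4  =  512 entries
     * 4K-page backed ENDs (qsize <= 12), identical to the old 1 << (qsize + 10):
     *   (4096 << 0) / 4 = 1024 entries     (4096 << 2) / 4 = 4096 entries
     */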
If LSMFB is restored with 0, it will @@ -555,6 +583,7 @@ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx, xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 1); } +/* POOL cam is the same as OS cam encoding */ static void xive2_cam_decode(uint32_t cam, uint8_t *nvp_blk, uint32_t *nvp_idx, bool *valid, bool *hw) { @@ -584,6 +613,79 @@ static uint32_t xive2_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx) return xive2_nvp_cam_line(blk, 1 << tid_shift | (pir & tid_mask)); } +static void xive2_redistribute(Xive2Router *xrtr, XiveTCTX *tctx, uint8_t ring) +{ + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); + uint8_t nsr = sig_regs[TM_NSR]; + uint8_t pipr = sig_regs[TM_PIPR]; + uint8_t crowd = NVx_CROWD_LVL(nsr); + uint8_t group = NVx_GROUP_LVL(nsr); + uint8_t nvgc_blk, end_blk, nvp_blk; + uint32_t nvgc_idx, end_idx, nvp_idx; + Xive2Nvgc nvgc; + uint8_t prio_limit; + uint32_t cfg; + + /* redistribution is only for group/crowd interrupts */ + if (!xive_nsr_indicates_group_exception(ring, nsr)) { + return; + } + + /* Don't check return code since ring is expected to be invalidated */ + xive2_tctx_get_nvp_indexes(tctx, ring, &nvp_blk, &nvp_idx); + + trace_xive_redistribute(tctx->cs->cpu_index, ring, nvp_blk, nvp_idx); + /* convert crowd/group to blk/idx */ + if (group > 0) { + nvgc_idx = (nvp_idx & (0xffffffff << group)) | + ((1 << (group - 1)) - 1); + } else { + nvgc_idx = nvp_idx; + } + + if (crowd > 0) { + crowd = (crowd == 3) ? 4 : crowd; + nvgc_blk = (nvp_blk & (0xffffffff << crowd)) | + ((1 << (crowd - 1)) - 1); + } else { + nvgc_blk = nvp_blk; + } + + /* Use blk/idx to retrieve the NVGC */ + if (xive2_router_get_nvgc(xrtr, crowd, nvgc_blk, nvgc_idx, &nvgc)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no %s %x/%x\n", + crowd ?
"NVC" : "NVG", nvgc_blk, nvgc_idx); + return; + } + + /* retrieve the END blk/idx from the NVGC */ + end_blk = xive_get_field32(NVGC2_W1_END_BLK, nvgc.w1); + end_idx = xive_get_field32(NVGC2_W1_END_IDX, nvgc.w1); + + /* determine number of priorities being used */ + cfg = xive2_router_get_config(xrtr); + if (cfg & XIVE2_EN_VP_GRP_PRIORITY) { + prio_limit = 1 << GETFIELD(NVGC2_W1_PSIZE, nvgc.w1); + } else { + prio_limit = 1 << GETFIELD(XIVE2_VP_INT_PRIO, cfg); + } + + /* add priority offset to end index */ + end_idx += pipr % prio_limit; + + /* trigger the group END */ + xive2_router_end_notify(xrtr, end_blk, end_idx, 0, true); + + /* clear interrupt indication for the context */ + sig_regs[TM_NSR] = 0; + sig_regs[TM_PIPR] = sig_regs[TM_CPPR]; + xive_tctx_reset_signal(tctx, ring); +} + +static void xive2_tctx_process_pending(XiveTCTX *tctx, uint8_t sig_ring); + static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, unsigned size, uint8_t ring) { @@ -595,10 +697,11 @@ static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx, uint8_t cur_ring; bool valid; bool do_save; + uint8_t nsr; xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &valid, &do_save); - if (!valid) { + if (xive2_tctx_get_nvp_indexes(tctx, ring, &nvp_blk, &nvp_idx)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVP %x/%x !?\n", nvp_blk, nvp_idx); } @@ -608,21 +711,53 @@ static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx, cur_ring += XIVE_TM_RING_SIZE) { uint32_t ringw2 = xive_tctx_word2(&tctx->regs[cur_ring]); uint32_t ringw2_new = xive_set_field32(TM2_QW1W2_VO, ringw2, 0); + bool is_valid = !!(xive_get_field32(TM2_QW1W2_VO, ringw2)); + uint8_t *sig_regs; + memcpy(&tctx->regs[cur_ring + TM_WORD2], &ringw2_new, 4); + + /* Skip the rest for USER or invalid contexts */ + if ((cur_ring == TM_QW0_USER) || !is_valid) { + continue; + } + + /* Active group/crowd interrupts need to be redistributed */ + sig_regs = xive_tctx_signal_regs(tctx, ring); + nsr = sig_regs[TM_NSR]; + if (xive_nsr_indicates_group_exception(cur_ring, nsr)) { + /* Ensure ring matches NSR (for HV NSR POOL vs PHYS rings) */ + if (cur_ring == xive_nsr_exception_ring(cur_ring, nsr)) { + xive2_redistribute(xrtr, tctx, cur_ring); + } + } + + /* + * Lower external interrupt line of requested ring and below except for + * USER, which doesn't exist. + */ + if (xive_nsr_indicates_exception(cur_ring, nsr)) { + if (cur_ring == xive_nsr_exception_ring(cur_ring, nsr)) { + xive_tctx_reset_signal(tctx, cur_ring); + } + } } - if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_save) { - xive2_tctx_save_ctx(xrtr, tctx, nvp_blk, nvp_idx, ring); + if (ring == TM_QW2_HV_POOL) { + /* Re-check phys for interrupts if pool was disabled */ + nsr = tctx->regs[TM_QW3_HV_PHYS + TM_NSR]; + if (xive_nsr_indicates_exception(TM_QW3_HV_PHYS, nsr)) { + /* Ring must be PHYS because POOL would have been redistributed */ + g_assert(xive_nsr_exception_ring(TM_QW3_HV_PHYS, nsr) == + TM_QW3_HV_PHYS); + } else { + xive2_tctx_process_pending(tctx, TM_QW3_HV_PHYS); + } } - /* - * Lower external interrupt line of requested ring and below except for - * USER, which doesn't exist. 
- */ - for (cur_ring = TM_QW1_OS; cur_ring <= ring; - cur_ring += XIVE_TM_RING_SIZE) { - xive_tctx_reset_signal(tctx, cur_ring); + if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_save) { + xive2_tctx_save_ctx(xrtr, tctx, ring, nvp_blk, nvp_idx); } + return target_ringw2; } @@ -632,6 +767,18 @@ uint64_t xive2_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW1_OS); } +uint64_t xive2_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, unsigned size) +{ + return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW2_HV_POOL); +} + +uint64_t xive2_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, unsigned size) +{ + return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW3_HV_PHYS); +} + #define REPORT_LINE_GEN1_SIZE 16 static void xive2_tm_report_line_gen1(XiveTCTX *tctx, uint8_t *data, @@ -741,12 +888,15 @@ void xive2_tm_pull_phys_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx, xive2_tm_pull_ctx_ol(xptr, tctx, offset, value, size, TM_QW3_HV_PHYS); } -static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, - uint8_t nvp_blk, uint32_t nvp_idx, - Xive2Nvp *nvp) +static uint8_t xive2_tctx_restore_ctx(Xive2Router *xrtr, XiveTCTX *tctx, + uint8_t ring, + uint8_t nvp_blk, uint32_t nvp_idx, + Xive2Nvp *nvp) { CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env; uint32_t pir = env->spr_cb[SPR_PIR].default_value; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); + uint8_t *regs = &tctx->regs[ring]; uint8_t cppr; if (!xive2_nvp_is_hw(nvp)) { @@ -759,10 +909,10 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, nvp->w2 = xive_set_field32(NVP2_W2_CPPR, nvp->w2, 0); xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, nvp, 2); - tctx->regs[TM_QW1_OS + TM_CPPR] = cppr; - tctx->regs[TM_QW1_OS + TM_LSMFB] = xive_get_field32(NVP2_W2_LSMFB, nvp->w2); - tctx->regs[TM_QW1_OS + TM_LGS] = xive_get_field32(NVP2_W2_LGS, nvp->w2); - tctx->regs[TM_QW1_OS + TM_T] = xive_get_field32(NVP2_W2_T, nvp->w2); + sig_regs[TM_CPPR] = cppr; + regs[TM_LSMFB] = xive_get_field32(NVP2_W2_LSMFB, nvp->w2); + regs[TM_LGS] = xive_get_field32(NVP2_W2_LGS, nvp->w2); + regs[TM_T] = xive_get_field32(NVP2_W2_T, nvp->w2); nvp->w1 = xive_set_field32(NVP2_W1_CO, nvp->w1, 1); nvp->w1 = xive_set_field32(NVP2_W1_CO_THRID_VALID, nvp->w1, 1); @@ -771,9 +921,18 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, /* * Checkout privilege: 0:OS, 1:Pool, 2:Hard * - * TODO: we only support OS push/pull + * TODO: we don't support hard push/pull */ - nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 0); + switch (ring) { + case TM_QW1_OS: + nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 0); + break; + case TM_QW2_HV_POOL: + nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 1); + break; + default: + g_assert_not_reached(); + } xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, nvp, 1); @@ -781,18 +940,14 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx, return cppr; } -static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx, +/* Restore TIMA VP context from NVP backlog */ +static void xive2_tctx_restore_nvp(Xive2Router *xrtr, XiveTCTX *tctx, + uint8_t ring, uint8_t nvp_blk, uint32_t nvp_idx, bool do_restore) { - XivePresenter *xptr = XIVE_PRESENTER(xrtr); + uint8_t *regs = &tctx->regs[ring]; uint8_t ipb; - uint8_t backlog_level; - uint8_t group_level; - uint8_t first_group; - uint8_t backlog_prio; - uint8_t group_prio; - uint8_t *regs = 
&tctx->regs[TM_QW1_OS]; Xive2Nvp nvp; /* @@ -812,9 +967,8 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx, } /* Automatically restore thread context registers */ - if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && - do_restore) { - xive2_tctx_restore_os_ctx(xrtr, tctx, nvp_blk, nvp_idx, &nvp); + if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_restore) { + xive2_tctx_restore_ctx(xrtr, tctx, ring, nvp_blk, nvp_idx, &nvp); } ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2); @@ -822,143 +976,230 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx, nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, 0); xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2); } + /* IPB bits in the backlog are merged with the TIMA IPB bits */ regs[TM_IPB] |= ipb; - backlog_prio = xive_ipb_to_pipr(ipb); - backlog_level = 0; - - first_group = xive_get_field32(NVP2_W0_PGOFIRST, nvp.w0); - if (first_group && regs[TM_LSMFB] < backlog_prio) { - group_prio = xive2_presenter_backlog_scan(xptr, nvp_blk, nvp_idx, - first_group, &group_level); - regs[TM_LSMFB] = group_prio; - if (regs[TM_LGS] && group_prio < backlog_prio) { - /* VP can take a group interrupt */ - xive2_presenter_backlog_decr(xptr, nvp_blk, nvp_idx, - group_prio, group_level); - backlog_prio = group_prio; - backlog_level = group_level; - } - } - - /* - * Compute the PIPR based on the restored state. - * It will raise the External interrupt signal if needed. - */ - xive_tctx_pipr_update(tctx, TM_QW1_OS, backlog_prio, backlog_level); } /* - * Updating the OS CAM line can trigger a resend of interrupt + * Updating the ring CAM line can trigger a resend of interrupt */ -void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, - hwaddr offset, uint64_t value, unsigned size) +static void xive2_tm_push_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size, + uint8_t ring) { uint32_t cam; - uint32_t qw1w2; - uint64_t qw1dw1; + uint32_t w2; + uint64_t dw1; uint8_t nvp_blk; uint32_t nvp_idx; - bool vo; + bool v; bool do_restore; + if (xive_ring_valid(tctx, ring)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Attempt to push VP to enabled" + " ring 0x%02x\n", ring); + return; + } + /* First update the thread context */ switch (size) { + case 1: + tctx->regs[ring + TM_WORD2] = value & 0xff; + cam = xive2_tctx_hw_cam_line(xptr, tctx); + cam |= ((value & 0xc0) << 24); /* V and H bits */ + break; case 4: cam = value; - qw1w2 = cpu_to_be32(cam); - memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1w2, 4); + w2 = cpu_to_be32(cam); + memcpy(&tctx->regs[ring + TM_WORD2], &w2, 4); break; case 8: cam = value >> 32; - qw1dw1 = cpu_to_be64(value); - memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1dw1, 8); + dw1 = cpu_to_be64(value); + memcpy(&tctx->regs[ring + TM_WORD2], &dw1, 8); break; default: g_assert_not_reached(); } - xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &vo, &do_restore); + xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &v, &do_restore); /* Check the interrupt pending bits */ - if (vo) { - xive2_tctx_need_resend(XIVE2_ROUTER(xptr), tctx, nvp_blk, nvp_idx, - do_restore); + if (v) { + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint8_t cur_ring; + + xive2_tctx_restore_nvp(xrtr, tctx, ring, + nvp_blk, nvp_idx, do_restore); + + for (cur_ring = TM_QW1_OS; cur_ring <= ring; + cur_ring += XIVE_TM_RING_SIZE) { + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, cur_ring); + uint8_t nsr = sig_regs[TM_NSR]; + + if (!xive_ring_valid(tctx, cur_ring)) { + continue; + } + + if (cur_ring ==
TM_QW2_HV_POOL) { + if (xive_nsr_indicates_exception(cur_ring, nsr)) { + g_assert(xive_nsr_exception_ring(cur_ring, nsr) == + TM_QW3_HV_PHYS); + xive2_redistribute(xrtr, tctx, + xive_nsr_exception_ring(ring, nsr)); + } + xive2_tctx_process_pending(tctx, TM_QW3_HV_PHYS); + break; + } + xive2_tctx_process_pending(tctx, cur_ring); + } } } +void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW1_OS); +} + +void xive2_tm_push_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW2_HV_POOL); +} + +void xive2_tm_push_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW3_HV_PHYS); +} + +/* returns -1 if ring is invalid, but still populates block and index */ static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring, - uint32_t *nvp_blk, uint32_t *nvp_idx) + uint8_t *nvp_blk, uint32_t *nvp_idx) { - uint32_t w2, cam; + uint32_t w2; + uint32_t cam = 0; + int rc = 0; w2 = xive_tctx_word2(&tctx->regs[ring]); switch (ring) { case TM_QW1_OS: if (!(be32_to_cpu(w2) & TM2_QW1W2_VO)) { - return -1; + rc = -1; } cam = xive_get_field32(TM2_QW1W2_OS_CAM, w2); break; case TM_QW2_HV_POOL: if (!(be32_to_cpu(w2) & TM2_QW2W2_VP)) { - return -1; + rc = -1; } cam = xive_get_field32(TM2_QW2W2_POOL_CAM, w2); break; case TM_QW3_HV_PHYS: if (!(be32_to_cpu(w2) & TM2_QW3W2_VT)) { - return -1; + rc = -1; } cam = xive2_tctx_hw_cam_line(tctx->xptr, tctx); break; default: - return -1; + rc = -1; } *nvp_blk = xive2_nvp_blk(cam); *nvp_idx = xive2_nvp_idx(cam); - return 0; + return rc; } -static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) +static void xive2_tctx_accept_el(XivePresenter *xptr, XiveTCTX *tctx, + uint8_t ring, uint8_t cl_ring) { - uint8_t *regs = &tctx->regs[ring]; - Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr); - uint8_t old_cppr, backlog_prio, first_group, group_level = 0; - uint8_t pipr_min, lsmfb_min, ring_min; - bool group_enabled; - uint32_t nvp_blk, nvp_idx; + uint64_t rd; + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint32_t nvp_idx, xive2_cfg; + uint8_t nvp_blk; Xive2Nvp nvp; - int rc; + uint64_t phys_addr; + uint8_t OGen = 0; - trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring, - regs[TM_IPB], regs[TM_PIPR], - cppr, regs[TM_NSR]); + xive2_tctx_get_nvp_indexes(tctx, cl_ring, &nvp_blk, &nvp_idx); - if (cppr > XIVE_PRIORITY_MAX) { - cppr = 0xff; + if (xive2_router_get_nvp(xrtr, (uint8_t)nvp_blk, nvp_idx, &nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n", + nvp_blk, nvp_idx); + return; + } + + if (!xive2_nvp_is_valid(&nvp)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n", + nvp_blk, nvp_idx); + return; + } + + + rd = xive_tctx_accept(tctx, ring); + + if (ring == TM_QW1_OS) { + OGen = tctx->regs[ring + TM_OGEN]; + } + xive2_cfg = xive2_router_get_config(xrtr); + phys_addr = xive2_nvp_reporting_addr(&nvp); + uint8_t report_data[REPORT_LINE_GEN1_SIZE]; + memset(report_data, 0xff, sizeof(report_data)); + if ((OGen == 1) || (xive2_cfg & XIVE2_GEN1_TIMA_OS)) { + report_data[8] = (rd >> 8) & 0xff; + report_data[9] = rd & 0xff; + } else { + report_data[0] = (rd >> 8) & 0xff; + report_data[1] = rd & 0xff; } + cpu_physical_memory_write(phys_addr, report_data, REPORT_LINE_GEN1_SIZE); +} + +void xive2_tm_ack_os_el(XivePresenter *xptr, XiveTCTX 
*tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + xive2_tctx_accept_el(xptr, tctx, TM_QW1_OS, TM_QW1_OS); +} + +/* Re-calculate and present pending interrupts */ +static void xive2_tctx_process_pending(XiveTCTX *tctx, uint8_t sig_ring) +{ + uint8_t *sig_regs = &tctx->regs[sig_ring]; + Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr); + uint8_t backlog_prio; + uint8_t first_group; + uint8_t group_level; + uint8_t pipr_min; + uint8_t lsmfb_min; + uint8_t ring_min; + uint8_t cppr = sig_regs[TM_CPPR]; + bool group_enabled; + Xive2Nvp nvp; + int rc; - old_cppr = regs[TM_CPPR]; - regs[TM_CPPR] = cppr; + g_assert(sig_ring == TM_QW3_HV_PHYS || sig_ring == TM_QW1_OS); + g_assert(sig_regs[TM_WORD2] & 0x80); + g_assert(!xive_nsr_indicates_group_exception(sig_ring, sig_regs[TM_NSR])); /* * Recompute the PIPR based on local pending interrupts. It will * be adjusted below if needed in case of pending group interrupts. */ - pipr_min = xive_ipb_to_pipr(regs[TM_IPB]); - group_enabled = !!regs[TM_LGS]; - lsmfb_min = (group_enabled) ? regs[TM_LSMFB] : 0xff; - ring_min = ring; +again: + pipr_min = xive_ipb_to_pipr(sig_regs[TM_IPB]); + group_enabled = !!sig_regs[TM_LGS]; + lsmfb_min = group_enabled ? sig_regs[TM_LSMFB] : 0xff; + ring_min = sig_ring; + group_level = 0; /* PHYS updates also depend on POOL values */ - if (ring == TM_QW3_HV_PHYS) { - uint8_t *pregs = &tctx->regs[TM_QW2_HV_POOL]; + if (sig_ring == TM_QW3_HV_PHYS) { + uint8_t *pool_regs = &tctx->regs[TM_QW2_HV_POOL]; /* POOL values only matter if POOL ctx is valid */ - if (pregs[TM_WORD2] & 0x80) { - - uint8_t pool_pipr = xive_ipb_to_pipr(pregs[TM_IPB]); - uint8_t pool_lsmfb = pregs[TM_LSMFB]; + if (pool_regs[TM_WORD2] & 0x80) { + uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]); + uint8_t pool_lsmfb = pool_regs[TM_LSMFB]; /* * Determine highest priority interrupt and @@ -972,7 +1213,7 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) } /* Values needed for group priority calculation */ - if (pregs[TM_LGS] && (pool_lsmfb < lsmfb_min)) { + if (pool_regs[TM_LGS] && (pool_lsmfb < lsmfb_min)) { group_enabled = true; lsmfb_min = pool_lsmfb; if (lsmfb_min < pipr_min) { @@ -981,32 +1222,26 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) } } } - regs[TM_PIPR] = pipr_min; - - rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx); - if (rc) { - qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid context\n"); - return; - } - - if (cppr < old_cppr) { - /* - * FIXME: check if there's a group interrupt being presented - * and if the new cppr prevents it. If so, then the group - * interrupt needs to be re-added to the backlog and - * re-triggered (see re-trigger END info in the NVGC - * structure) - */ - } if (group_enabled && lsmfb_min < cppr && - lsmfb_min < regs[TM_PIPR]) { + lsmfb_min < pipr_min) { + + uint8_t nvp_blk; + uint32_t nvp_idx; + /* * Thread has seen a group interrupt with a higher priority * than the new cppr or pending local interrupt. 
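The pipr_min computation above leans on the IPB-to-PIPR mapping; sketched here for reference (the in-tree helper is xive_ipb_to_pipr(), with clz32() coming from qemu/host-utils.h):

    static inline uint8_t ipb_to_pipr(uint8_t ipb)
    {
        /* most-significant pending bit wins; 0xff means nothing pending */
        return ipb ? clz32((uint32_t)ipb << 24) : 0xff;
    }
    /* e.g. IPB 0x28 (priorities 2 and 4 pending) yields PIPR 2 */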
Check the * backlog */ + rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx); + if (rc) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid " + "context\n"); + return; + } + if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n", nvp_blk, nvp_idx); @@ -1030,14 +1265,85 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr) nvp_blk, nvp_idx, first_group, &group_level); tctx->regs[ring_min + TM_LSMFB] = backlog_prio; - if (backlog_prio != 0xFF) { - xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx, - backlog_prio, group_level); - regs[TM_PIPR] = backlog_prio; + if (backlog_prio != lsmfb_min) { + /* + * If the group backlog scan finds a less favored or no interrupt, + * then re-do the processing which may turn up a more favored + * interrupt from IPB or the other pool. Backlog should not + * find a priority < LSMFB. + */ + g_assert(backlog_prio >= lsmfb_min); + goto again; + } + + xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx, + backlog_prio, group_level); + pipr_min = backlog_prio; + } + + if (pipr_min > cppr) { + pipr_min = cppr; + } + xive_tctx_pipr_set(tctx, ring_min, pipr_min, group_level); +} + +/* NOTE: CPPR only exists for TM_QW1_OS and TM_QW3_HV_PHYS */ +static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t sig_ring, uint8_t cppr) +{ + uint8_t *sig_regs = &tctx->regs[sig_ring]; + Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr); + uint8_t old_cppr; + uint8_t nsr = sig_regs[TM_NSR]; + + g_assert(sig_ring == TM_QW1_OS || sig_ring == TM_QW3_HV_PHYS); + + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0); + g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0); + + /* XXX: should show pool IPB for PHYS ring */ + trace_xive_tctx_set_cppr(tctx->cs->cpu_index, sig_ring, + sig_regs[TM_IPB], sig_regs[TM_PIPR], + cppr, nsr); + + if (cppr > XIVE_PRIORITY_MAX) { + cppr = 0xff; + } + + old_cppr = sig_regs[TM_CPPR]; + sig_regs[TM_CPPR] = cppr; + + /* Handle increased CPPR priority (lower value) */ + if (cppr < old_cppr) { + if (cppr <= sig_regs[TM_PIPR]) { + /* CPPR lowered below PIPR, must un-present interrupt */ + if (xive_nsr_indicates_exception(sig_ring, nsr)) { + if (xive_nsr_indicates_group_exception(sig_ring, nsr)) { + /* redistribute precluded active grp interrupt */ + xive2_redistribute(xrtr, tctx, + xive_nsr_exception_ring(sig_ring, nsr)); + return; + } + } + + /* interrupt is VP directed, pending in IPB */ + xive_tctx_pipr_set(tctx, sig_ring, cppr, 0); + return; + } else { + /* CPPR was lowered, but still above PIPR. No action needed. */ + return; } } - /* CPPR has changed, check if we need to raise a pending exception */ - xive_tctx_notify(tctx, ring_min, group_level); + + /* CPPR didn't change, nothing needs to be done */ + if (cppr == old_cppr) { + return; + } + + /* CPPR priority decreased (higher value) */ + if (!xive_nsr_indicates_exception(sig_ring, nsr)) { + xive2_tctx_process_pending(tctx, sig_ring); + } } void xive2_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx, @@ -1052,6 +1358,34 @@ void xive2_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx, xive2_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff); } +/* + * Adjust the IPB to allow a CPU to process event queues of other + * priorities during one physical interrupt cycle. 
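xive2_tm_set_os_pending() below folds the written priority into the IPB using the inverse mapping, roughly (the in-tree helper is xive_priority_to_ipb()):

    static inline uint8_t priority_to_ipb(uint8_t priority)
    {
        /* priority 0 owns the top bit; out-of-range priorities set nothing */
        return priority > XIVE_PRIORITY_MAX ? 0 : 1 << (7 - priority);
    }
    /* priority 2 -> 0x20, so ipb_to_pipr(priority_to_ipb(p)) == p for p <= 7 */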
+ */ +void xive2_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx, + hwaddr offset, uint64_t value, unsigned size) +{ + Xive2Router *xrtr = XIVE2_ROUTER(xptr); + uint8_t ring = TM_QW1_OS; + uint8_t *regs = &tctx->regs[ring]; + uint8_t priority = value & 0xff; + + /* + * XXX: should this simply set a bit in IPB and wait for it to be picked + * up next cycle, or is it supposed to present it now? We implement the + * latter here. + */ + regs[TM_IPB] |= xive_priority_to_ipb(priority); + if (xive_ipb_to_pipr(regs[TM_IPB]) >= regs[TM_PIPR]) { + return; + } + if (xive_nsr_indicates_group_exception(ring, regs[TM_NSR])) { + xive2_redistribute(xrtr, tctx, ring); + } + + xive_tctx_pipr_present(tctx, ring, priority, 0); +} + static void xive2_tctx_set_target(XiveTCTX *tctx, uint8_t ring, uint8_t target) { uint8_t *regs = &tctx->regs[ring]; @@ -1259,9 +1593,7 @@ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority) { - /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */ - uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring; - uint8_t *alt_regs = &tctx->regs[alt_ring]; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); /* * The xive2_presenter_tctx_match() above tells if there's a match @@ -1269,7 +1601,7 @@ bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority) * priority to know if the thread can take the interrupt now or if * it is precluded. */ - if (priority < alt_regs[TM_CPPR]) { + if (priority < sig_regs[TM_PIPR]) { return false; } return true; @@ -1322,12 +1654,14 @@ static bool xive2_router_end_es_notify(Xive2Router *xrtr, uint8_t end_blk, * message has the same parameters as in the function below. */ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, - uint32_t end_idx, uint32_t end_data) + uint32_t end_idx, uint32_t end_data, + bool redistribute) { Xive2End end; uint8_t priority; uint8_t format; - bool found, precluded; + XiveTCTXMatch match; + bool crowd, cam_ignore; uint8_t nvx_blk; uint32_t nvx_idx; @@ -1350,7 +1684,8 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, return; } - if (xive2_end_is_enqueue(&end)) { + if (!redistribute && xive2_end_is_enqueue(&end)) { + trace_xive_end_enqueue(end_blk, end_idx, end_data); xive2_end_enqueue(&end, end_data); /* Enqueuing event data modifies the EQ toggle and index */ xive2_router_write_end(xrtr, end_blk, end_idx, &end, 1); @@ -1396,16 +1731,28 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, */ nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end.w6); nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end.w6); - - found = xive_presenter_notify(xrtr->xfb, format, nvx_blk, nvx_idx, - xive2_end_is_crowd(&end), xive2_end_is_ignore(&end), - priority, - xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7), - &precluded); + crowd = xive2_end_is_crowd(&end); + cam_ignore = xive2_end_is_ignore(&end); /* TODO: Auto EOI.
*/ + if (xive_presenter_match(xrtr->xfb, format, nvx_blk, nvx_idx, + crowd, cam_ignore, priority, + xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7), + &match)) { + XiveTCTX *tctx = match.tctx; + uint8_t ring = match.ring; + uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring); + uint8_t nsr = sig_regs[TM_NSR]; + uint8_t group_level; + + if (priority < sig_regs[TM_PIPR] && + xive_nsr_indicates_group_exception(ring, nsr)) { + xive2_redistribute(xrtr, tctx, xive_nsr_exception_ring(ring, nsr)); + } - if (found) { + group_level = xive_get_group_level(crowd, cam_ignore, nvx_blk, nvx_idx); + trace_xive_presenter_notify(nvx_blk, nvx_idx, ring, group_level); + xive_tctx_pipr_present(tctx, ring, priority, group_level); return; } @@ -1423,7 +1770,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, return; } - if (!xive2_end_is_ignore(&end)) { + if (!cam_ignore) { uint8_t ipb; Xive2Nvp nvp; @@ -1452,9 +1799,6 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, } else { Xive2Nvgc nvgc; uint32_t backlog; - bool crowd; - - crowd = xive2_end_is_crowd(&end); /* * For groups and crowds, the per-priority backlog @@ -1486,9 +1830,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk, if (backlog == 1) { XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xrtr->xfb); xfc->broadcast(xrtr->xfb, nvx_blk, nvx_idx, - xive2_end_is_crowd(&end), - xive2_end_is_ignore(&end), - priority); + crowd, cam_ignore, priority); if (!xive2_end_is_precluded_escalation(&end)) { /* @@ -1522,18 +1864,41 @@ do_escalation: } } - /* - * The END trigger becomes an Escalation trigger - */ - xive2_router_end_notify(xrtr, - xive_get_field32(END2_W4_END_BLOCK, end.w4), - xive_get_field32(END2_W4_ESC_END_INDEX, end.w4), - xive_get_field32(END2_W5_ESC_END_DATA, end.w5)); + if (xive2_end_is_escalate_end(&end)) { + /* + * Perform END Adaptive escalation processing + * The END trigger becomes an Escalation trigger + */ + uint8_t esc_blk = xive_get_field32(END2_W4_END_BLOCK, end.w4); + uint32_t esc_idx = xive_get_field32(END2_W4_ESC_END_INDEX, end.w4); + uint32_t esc_data = xive_get_field32(END2_W5_ESC_END_DATA, end.w5); + trace_xive_escalate_end(end_blk, end_idx, esc_blk, esc_idx, esc_data); + xive2_router_end_notify(xrtr, esc_blk, esc_idx, esc_data, false); + } /* end END adaptive escalation */ + + else { + uint32_t lisn; /* Logical Interrupt Source Number */ + + /* + * Perform ESB escalation processing + * E[N] == 1 --> N + * Req[Block] <- E[ESB_Block] + * Req[Index] <- E[ESB_Index] + * Req[Offset] <- 0x000 + * Execute <ESB Store> Req command + */ + lisn = XIVE_EAS(xive_get_field32(END2_W4_END_BLOCK, end.w4), + xive_get_field32(END2_W4_ESC_END_INDEX, end.w4)); + + trace_xive_escalate_esb(end_blk, end_idx, lisn); + xive2_notify(xrtr, lisn, true /* pq_checked */); + } + + return; } -void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) +void xive2_notify(Xive2Router *xrtr , uint32_t lisn, bool pq_checked) { - Xive2Router *xrtr = XIVE2_ROUTER(xn); uint8_t eas_blk = XIVE_EAS_BLOCK(lisn); uint32_t eas_idx = XIVE_EAS_INDEX(lisn); Xive2Eas eas; @@ -1576,13 +1941,31 @@ void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) return; } + /* TODO: add support for EAS resume */ + if (xive2_eas_is_resume(&eas)) { + qemu_log_mask(LOG_UNIMP, + "XIVE: EAS resume processing unimplemented - LISN %x\n", + lisn); + return; + } + /* * The event trigger becomes an END trigger */ xive2_router_end_notify(xrtr, - xive_get_field64(EAS2_END_BLOCK, eas.w), - 
xive_get_field64(EAS2_END_INDEX, eas.w), - xive_get_field64(EAS2_END_DATA, eas.w)); + xive_get_field64(EAS2_END_BLOCK, eas.w), + xive_get_field64(EAS2_END_INDEX, eas.w), + xive_get_field64(EAS2_END_DATA, eas.w), + false); + return; +} + +void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked) +{ + Xive2Router *xrtr = XIVE2_ROUTER(xn); + + xive2_notify(xrtr, lisn, pq_checked); + return; } static const Property xive2_router_properties[] = { diff --git a/hw/loongarch/virt-acpi-build.c b/hw/loongarch/virt-acpi-build.c index 2cd2d9d..8c2228a 100644 --- a/hw/loongarch/virt-acpi-build.c +++ b/hw/loongarch/virt-acpi-build.c @@ -557,7 +557,9 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); build_srat(tables_blob, tables->linker, machine); acpi_add_table(table_offsets, tables_blob); - spcr_setup(tables_blob, tables->linker, machine); + + if (machine->acpi_spcr_enabled) + spcr_setup(tables_blob, tables->linker, machine); if (machine->numa_state->num_nodes) { if (machine->numa_state->have_numa_distance) { diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 94e7274..be609ff 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -8,6 +8,7 @@ * * SPDX-License-Identifier: GPL-v2-only */ +#include <math.h> #include "qemu/osdep.h" #include "qemu/units.h" @@ -225,10 +226,16 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) * future. */ for (i = 0; i < ct3d->dc.num_regions; i++) { + ct3d->dc.regions[i].nonvolatile = false; + ct3d->dc.regions[i].sharable = false; + ct3d->dc.regions[i].hw_managed_coherency = false; + ct3d->dc.regions[i].ic_specific_dc_management = false; + ct3d->dc.regions[i].rdonly = false; ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++, ct3d->dc.regions[i].len, - false, true, region_base); + ct3d->dc.regions[i].nonvolatile, + true, region_base); ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1; cur_ent += CT3_CDAT_NUM_ENTRIES; @@ -634,6 +641,8 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) uint64_t region_len; uint64_t decode_len; uint64_t blk_size = 2 * MiB; + /* Only 1 block size is supported for now. */ + uint64_t supported_blk_size_bitmask = blk_size; CXLDCRegion *region; MemoryRegion *mr; uint64_t dc_size; @@ -679,9 +688,11 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp) .block_size = blk_size, /* dsmad_handle set when creating CDAT table entries */ .flags = 0, + .supported_blk_size_bitmask = supported_blk_size_bitmask, }; ct3d->dc.total_capacity += region->len; region->blk_bitmap = bitmap_new(region->len / region->block_size); + qemu_mutex_init(&region->bitmap_lock); } QTAILQ_INIT(&ct3d->dc.extents); QTAILQ_INIT(&ct3d->dc.extents_pending); @@ -1010,6 +1021,7 @@ void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, return; } + QEMU_LOCK_GUARD(&region->bitmap_lock); bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size, len / region->block_size); } @@ -1036,6 +1048,7 @@ bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, * if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is * backed with DC extents, return true; else return false.
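Concretely, for the test below, assume a region at base 0x10000000 with 2 MiB blocks:

    /*
     * dpa = 0x10400000, len = 4 MiB:
     *   nr    = (0x10400000 - 0x10000000) / 0x200000 = 2
     *   nbits = 0x400000 / 0x200000                  = 2
     * find_next_zero_bit(blk_bitmap, 4, 2) == 4 exactly when bits 2 and 3
     * are both set, i.e. the whole [dpa, dpa + len) span is extent-backed.
     */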
*/ + QEMU_LOCK_GUARD(®ion->bitmap_lock); return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits; } @@ -1057,6 +1070,7 @@ void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa, nr = (dpa - region->base) / region->block_size; nbits = len / region->block_size; + QEMU_LOCK_GUARD(®ion->bitmap_lock); bitmap_clear(region->blk_bitmap, nr, nbits); } @@ -1576,9 +1590,9 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type, pcie_aer_inject_error(PCI_DEVICE(obj), &err); } -static void cxl_assign_event_header(CXLEventRecordHdr *hdr, - const QemuUUID *uuid, uint32_t flags, - uint8_t length, uint64_t timestamp) +void cxl_assign_event_header(CXLEventRecordHdr *hdr, + const QemuUUID *uuid, uint32_t flags, + uint8_t length, uint64_t timestamp) { st24_le_p(&hdr->flags, flags); hdr->length = length; @@ -1866,28 +1880,13 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, } } -/* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */ -static const QemuUUID dynamic_capacity_uuid = { - .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f, - 0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a), -}; - -typedef enum CXLDCEventType { - DC_EVENT_ADD_CAPACITY = 0x0, - DC_EVENT_RELEASE_CAPACITY = 0x1, - DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2, - DC_EVENT_REGION_CONFIG_UPDATED = 0x3, - DC_EVENT_ADD_CAPACITY_RSP = 0x4, - DC_EVENT_CAPACITY_RELEASED = 0x5, -} CXLDCEventType; - /* * Check whether the range [dpa, dpa + len - 1] has overlaps with extents in * the list. * Return value: return true if has overlaps; otherwise, return false */ -static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list, - uint64_t dpa, uint64_t len) +bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list, + uint64_t dpa, uint64_t len) { CXLDCExtent *ent; Range range1, range2; @@ -1932,8 +1931,8 @@ bool cxl_extents_contains_dpa_range(CXLDCExtentList *list, return false; } -static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list, - uint64_t dpa, uint64_t len) +bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list, + uint64_t dpa, uint64_t len) { CXLDCExtentGroup *group; @@ -1958,15 +1957,11 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, CxlDynamicCapacityExtentList *records, Error **errp) { Object *obj; - CXLEventDynamicCapacity dCap = {}; - CXLEventRecordHdr *hdr = &dCap.hdr; CXLType3Dev *dcd; - uint8_t flags = 1 << CXL_EVENT_TYPE_INFO; uint32_t num_extents = 0; CxlDynamicCapacityExtentList *list; CXLDCExtentGroup *group = NULL; g_autofree CXLDCExtentRaw *extents = NULL; - uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP; uint64_t dpa, offset, len, block_size; g_autofree unsigned long *blk_bitmap = NULL; int i; @@ -2076,40 +2071,10 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path, } if (group) { cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group); + dcd->dc.total_extent_count += num_extents; } - /* - * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record - * - * All Dynamic Capacity event records shall set the Event Record Severity - * field in the Common Event Record Format to Informational Event. All - * Dynamic Capacity related events shall be logged in the Dynamic Capacity - * Event Log. 
- */ - cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap), - cxl_device_get_timestamp(&dcd->cxl_dstate)); - - dCap.type = type; - /* FIXME: for now, validity flag is cleared */ - dCap.validity_flags = 0; - stw_le_p(&dCap.host_id, hid); - /* only valid for DC_REGION_CONFIG_UPDATED event */ - dCap.updated_region_id = 0; - for (i = 0; i < num_extents; i++) { - memcpy(&dCap.dynamic_capacity_extent, &extents[i], - sizeof(CXLDCExtentRaw)); - - dCap.flags = 0; - if (i < num_extents - 1) { - /* Set "More" flag */ - dCap.flags |= BIT(0); - } - - if (cxl_event_insert(&dcd->cxl_dstate, enc_log, - (CXLEventRecordRaw *)&dCap)) { - cxl_event_irq_assert(dcd); - } - } + cxl_create_dc_event_records_for_extents(dcd, type, extents, num_extents); } void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id, diff --git a/hw/microblaze/Kconfig b/hw/microblaze/Kconfig index b0214b2..72d8072 100644 --- a/hw/microblaze/Kconfig +++ b/hw/microblaze/Kconfig @@ -1,7 +1,7 @@ config PETALOGIX_S3ADSP1800 bool default y - depends on MICROBLAZE + depends on MICROBLAZE && FDT select PFLASH_CFI01 select XILINX select XILINX_AXI @@ -11,7 +11,7 @@ config PETALOGIX_S3ADSP1800 config PETALOGIX_ML605 bool default y - depends on MICROBLAZE + depends on MICROBLAZE && FDT select PFLASH_CFI01 select SERIAL_MM select SSI_M25P80 diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig index b09c89a..f84fffc 100644 --- a/hw/mips/Kconfig +++ b/hw/mips/Kconfig @@ -76,7 +76,7 @@ config LOONGSON3V config MIPS_CPS bool - select MIPS_ITU + select MIPS_ITU if TCG config MIPS_BOSTON bool diff --git a/hw/mips/cps.c b/hw/mips/cps.c index 2a3ba3f..e47695e 100644 --- a/hw/mips/cps.c +++ b/hw/mips/cps.c @@ -24,7 +24,7 @@ #include "hw/mips/mips.h" #include "hw/qdev-clock.h" #include "hw/qdev-properties.h" -#include "system/kvm.h" +#include "system/tcg.h" #include "system/reset.h" qemu_irq get_cps_irq(MIPSCPSState *s, int pin_number) @@ -59,7 +59,7 @@ static bool cpu_mips_itu_supported(CPUMIPSState *env) { bool is_mt = (env->CP0_Config5 & (1 << CP0C5_VP)) || ase_mt_available(env); - return is_mt && !kvm_enabled(); + return is_mt && tcg_enabled(); } static void mips_cps_realize(DeviceState *dev, Error **errp) diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig index ec0fa5a..4e35657 100644 --- a/hw/misc/Kconfig +++ b/hw/misc/Kconfig @@ -47,6 +47,18 @@ config A9SCU config ARM11SCU bool +config MAX78000_AES + bool + +config MAX78000_GCR + bool + +config MAX78000_ICC + bool + +config MAX78000_TRNG + bool + config MOS6522 bool @@ -107,6 +119,7 @@ config STM32L4X5_RCC config MIPS_ITU bool + depends on TCG config MPS2_FPGAIO bool diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c index 4930e00..a0ab5ee 100644 --- a/hw/misc/aspeed_scu.c +++ b/hw/misc/aspeed_scu.c @@ -91,6 +91,7 @@ #define BMC_DEV_ID TO_REG(0x1A4) #define AST2600_PROT_KEY TO_REG(0x00) +#define AST2600_PROT_KEY2 TO_REG(0x10) #define AST2600_SILICON_REV TO_REG(0x04) #define AST2600_SILICON_REV2 TO_REG(0x14) #define AST2600_SYS_RST_CTRL TO_REG(0x40) @@ -176,6 +177,7 @@ #define AST2700_SCUIO_UARTCLK_GEN TO_REG(0x330) #define AST2700_SCUIO_HUARTCLK_GEN TO_REG(0x334) #define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388) +#define AST2700_SCUIO_FREQ_CNT_CTL TO_REG(0x3A0) #define SCU_IO_REGION_SIZE 0x1000 @@ -722,6 +724,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, int reg = TO_REG(offset); /* Truncate here so bitwise operations below behave as expected */ uint32_t data = data64; + bool prot_data_state = data == ASPEED_SCU_PROT_KEY; + bool unlocked = 
s->regs[AST2600_PROT_KEY] && s->regs[AST2600_PROT_KEY2]; if (reg >= ASPEED_AST2600_SCU_NR_REGS) { qemu_log_mask(LOG_GUEST_ERROR, @@ -730,15 +734,24 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset, return; } - if (reg > PROT_KEY && !s->regs[PROT_KEY]) { + if ((reg != AST2600_PROT_KEY && reg != AST2600_PROT_KEY2) && !unlocked) { qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__); + return; } trace_aspeed_scu_write(offset, size, data); switch (reg) { case AST2600_PROT_KEY: + /* + * Writing the key value to SCU000 updates both protection + * registers at once, while writing SCU010 updates only SCU010. + */ + s->regs[AST2600_PROT_KEY] = prot_data_state; + s->regs[AST2600_PROT_KEY2] = prot_data_state; + return; + case AST2600_PROT_KEY2: + s->regs[AST2600_PROT_KEY2] = prot_data_state; return; case AST2600_HW_STRAP1: case AST2600_HW_STRAP2: @@ -1022,6 +1035,10 @@ static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset, s->regs[reg - 1] ^= data; updated = true; break; + case AST2700_SCUIO_FREQ_CNT_CTL: + s->regs[reg] = deposit32(s->regs[reg], 6, 1, !!(data & BIT(1))); + updated = true; + break; default: qemu_log_mask(LOG_GUEST_ERROR, "%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n", @@ -1066,6 +1083,7 @@ static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = { [AST2700_SCUIO_UARTCLK_GEN] = 0x00014506, [AST2700_SCUIO_HUARTCLK_GEN] = 0x000145c0, [AST2700_SCUIO_CLK_DUTY_MEAS_RST] = 0x0c9100d2, + [AST2700_SCUIO_FREQ_CNT_CTL] = 0x00000080, }; static void aspeed_2700_scuio_class_init(ObjectClass *klass, const void *data) diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c index f04d993..dff7cc3 100644 --- a/hw/misc/aspeed_sdmc.c +++ b/hw/misc/aspeed_sdmc.c @@ -570,6 +570,9 @@ static void aspeed_2700_sdmc_reset(DeviceState *dev) /* Set ram size bit and default values */ s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0); + /* Skipping dram init */ + s->regs[R_MAIN_CONTROL] = BIT(16); + if (s->unlocked) { s->regs[R_2700_PROT] = PROT_UNLOCKED; } diff --git a/hw/misc/ivshmem-flat.c b/hw/misc/ivshmem-flat.c index be28c24..fe4be6b 100644 --- a/hw/misc/ivshmem-flat.c +++ b/hw/misc/ivshmem-flat.c @@ -362,7 +362,7 @@ static bool ivshmem_flat_connect_server(DeviceState *dev, Error **errp) * * ivshmem_flat_recv_msg() calls return 'msg' and 'fd'. * - * See ./docs/specs/ivshmem-spec.txt for details on the protocol. + * See docs/specs/ivshmem-spec.rst for details on the protocol.
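As a rough sketch of the stream decoded from Step 0 onwards (each message is a little-endian int64, optionally accompanied by an SCM_RIGHTS file descriptor; the spec remains the authoritative reference for the exact sequence):

    /*
     *   msg == -1                  fd is the shared memory region
     *   msg == own ID or peer ID   fd < 0 announces (or disconnects) the
     *                              peer, fd >= 0 delivers one of its
     *                              interrupt eventfds
     * which is also why process_msg() in ivshmem-pci.c below only
     * close()s the descriptor when fd >= 0.
     */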
*/ /* Step 0 */ diff --git a/hw/misc/ivshmem-pci.c b/hw/misc/ivshmem-pci.c index 5a10bca..d47ae73 100644 --- a/hw/misc/ivshmem-pci.c +++ b/hw/misc/ivshmem-pci.c @@ -479,6 +479,11 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) struct stat buf; size_t size; + if (fd < 0) { + error_setg(errp, "server didn't provide fd with shared memory message"); + return; + } + if (s->ivshmem_bar2) { error_setg(errp, "server sent unexpected shared memory message"); close(fd); @@ -553,7 +558,9 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { error_setg(errp, "server sent invalid message %" PRId64, msg); - close(fd); + if (fd >= 0) { + close(fd); + } return; } diff --git a/hw/misc/max78000_aes.c b/hw/misc/max78000_aes.c new file mode 100644 index 0000000..d883ddd --- /dev/null +++ b/hw/misc/max78000_aes.c @@ -0,0 +1,229 @@ +/* + * MAX78000 AES + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "hw/misc/max78000_aes.h" +#include "crypto/aes.h" + +static void max78000_aes_set_status(Max78000AesState *s) +{ + s->status = 0; + if (s->result_index >= 16) { + s->status |= OUTPUT_FULL; + } + if (s->result_index == 0) { + s->status |= OUTPUT_EMPTY; + } + if (s->data_index >= 16) { + s->status |= INPUT_FULL; + } + if (s->data_index == 0) { + s->status |= INPUT_EMPTY; + } +} + +static uint64_t max78000_aes_read(void *opaque, hwaddr addr, + unsigned int size) +{ + Max78000AesState *s = opaque; + switch (addr) { + case CTRL: + return s->ctrl; + + case STATUS: + return s->status; + + case INTFL: + return s->intfl; + + case INTEN: + return s->inten; + + case FIFO: + if (s->result_index >= 4) { + s->intfl &= ~DONE; + s->result_index -= 4; + max78000_aes_set_status(s); + return ldl_be_p(&s->result[s->result_index]); + } else { + return 0; + } + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + + } + return 0; +} + +static void max78000_aes_do_crypto(Max78000AesState *s) +{ + int keylen = 256; + uint8_t *keydata = s->key; + if ((s->ctrl & KEY_SIZE) == 0) { + keylen = 128; + keydata += 16; + } else if ((s->ctrl & KEY_SIZE) == 1 << 6) { + keylen = 192; + keydata += 8; + } + + /* + * The MAX78000 AES engine stores an internal key, which it uses only + * for decryption. This results in the slightly odd looking pairs of + * set_encrypt and set_decrypt calls below; s->internal_key is + * being stored for later use in both cases.
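The key window layout implied by the keydata offsets above and by the KEY_BASE write handler below, which fills the 32-byte buffer back to front so the key material always ends at s->key[31]:

    /*
     *   256-bit key: keydata = s->key         covers s->key[0..31]
     *   192-bit key: keydata = s->key + 8     covers s->key[8..31]
     *   128-bit key: keydata = s->key + 16    covers s->key[16..31]
     */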
+ */ + AES_KEY key; + if ((s->ctrl & TYPE) == 0) { + AES_set_encrypt_key(keydata, keylen, &key); + AES_set_decrypt_key(keydata, keylen, &s->internal_key); + AES_encrypt(s->data, s->result, &key); + s->result_index = 16; + } else if ((s->ctrl & TYPE) == 1 << 8) { + AES_set_decrypt_key(keydata, keylen, &key); + AES_set_decrypt_key(keydata, keylen, &s->internal_key); + AES_decrypt(s->data, s->result, &key); + s->result_index = 16; + } else { + AES_decrypt(s->data, s->result, &s->internal_key); + s->result_index = 16; + } + s->intfl |= DONE; +} + +static void max78000_aes_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000AesState *s = opaque; + uint32_t val = val64; + switch (addr) { + case CTRL: + if (val & OUTPUT_FLUSH) { + s->result_index = 0; + val &= ~OUTPUT_FLUSH; + } + if (val & INPUT_FLUSH) { + s->data_index = 0; + val &= ~INPUT_FLUSH; + } + if (val & START) { + max78000_aes_do_crypto(s); + } + + /* Hardware appears to stay enabled even if 0 written */ + s->ctrl = val | (s->ctrl & AES_EN); + break; + + case FIFO: + assert(s->data_index <= 12); + stl_be_p(&s->data[12 - s->data_index], val); + s->data_index += 4; + if (s->data_index >= 16) { + s->data_index = 0; + max78000_aes_do_crypto(s); + } + break; + + case KEY_BASE ... KEY_END - 4: + stl_be_p(&s->key[(KEY_END - KEY_BASE - 4) - (addr - KEY_BASE)], val); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + + } + max78000_aes_set_status(s); +} + +static void max78000_aes_reset_hold(Object *obj, ResetType type) +{ + Max78000AesState *s = MAX78000_AES(obj); + s->ctrl = 0; + s->status = 0; + s->intfl = 0; + s->inten = 0; + + s->data_index = 0; + s->result_index = 0; + + memset(s->data, 0, sizeof(s->data)); + memset(s->key, 0, sizeof(s->key)); + memset(s->result, 0, sizeof(s->result)); + memset(&s->internal_key, 0, sizeof(s->internal_key)); +} + +static const MemoryRegionOps max78000_aes_ops = { + .read = max78000_aes_read, + .write = max78000_aes_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const VMStateDescription vmstate_max78000_aes = { + .name = TYPE_MAX78000_AES, + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(ctrl, Max78000AesState), + VMSTATE_UINT32(status, Max78000AesState), + VMSTATE_UINT32(intfl, Max78000AesState), + VMSTATE_UINT32(inten, Max78000AesState), + VMSTATE_UINT8_ARRAY(data, Max78000AesState, 16), + VMSTATE_UINT8_ARRAY(key, Max78000AesState, 32), + VMSTATE_UINT8_ARRAY(result, Max78000AesState, 16), + VMSTATE_UINT32_ARRAY(internal_key.rd_key, Max78000AesState, 60), + VMSTATE_INT32(internal_key.rounds, Max78000AesState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_aes_init(Object *obj) +{ + Max78000AesState *s = MAX78000_AES(obj); + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &max78000_aes_ops, s, + TYPE_MAX78000_AES, 0xc00); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + +} + +static void max78000_aes_class_init(ObjectClass *klass, const void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + rc->phases.hold = max78000_aes_reset_hold; + dc->vmsd = &vmstate_max78000_aes; + +} + +static const TypeInfo max78000_aes_info = { + .name = TYPE_MAX78000_AES, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000AesState), + .instance_init = max78000_aes_init, + .class_init =
max78000_aes_class_init, +}; + +static void max78000_aes_register_types(void) +{ + type_register_static(&max78000_aes_info); +} + +type_init(max78000_aes_register_types) diff --git a/hw/misc/max78000_gcr.c b/hw/misc/max78000_gcr.c new file mode 100644 index 0000000..fbbc92c --- /dev/null +++ b/hw/misc/max78000_gcr.c @@ -0,0 +1,351 @@ +/* + * MAX78000 Global Control Registers + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/irq.h" +#include "system/runstate.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" +#include "hw/char/max78000_uart.h" +#include "hw/misc/max78000_trng.h" +#include "hw/misc/max78000_aes.h" +#include "hw/misc/max78000_gcr.h" + + +static void max78000_gcr_reset_hold(Object *obj, ResetType type) +{ + DeviceState *dev = DEVICE(obj); + Max78000GcrState *s = MAX78000_GCR(dev); + s->sysctrl = 0x21002; + s->rst0 = 0; + /* All clocks are always ready */ + s->clkctrl = 0x3e140008; + s->pm = 0x3f000; + s->pclkdiv = 0; + s->pclkdis0 = 0xffffffff; + s->memctrl = 0x5; + s->memz = 0; + s->sysst = 0; + s->rst1 = 0; + s->pckdis1 = 0xffffffff; + s->eventen = 0; + s->revision = 0xa1; + s->sysie = 0; + s->eccerr = 0; + s->ecced = 0; + s->eccie = 0; + s->eccaddr = 0; +} + +static uint64_t max78000_gcr_read(void *opaque, hwaddr addr, + unsigned int size) +{ + Max78000GcrState *s = opaque; + + switch (addr) { + case SYSCTRL: + return s->sysctrl; + + case RST0: + return s->rst0; + + case CLKCTRL: + return s->clkctrl; + + case PM: + return s->pm; + + case PCLKDIV: + return s->pclkdiv; + + case PCLKDIS0: + return s->pclkdis0; + + case MEMCTRL: + return s->memctrl; + + case MEMZ: + return s->memz; + + case SYSST: + return s->sysst; + + case RST1: + return s->rst1; + + case PCKDIS1: + return s->pckdis1; + + case EVENTEN: + return s->eventen; + + case REVISION: + return s->revision; + + case SYSIE: + return s->sysie; + + case ECCERR: + return s->eccerr; + + case ECCED: + return s->ecced; + + case ECCIE: + return s->eccie; + + case ECCADDR: + return s->eccaddr; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + return 0; + + } +} + +static void max78000_gcr_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000GcrState *s = opaque; + uint32_t val = val64; + uint8_t zero[0xc000] = {0}; + switch (addr) { + case SYSCTRL: + /* Checksum calculations always pass immediately */ + s->sysctrl = (val & 0x30000) | 0x1002; + break; + + case RST0: + if (val & SYSTEM_RESET) { + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + if (val & PERIPHERAL_RESET) { + /* + * Peripheral reset resets all peripherals. The CPU + * retains its state. The GPIO, watchdog timers, AoD, + * RAM retention, and general control registers (GCR), + * including the clock configuration, are unaffected. 
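+ * + * Guest-side sketch (writel/readl again hypothetical): request the reset + * and poll for completion; every reset modelled below happens + * synchronously and the bit is never latched into rst0, so the poll + * falls through immediately: + * + * writel(gcr_base + RST0, PERIPHERAL_RESET); + * while (readl(gcr_base + RST0) & PERIPHERAL_RESET) { + * } // exits at once in this model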
+ */ + val = UART2_RESET | UART1_RESET | UART0_RESET | + ADC_RESET | CNN_RESET | TRNG_RESET | + RTC_RESET | I2C0_RESET | SPI1_RESET | + TMR3_RESET | TMR2_RESET | TMR1_RESET | + TMR0_RESET | WDT0_RESET | DMA_RESET; + } + if (val & SOFT_RESET) { + /* Soft reset also resets GPIO */ + val = UART2_RESET | UART1_RESET | UART0_RESET | + ADC_RESET | CNN_RESET | TRNG_RESET | + RTC_RESET | I2C0_RESET | SPI1_RESET | + TMR3_RESET | TMR2_RESET | TMR1_RESET | + TMR0_RESET | GPIO1_RESET | GPIO0_RESET | + DMA_RESET; + } + if (val & UART2_RESET) { + device_cold_reset(s->uart2); + } + if (val & UART1_RESET) { + device_cold_reset(s->uart1); + } + if (val & UART0_RESET) { + device_cold_reset(s->uart0); + } + if (val & TRNG_RESET) { + device_cold_reset(s->trng); + } + if (val & AES_RESET) { + device_cold_reset(s->aes); + } + /* TODO: As other devices are implemented, add them here */ + break; + + case CLKCTRL: + s->clkctrl = val | SYSCLK_RDY; + break; + + case PM: + s->pm = val; + break; + + case PCLKDIV: + s->pclkdiv = val; + break; + + case PCLKDIS0: + s->pclkdis0 = val; + break; + + case MEMCTRL: + s->memctrl = val; + break; + + case MEMZ: + if (val & ram0) { + address_space_write(&s->sram_as, SYSRAM0_START, + MEMTXATTRS_UNSPECIFIED, zero, 0x8000); + } + if (val & ram1) { + address_space_write(&s->sram_as, SYSRAM1_START, + MEMTXATTRS_UNSPECIFIED, zero, 0x8000); + } + if (val & ram2) { + address_space_write(&s->sram_as, SYSRAM2_START, + MEMTXATTRS_UNSPECIFIED, zero, 0xC000); + } + if (val & ram3) { + address_space_write(&s->sram_as, SYSRAM3_START, + MEMTXATTRS_UNSPECIFIED, zero, 0x4000); + } + break; + + case SYSST: + s->sysst = val; + break; + + case RST1: + /* TODO: As other devices are implemented, add them here */ + s->rst1 = val; + break; + + case PCKDIS1: + s->pckdis1 = val; + break; + + case EVENTEN: + s->eventen = val; + break; + + case REVISION: + s->revision = val; + break; + + case SYSIE: + s->sysie = val; + break; + + case ECCERR: + s->eccerr = val; + break; + + case ECCED: + s->ecced = val; + break; + + case ECCIE: + s->eccie = val; + break; + + case ECCADDR: + s->eccaddr = val; + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + break; + + } +} + +static const Property max78000_gcr_properties[] = { + DEFINE_PROP_LINK("sram", Max78000GcrState, sram, + TYPE_MEMORY_REGION, MemoryRegion*), + DEFINE_PROP_LINK("uart0", Max78000GcrState, uart0, + TYPE_MAX78000_UART, DeviceState*), + DEFINE_PROP_LINK("uart1", Max78000GcrState, uart1, + TYPE_MAX78000_UART, DeviceState*), + DEFINE_PROP_LINK("uart2", Max78000GcrState, uart2, + TYPE_MAX78000_UART, DeviceState*), + DEFINE_PROP_LINK("trng", Max78000GcrState, trng, + TYPE_MAX78000_TRNG, DeviceState*), + DEFINE_PROP_LINK("aes", Max78000GcrState, aes, + TYPE_MAX78000_AES, DeviceState*), +}; + +static const MemoryRegionOps max78000_gcr_ops = { + .read = max78000_gcr_read, + .write = max78000_gcr_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const VMStateDescription vmstate_max78000_gcr = { + .name = TYPE_MAX78000_GCR, + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(sysctrl, Max78000GcrState), + VMSTATE_UINT32(rst0, Max78000GcrState), + VMSTATE_UINT32(clkctrl, Max78000GcrState), + VMSTATE_UINT32(pm, Max78000GcrState), + VMSTATE_UINT32(pclkdiv, Max78000GcrState), + VMSTATE_UINT32(pclkdis0, Max78000GcrState), + VMSTATE_UINT32(memctrl, Max78000GcrState), + VMSTATE_UINT32(memz, 
Max78000GcrState), + VMSTATE_UINT32(sysst, Max78000GcrState), + VMSTATE_UINT32(rst1, Max78000GcrState), + VMSTATE_UINT32(pckdis1, Max78000GcrState), + VMSTATE_UINT32(eventen, Max78000GcrState), + VMSTATE_UINT32(revision, Max78000GcrState), + VMSTATE_UINT32(sysie, Max78000GcrState), + VMSTATE_UINT32(eccerr, Max78000GcrState), + VMSTATE_UINT32(ecced, Max78000GcrState), + VMSTATE_UINT32(eccie, Max78000GcrState), + VMSTATE_UINT32(eccaddr, Max78000GcrState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_gcr_init(Object *obj) +{ + Max78000GcrState *s = MAX78000_GCR(obj); + + memory_region_init_io(&s->mmio, obj, &max78000_gcr_ops, s, + TYPE_MAX78000_GCR, 0x400); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + +} + +static void max78000_gcr_realize(DeviceState *dev, Error **errp) +{ + Max78000GcrState *s = MAX78000_GCR(dev); + + address_space_init(&s->sram_as, s->sram, "sram"); +} + +static void max78000_gcr_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + device_class_set_props(dc, max78000_gcr_properties); + + dc->realize = max78000_gcr_realize; + dc->vmsd = &vmstate_max78000_gcr; + rc->phases.hold = max78000_gcr_reset_hold; +} + +static const TypeInfo max78000_gcr_info = { + .name = TYPE_MAX78000_GCR, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000GcrState), + .instance_init = max78000_gcr_init, + .class_init = max78000_gcr_class_init, +}; + +static void max78000_gcr_register_types(void) +{ + type_register_static(&max78000_gcr_info); +} + +type_init(max78000_gcr_register_types) diff --git a/hw/misc/max78000_icc.c b/hw/misc/max78000_icc.c new file mode 100644 index 0000000..6f7d2b2 --- /dev/null +++ b/hw/misc/max78000_icc.c @@ -0,0 +1,120 @@ +/* + * MAX78000 Instruction Cache + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "hw/misc/max78000_icc.h" + + +static uint64_t max78000_icc_read(void *opaque, hwaddr addr, + unsigned int size) +{ + Max78000IccState *s = opaque; + switch (addr) { + case ICC_INFO: + return s->info; + + case ICC_SZ: + return s->sz; + + case ICC_CTRL: + return s->ctrl; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + return 0; + + } +} + +static void max78000_icc_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000IccState *s = opaque; + + switch (addr) { + case ICC_CTRL: + s->ctrl = 0x10000 | (val64 & 1); + break; + + case ICC_INVALIDATE: + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + break; + } +} + +static const MemoryRegionOps max78000_icc_ops = { + .read = max78000_icc_read, + .write = max78000_icc_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const VMStateDescription max78000_icc_vmstate = { + .name = TYPE_MAX78000_ICC, + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(info, Max78000IccState), + VMSTATE_UINT32(sz, Max78000IccState), + VMSTATE_UINT32(ctrl, Max78000IccState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_icc_reset_hold(Object *obj, ResetType type) +{ + Max78000IccState *s = MAX78000_ICC(obj); + s->info = 0; + s->sz = 0x10000010; + s->ctrl = 
0x10000; +} + +static void max78000_icc_init(Object *obj) +{ + Max78000IccState *s = MAX78000_ICC(obj); + + memory_region_init_io(&s->mmio, obj, &max78000_icc_ops, s, + TYPE_MAX78000_ICC, 0x800); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void max78000_icc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + rc->phases.hold = max78000_icc_reset_hold; + dc->vmsd = &max78000_icc_vmstate; +} + +static const TypeInfo max78000_icc_info = { + .name = TYPE_MAX78000_ICC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000IccState), + .instance_init = max78000_icc_init, + .class_init = max78000_icc_class_init, +}; + +static void max78000_icc_register_types(void) +{ + type_register_static(&max78000_icc_info); +} + +type_init(max78000_icc_register_types) diff --git a/hw/misc/max78000_trng.c b/hw/misc/max78000_trng.c new file mode 100644 index 0000000..ecdaef5 --- /dev/null +++ b/hw/misc/max78000_trng.c @@ -0,0 +1,139 @@ +/* + * MAX78000 True Random Number Generator + * + * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "trace.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "hw/misc/max78000_trng.h" +#include "qemu/guest-random.h" + +static uint64_t max78000_trng_read(void *opaque, hwaddr addr, + unsigned int size) +{ + uint32_t data; + + Max78000TrngState *s = opaque; + switch (addr) { + case CTRL: + return s->ctrl; + + case STATUS: + return 1; + + case DATA: + /* + * When interrupts are enabled, reading random data should cause a + * new interrupt to be generated; since there's always a random number + * available, we could qemu_set_irq(s->irq, s->ctrl & RND_IE) here. + * Because of how trng_write is set up, that would always be a no-op, + * so we don't. + */ + qemu_guest_getrandom_nofail(&data, sizeof(data)); + return data; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + } + return 0; +} + +static void max78000_trng_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + Max78000TrngState *s = opaque; + uint32_t val = val64; + switch (addr) { + case CTRL: + /* TODO: implement AES keygen */ + s->ctrl = val; + + /* + * This device models random number generation as taking 0 time. + * A new random number is always available, so the condition for the + * RND interrupt is always fulfilled; we can just set irq to 1.
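+ * + * Guest-side sketch (hypothetical accessors): + * + * writel(trng_base + CTRL, RND_IE); // IRQ asserts immediately + * // ...later, in the interrupt handler: + * word = readl(trng_base + DATA); // fresh entropy on every read + * writel(trng_base + CTRL, 0); // clearing RND_IE drops the line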
+ */ + if (val & RND_IE) { + qemu_set_irq(s->irq, 1); + } else { + qemu_set_irq(s->irq, 0); + } + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + } +} + +static void max78000_trng_reset_hold(Object *obj, ResetType type) +{ + Max78000TrngState *s = MAX78000_TRNG(obj); + s->ctrl = 0; + s->status = 0; + s->data = 0; +} + +static const MemoryRegionOps max78000_trng_ops = { + .read = max78000_trng_read, + .write = max78000_trng_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static const VMStateDescription max78000_trng_vmstate = { + .name = TYPE_MAX78000_TRNG, + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(ctrl, Max78000TrngState), + VMSTATE_UINT32(status, Max78000TrngState), + VMSTATE_UINT32(data, Max78000TrngState), + VMSTATE_END_OF_LIST() + } +}; + +static void max78000_trng_init(Object *obj) +{ + Max78000TrngState *s = MAX78000_TRNG(obj); + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &max78000_trng_ops, s, + TYPE_MAX78000_TRNG, 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + +} + +static void max78000_trng_class_init(ObjectClass *klass, const void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + rc->phases.hold = max78000_trng_reset_hold; + dc->vmsd = &max78000_trng_vmstate; + +} + +static const TypeInfo max78000_trng_info = { + .name = TYPE_MAX78000_TRNG, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Max78000TrngState), + .instance_init = max78000_trng_init, + .class_init = max78000_trng_class_init, +}; + +static void max78000_trng_register_types(void) +{ + type_register_static(&max78000_trng_info); +} + +type_init(max78000_trng_register_types) diff --git a/hw/misc/meson.build b/hw/misc/meson.build index 6d47de4..b1d8d8e 100644 --- a/hw/misc/meson.build +++ b/hw/misc/meson.build @@ -70,6 +70,10 @@ system_ss.add(when: 'CONFIG_IMX', if_true: files( 'imx_ccm.c', 'imx_rngc.c', )) +system_ss.add(when: 'CONFIG_MAX78000_AES', if_true: files('max78000_aes.c')) +system_ss.add(when: 'CONFIG_MAX78000_GCR', if_true: files('max78000_gcr.c')) +system_ss.add(when: 'CONFIG_MAX78000_ICC', if_true: files('max78000_icc.c')) +system_ss.add(when: 'CONFIG_MAX78000_TRNG', if_true: files('max78000_trng.c')) system_ss.add(when: 'CONFIG_NPCM7XX', if_true: files( 'npcm_clk.c', 'npcm_gcr.c', diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 50025d5..4444666 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -1756,6 +1756,7 @@ static void gem_realize(DeviceState *dev, Error **errp) sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]); } + gem_init_register_masks(s); qemu_macaddr_default_if_unset(&s->conf.macaddr); s->nic = qemu_new_nic(&net_gem_info, &s->conf, @@ -1776,7 +1777,6 @@ static void gem_init(Object *obj) DB_PRINT("\n"); - gem_init_register_masks(s); memory_region_init_io(&s->iomem, OBJECT(s), &gem_ops, s, "enet", sizeof(s->regs)); diff --git a/hw/net/can/ctucan_core.c b/hw/net/can/ctucan_core.c index 17131a4..6bd99c4 100644 --- a/hw/net/can/ctucan_core.c +++ b/hw/net/can/ctucan_core.c @@ -28,7 +28,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" -#include "qemu/bswap.h" #include "qemu/bitops.h" #include "hw/irq.h" #include "migration/vmstate.h" diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c index 6dda1e5..3017e12 100644 --- a/hw/net/lan9118.c +++ b/hw/net/lan9118.c @@
-21,6 +21,7 @@ #include "hw/ptimer.h" #include "hw/qdev-properties.h" #include "qapi/error.h" +#include "qemu/bswap.h" #include "qemu/log.h" #include "qemu/module.h" #include <zlib.h> /* for crc32 */ diff --git a/hw/net/npcm_gmac.c b/hw/net/npcm_gmac.c index a434112..5e32cd3 100644 --- a/hw/net/npcm_gmac.c +++ b/hw/net/npcm_gmac.c @@ -516,8 +516,6 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) uint32_t desc_addr; struct NPCMGMACTxDesc tx_desc; uint32_t tx_buf_addr, tx_buf_len; - uint16_t length = 0; - uint8_t *buf = tx_send_buffer; uint32_t prev_buf_size = 0; int csum = 0; @@ -568,22 +566,20 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) tx_buf_addr = tx_desc.tdes2; gmac->regs[R_NPCM_DMA_CUR_TX_BUF_ADDR] = tx_buf_addr; tx_buf_len = TX_DESC_TDES1_BFFR1_SZ_MASK(tx_desc.tdes1); - buf = &tx_send_buffer[prev_buf_size]; - if ((prev_buf_size + tx_buf_len) > sizeof(buf)) { + if ((prev_buf_size + tx_buf_len) > tx_buffer_size) { tx_buffer_size = prev_buf_size + tx_buf_len; tx_send_buffer = g_realloc(tx_send_buffer, tx_buffer_size); - buf = &tx_send_buffer[prev_buf_size]; } /* step 5 */ - if (dma_memory_read(&address_space_memory, tx_buf_addr, buf, + if (dma_memory_read(&address_space_memory, tx_buf_addr, + tx_send_buffer + prev_buf_size, tx_buf_len, MEMTXATTRS_UNSPECIFIED)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to read packet @ 0x%x\n", __func__, tx_buf_addr); return; } - length += tx_buf_len; prev_buf_size += tx_buf_len; /* If not chained we'll have a second buffer. */ @@ -591,30 +587,32 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) tx_buf_addr = tx_desc.tdes3; gmac->regs[R_NPCM_DMA_CUR_TX_BUF_ADDR] = tx_buf_addr; tx_buf_len = TX_DESC_TDES1_BFFR2_SZ_MASK(tx_desc.tdes1); - buf = &tx_send_buffer[prev_buf_size]; - if ((prev_buf_size + tx_buf_len) > sizeof(buf)) { + if ((prev_buf_size + tx_buf_len) > tx_buffer_size) { tx_buffer_size = prev_buf_size + tx_buf_len; tx_send_buffer = g_realloc(tx_send_buffer, tx_buffer_size); - buf = &tx_send_buffer[prev_buf_size]; } - if (dma_memory_read(&address_space_memory, tx_buf_addr, buf, + if (dma_memory_read(&address_space_memory, tx_buf_addr, + tx_send_buffer + prev_buf_size, tx_buf_len, MEMTXATTRS_UNSPECIFIED)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to read packet @ 0x%x\n", __func__, tx_buf_addr); return; } - length += tx_buf_len; prev_buf_size += tx_buf_len; } if (tx_desc.tdes1 & TX_DESC_TDES1_LAST_SEG_MASK) { + /* + * This will truncate the packet at 64K. + * TODO: find out if this is the correct behaviour. 
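+ * + * Note the truncation is modulo 64 KiB rather than a clamp: + * prev_buf_size is a uint32_t, so the uint16_t assignment below keeps + * only the low 16 bits, and a 65600-byte descriptor chain, for example, + * would go out as a 64-byte frame.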
+ */ + uint16_t length = prev_buf_size; net_checksum_calculate(tx_send_buffer, length, csum); qemu_send_packet(qemu_get_queue(gmac->nic), tx_send_buffer, length); trace_npcm_gmac_packet_sent(DEVICE(gmac)->canonical_path, length); - buf = tx_send_buffer; - length = 0; + prev_buf_size = 0; } /* step 6 */ diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 654a087..324fb93 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -57,6 +57,7 @@ #include "system/dma.h" #include "qemu/module.h" #include "qemu/timer.h" +#include "qemu/bswap.h" #include "net/net.h" #include "net/eth.h" #include "system/system.h" diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c index 72df6d7..7d49f82 100644 --- a/hw/net/vhost_net-stub.c +++ b/hw/net/vhost_net-stub.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "net/net.h" #include "net/tap.h" -#include "net/vhost-user.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" @@ -101,7 +100,7 @@ VHostNetState *get_vhost_net(NetClientState *nc) return 0; } -int vhost_set_vring_enable(NetClientState *nc, int enable) +int vhost_net_set_vring_enable(NetClientState *nc, int enable) { return 0; } diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 891f235..540492b 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -16,7 +16,6 @@ #include "qemu/osdep.h" #include "net/net.h" #include "net/tap.h" -#include "net/vhost-user.h" #include "net/vhost-vdpa.h" #include "standard-headers/linux/vhost_types.h" @@ -36,94 +35,9 @@ #include "hw/virtio/virtio-bus.h" #include "linux-headers/linux/vhost.h" - -/* Features supported by host kernel. */ -static const int kernel_feature_bits[] = { - VIRTIO_F_NOTIFY_ON_EMPTY, - VIRTIO_RING_F_INDIRECT_DESC, - VIRTIO_RING_F_EVENT_IDX, - VIRTIO_NET_F_MRG_RXBUF, - VIRTIO_F_VERSION_1, - VIRTIO_NET_F_MTU, - VIRTIO_F_IOMMU_PLATFORM, - VIRTIO_F_RING_PACKED, - VIRTIO_F_RING_RESET, - VIRTIO_F_IN_ORDER, - VIRTIO_F_NOTIFICATION_DATA, - VIRTIO_NET_F_RSC_EXT, - VIRTIO_NET_F_HASH_REPORT, - VHOST_INVALID_FEATURE_BIT -}; - -/* Features supported by others. 
*/ -static const int user_feature_bits[] = { - VIRTIO_F_NOTIFY_ON_EMPTY, - VIRTIO_F_NOTIFICATION_DATA, - VIRTIO_RING_F_INDIRECT_DESC, - VIRTIO_RING_F_EVENT_IDX, - - VIRTIO_F_ANY_LAYOUT, - VIRTIO_F_VERSION_1, - VIRTIO_NET_F_CSUM, - VIRTIO_NET_F_GUEST_CSUM, - VIRTIO_NET_F_GSO, - VIRTIO_NET_F_GUEST_TSO4, - VIRTIO_NET_F_GUEST_TSO6, - VIRTIO_NET_F_GUEST_ECN, - VIRTIO_NET_F_GUEST_UFO, - VIRTIO_NET_F_HOST_TSO4, - VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, - VIRTIO_NET_F_HOST_UFO, - VIRTIO_NET_F_MRG_RXBUF, - VIRTIO_NET_F_MTU, - VIRTIO_F_IOMMU_PLATFORM, - VIRTIO_F_RING_PACKED, - VIRTIO_F_RING_RESET, - VIRTIO_F_IN_ORDER, - VIRTIO_NET_F_RSS, - VIRTIO_NET_F_RSC_EXT, - VIRTIO_NET_F_HASH_REPORT, - VIRTIO_NET_F_GUEST_USO4, - VIRTIO_NET_F_GUEST_USO6, - VIRTIO_NET_F_HOST_USO, - - /* This bit implies RARP isn't sent by QEMU out of band */ - VIRTIO_NET_F_GUEST_ANNOUNCE, - - VIRTIO_NET_F_MQ, - - VHOST_INVALID_FEATURE_BIT -}; - -static const int *vhost_net_get_feature_bits(struct vhost_net *net) -{ - const int *feature_bits = 0; - - switch (net->nc->info->type) { - case NET_CLIENT_DRIVER_TAP: - feature_bits = kernel_feature_bits; - break; - case NET_CLIENT_DRIVER_VHOST_USER: - feature_bits = user_feature_bits; - break; -#ifdef CONFIG_VHOST_NET_VDPA - case NET_CLIENT_DRIVER_VHOST_VDPA: - feature_bits = vdpa_feature_bits; - break; -#endif - default: - error_report("Feature bits not defined for this type: %d", - net->nc->info->type); - break; - } - - return feature_bits; -} - uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) { - return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net), + return vhost_get_features(&net->dev, net->feature_bits, features); } int vhost_net_get_config(struct vhost_net *net, uint8_t *config, @@ -140,7 +54,7 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, void vhost_net_ack_features(struct vhost_net *net, uint64_t features) { net->dev.acked_features = net->dev.backend_features; - vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features); + vhost_ack_features(&net->dev, net->feature_bits, features); } uint64_t vhost_net_get_max_queues(VHostNetState *net) @@ -155,11 +69,11 @@ uint64_t vhost_net_get_acked_features(VHostNetState *net) void vhost_net_save_acked_features(NetClientState *nc) { -#ifdef CONFIG_VHOST_NET_USER - if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - vhost_user_save_acked_features(nc); + struct vhost_net *net = get_vhost_net(nc); + + if (net && net->save_acked_features) { + net->save_acked_features(nc); } -#endif } static void vhost_net_disable_notifiers_nvhosts(VirtIODevice *dev, @@ -329,6 +243,10 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) } net->nc = options->net_backend; net->dev.nvqs = options->nvqs; + net->feature_bits = options->feature_bits; + net->save_acked_features = options->save_acked_features; + net->max_tx_queue_size = options->max_tx_queue_size; + net->is_vhost_user = options->is_vhost_user; net->dev.max_queues = 1; net->dev.vqs = net->vqs; @@ -372,9 +290,8 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) } /* Set sane init value. Override when guest acks. 
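* (For a reconnecting vhost-user backend the get_acked_features hook * returns what the guest acked before the disconnect; the check below * then fails the init if the new backend no longer offers those bits.)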
*/ -#ifdef CONFIG_VHOST_NET_USER - if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - features = vhost_user_get_acked_features(net->nc); + if (options->get_acked_features) { + features = options->get_acked_features(net->nc); if (~net->dev.features & features) { fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 " for backend\n", @@ -382,7 +299,6 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) goto fail; } } -#endif vhost_net_ack_features(net, features); @@ -525,7 +441,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, * because vhost user doesn't interrupt masking/unmasking * properly. */ - if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) { + if (net->is_vhost_user) { dev->use_guest_notifier_mask = false; } } @@ -551,7 +467,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, if (peer->vring_enable) { /* restore vring enable state */ - r = vhost_set_vring_enable(peer, peer->vring_enable); + r = vhost_net_set_vring_enable(peer, peer->vring_enable); if (r < 0) { goto err_guest_notifiers; @@ -649,44 +565,21 @@ void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) { vhost_config_mask(&net->dev, dev, mask); } + VHostNetState *get_vhost_net(NetClientState *nc) { - VHostNetState *vhost_net = 0; - if (!nc) { return 0; } - switch (nc->info->type) { - case NET_CLIENT_DRIVER_TAP: - vhost_net = tap_get_vhost_net(nc); - /* - * tap_get_vhost_net() can return NULL if a tap net-device backend is - * created with 'vhost=off' option, 'vhostforce=off' or no vhost or - * vhostforce or vhostfd options at all. Please see net_init_tap_one(). - * Hence, we omit the assertion here. - */ - break; -#ifdef CONFIG_VHOST_NET_USER - case NET_CLIENT_DRIVER_VHOST_USER: - vhost_net = vhost_user_get_vhost_net(nc); - assert(vhost_net); - break; -#endif -#ifdef CONFIG_VHOST_NET_VDPA - case NET_CLIENT_DRIVER_VHOST_VDPA: - vhost_net = vhost_vdpa_get_vhost_net(nc); - assert(vhost_net); - break; -#endif - default: - break; + if (nc->info->get_vhost_net) { + return nc->info->get_vhost_net(nc); } - return vhost_net; + return NULL; } -int vhost_set_vring_enable(NetClientState *nc, int enable) +int vhost_net_set_vring_enable(NetClientState *nc, int enable) { VHostNetState *net = get_vhost_net(nc); const VhostOps *vhost_ops = net->dev.vhost_ops; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index eb93607..6b5b5da 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -158,7 +158,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); virtio_stl_p(vdev, &netcfg.supported_hash_types, - VIRTIO_NET_RSS_SUPPORTED_HASHES); + n->rss_data.supported_hash_types); memcpy(config, &netcfg, n->config_size); /* @@ -670,34 +670,36 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, static int virtio_net_max_tx_queue_size(VirtIONet *n) { NetClientState *peer = n->nic_conf.peers.ncs[0]; + struct vhost_net *net; - /* - * Backends other than vhost-user or vhost-vdpa don't support max queue - * size. 
- */ if (!peer) { - return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; + goto default_value; } - switch(peer->info->type) { - case NET_CLIENT_DRIVER_VHOST_USER: - case NET_CLIENT_DRIVER_VHOST_VDPA: - return VIRTQUEUE_MAX_SIZE; - default: - return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; - }; + net = get_vhost_net(peer); + + if (!net || !net->max_tx_queue_size) { + goto default_value; + } + + return net->max_tx_queue_size; + +default_value: + return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; } static int peer_attach(VirtIONet *n, int index) { NetClientState *nc = qemu_get_subqueue(n->nic, index); + struct vhost_net *net; if (!nc->peer) { return 0; } - if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - vhost_set_vring_enable(nc->peer, 1); + net = get_vhost_net(nc->peer); + if (net && net->is_vhost_user) { + vhost_net_set_vring_enable(nc->peer, 1); } if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { @@ -714,13 +716,15 @@ static int peer_attach(VirtIONet *n, int index) static int peer_detach(VirtIONet *n, int index) { NetClientState *nc = qemu_get_subqueue(n->nic, index); + struct vhost_net *net; if (!nc->peer) { return 0; } - if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - vhost_set_vring_enable(nc->peer, 0); + net = get_vhost_net(nc->peer); + if (net && net->is_vhost_user) { + vhost_net_set_vring_enable(nc->peer, 0); } if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { @@ -752,79 +756,6 @@ static void virtio_net_set_queue_pairs(VirtIONet *n) static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); -static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, - Error **errp) -{ - VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc = qemu_get_queue(n->nic); - - /* Firstly sync all virtio-net possible supported features */ - features |= n->host_features; - - virtio_add_feature(&features, VIRTIO_NET_F_MAC); - - if (!peer_has_vnet_hdr(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); - - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); - - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); - - virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); - } - - if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); - } - - if (!peer_has_uso(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); - } - - if (!get_vhost_net(nc->peer)) { - return features; - } - - if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { - virtio_clear_feature(&features, VIRTIO_NET_F_RSS); - } - features = vhost_net_get_features(get_vhost_net(nc->peer), features); - vdev->backend_features = features; - - if (n->mtu_bypass_backend && - (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { - features |= (1ULL << VIRTIO_NET_F_MTU); - } - - /* - * Since GUEST_ANNOUNCE is emulated the feature bit could be set without - * enabled. This happens in the vDPA case. 
- * - * Make sure the feature set is not incoherent, as the driver could refuse - * to start. - * - * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, - * helping guest to notify the new location with vDPA devices that does not - * support it. - */ - if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); - } - - return features; -} - static uint64_t virtio_net_bad_features(VirtIODevice *vdev) { uint64_t features = 0; @@ -998,8 +929,9 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) vhost_net_save_acked_features(nc->peer); } - if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) { - memset(n->vlans, 0xff, MAX_VLAN >> 3); + if (virtio_has_feature(vdev->guest_features ^ features, VIRTIO_NET_F_CTRL_VLAN)) { + bool vlan = virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN); + memset(n->vlans, vlan ? 0 : 0xff, MAX_VLAN >> 3); } if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { @@ -1251,7 +1183,7 @@ static void rss_data_to_rss_config(struct VirtioNetRssData *data, { config->redirect = data->redirect; config->populate_hash = data->populate_hash; - config->hash_types = data->hash_types; + config->hash_types = data->runtime_hash_types; config->indirections_len = data->indirections_len; config->default_queue = data->default_queue; } @@ -1286,6 +1218,10 @@ static void virtio_net_detach_ebpf_rss(VirtIONet *n) static void virtio_net_commit_rss_config(VirtIONet *n) { + if (n->rss_data.peer_hash_available) { + return; + } + if (n->rss_data.enabled) { n->rss_data.enabled_software_rss = n->rss_data.populate_hash; if (n->rss_data.populate_hash) { @@ -1300,7 +1236,7 @@ static void virtio_net_commit_rss_config(VirtIONet *n) } trace_virtio_net_rss_enable(n, - n->rss_data.hash_types, + n->rss_data.runtime_hash_types, n->rss_data.indirections_len, sizeof(n->rss_data.key)); } else { @@ -1411,7 +1347,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, err_value = (uint32_t)s; goto error; } - n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); + n->rss_data.runtime_hash_types = virtio_ldl_p(vdev, &cfg.hash_types); n->rss_data.indirections_len = virtio_lduw_p(vdev, &cfg.indirection_table_mask); if (!do_rss) { @@ -1474,12 +1410,12 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, err_value = temp.b; goto error; } - if (!temp.b && n->rss_data.hash_types) { + if (!temp.b && n->rss_data.runtime_hash_types) { err_msg = "No key provided"; err_value = 0; goto error; } - if (!temp.b && !n->rss_data.hash_types) { + if (!temp.b && !n->rss_data.runtime_hash_types) { virtio_net_disable_rss(n); return queue_pairs; } @@ -1881,7 +1817,7 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len); net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto, - n->rss_data.hash_types); + n->rss_data.runtime_hash_types); if (net_hash_type > NetPktRssIpV6UdpEx) { if (n->rss_data.populate_hash) { hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE; @@ -3022,11 +2958,10 @@ static void virtio_net_del_queue(VirtIONet *n, int index) virtio_del_queue(vdev, index * 2 + 1); } -static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) +static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues) { VirtIODevice *vdev = VIRTIO_DEVICE(n); int old_num_queues = virtio_get_num_queues(vdev); - int new_num_queues 
= new_max_queue_pairs * 2 + 1; int i; assert(old_num_queues >= 3); @@ -3062,20 +2997,115 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) int max = multiqueue ? n->max_queue_pairs : 1; n->multiqueue = multiqueue; - virtio_net_change_num_queue_pairs(n, max); + virtio_net_change_num_queues(n, max * 2 + 1); virtio_net_set_queue_pairs(n); } -static int virtio_net_pre_load_queues(VirtIODevice *vdev) +static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n) { - virtio_net_set_multiqueue(VIRTIO_NET(vdev), - virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_RSS) || - virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MQ)); + virtio_net_change_num_queues(VIRTIO_NET(vdev), n); return 0; } +static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_queue(n->nic); + uint32_t supported_hash_types = n->rss_data.supported_hash_types; + uint32_t peer_hash_types = n->rss_data.peer_hash_types; + bool use_own_hash = + (supported_hash_types & VIRTIO_NET_RSS_SUPPORTED_HASHES) == + supported_hash_types; + bool use_peer_hash = + n->rss_data.peer_hash_available && + (supported_hash_types & peer_hash_types) == supported_hash_types; + + /* Firstly sync all virtio-net possible supported features */ + features |= n->host_features; + + virtio_add_feature(&features, VIRTIO_NET_F_MAC); + + if (!peer_has_vnet_hdr(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); + + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + } + + if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); + } + + if (!peer_has_uso(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); + } + + if (!get_vhost_net(nc->peer)) { + if (!use_own_hash) { + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + } else if (virtio_has_feature(features, VIRTIO_NET_F_RSS)) { + virtio_net_load_ebpf(n, errp); + } + + return features; + } + + if (!use_peer_hash) { + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + + if (!use_own_hash || !virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + if (!virtio_net_load_ebpf(n, errp)) { + return features; + } + + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + } + } + + features = vhost_net_get_features(get_vhost_net(nc->peer), features); + vdev->backend_features = features; + + if (n->mtu_bypass_backend && + (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { + features |= (1ULL << VIRTIO_NET_F_MTU); + } + + /* + * Since GUEST_ANNOUNCE is emulated the feature bit could be set without + * being enabled. This happens in the vDPA case.
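+ * (QEMU emulates the announce itself through the control virtqueue, so + * the bit can be offered to the guest even when the backend never + * advertised it.)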
+ * + * Make sure the feature set is not incoherent, as the driver could refuse + * to start. + * + * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, + * helping the guest to notify the new location with vDPA devices that do + * not support it. + */ + if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); + } + + return features; +} + static int virtio_net_post_load_device(void *opaque, int version_id) { VirtIONet *n = opaque; @@ -3314,6 +3344,17 @@ static const VMStateDescription vmstate_virtio_net_has_vnet = { }, }; +static int virtio_net_rss_post_load(void *opaque, int version_id) +{ + VirtIONet *n = VIRTIO_NET(opaque); + + if (version_id == 1) { + n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES; + } + + return 0; +} + static bool virtio_net_rss_needed(void *opaque) { return VIRTIO_NET(opaque)->rss_data.enabled; @@ -3321,14 +3362,16 @@ static bool virtio_net_rss_needed(void *opaque) static const VMStateDescription vmstate_virtio_net_rss = { .name = "virtio-net-device/rss", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, + .post_load = virtio_net_rss_post_load, .needed = virtio_net_rss_needed, .fields = (const VMStateField[]) { VMSTATE_BOOL(rss_data.enabled, VirtIONet), VMSTATE_BOOL(rss_data.redirect, VirtIONet), VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), - VMSTATE_UINT32(rss_data.hash_types, VirtIONet), + VMSTATE_UINT32(rss_data.runtime_hash_types, VirtIONet), + VMSTATE_UINT32_V(rss_data.supported_hash_types, VirtIONet, 2), VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), VMSTATE_UINT16(rss_data.default_queue, VirtIONet), VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, @@ -3900,6 +3943,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); n->vlans = g_malloc0(MAX_VLAN >> 3); + memset(n->vlans, 0xff, MAX_VLAN >> 3); nc = qemu_get_queue(n->nic); nc->rxfilter_notify_enabled = 1; @@ -3915,8 +3959,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) net_rx_pkt_init(&n->rx_pkt); - if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { - virtio_net_load_ebpf(n, errp); + if (qemu_get_vnet_hash_supported_types(qemu_get_queue(n->nic)->peer, + &n->rss_data.peer_hash_types)) { + n->rss_data.peer_hash_available = true; + n->rss_data.supported_hash_types = + n->rss_data.specified_hash_types.on_bits | + (n->rss_data.specified_hash_types.auto_bits & + n->rss_data.peer_hash_types); + } else { + n->rss_data.supported_hash_types = + n->rss_data.specified_hash_types.on_bits | + n->rss_data.specified_hash_types.auto_bits; + } } @@ -3990,7 +4043,6 @@ static void virtio_net_reset(VirtIODevice *vdev) memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN); memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac)); qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac); - memset(n->vlans, 0, MAX_VLAN >> 3); /* Flush any async TX */ for (i = 0; i < n->max_queue_pairs; i++) { @@ -4133,6 +4185,42 @@ static const Property virtio_net_properties[] = { VIRTIO_NET_F_GUEST_USO6, true), DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features, VIRTIO_NET_F_HOST_USO, true), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv4 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp4", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv4 - 1, + ON_OFF_AUTO_AUTO), +
DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp4", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv4 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv6_EX - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv6_EX - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1, + ON_OFF_AUTO_AUTO), }; static void virtio_net_class_init(ObjectClass *klass, const void *data) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 7c0ca56..af73aa8 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -22,7 +22,6 @@ #include "net/tap.h" #include "net/checksum.h" #include "system/system.h" -#include "qemu/bswap.h" #include "qemu/log.h" #include "qemu/module.h" #include "hw/pci/msix.h" diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 2200028..e764ec7 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -22,7 +22,7 @@ * * Usage * ----- - * See docs/system/nvme.rst for extensive documentation. + * See docs/system/devices/nvme.rst for extensive documentation. * * Add options: * -drive file=<file>,if=none,id=<drive_id> diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig index 35c0415..9824fa1 100644 --- a/hw/pci-host/Kconfig +++ b/hw/pci-host/Kconfig @@ -54,6 +54,7 @@ config PCI_EXPRESS_Q35 config PCI_EXPRESS_GENERIC_BRIDGE bool select PCI_EXPRESS + imply ACPI_PCI config PCI_EXPRESS_XILINX bool diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index 0aba47c..952a0ac 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -1,5 +1,6 @@ #include "qemu/osdep.h" #include "hw/acpi/aml-build.h" +#include "hw/acpi/pci.h" #include "hw/pci-host/gpex.h" #include "hw/arm/virt.h" #include "hw/pci/pci_bus.h" @@ -50,61 +51,10 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq, } } -static void acpi_dsdt_add_pci_osc(Aml *dev) +static Aml *build_pci_host_bridge_dsm_method(void) { - Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf; - - /* Declare an _OSC (OS Control Handoff) method */ - aml_append(dev, aml_name_decl("SUPP", aml_int(0))); - aml_append(dev, aml_name_decl("CTRL", aml_int(0))); - method = aml_method("_OSC", 4, AML_NOTSERIALIZED); - aml_append(method, - aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); - - /* PCI Firmware Specification 3.0 - * 4.5.1. 
_OSC Interface for PCI Host Bridge Devices - * The _OSC interface for a PCI/PCI-X/PCI Express hierarchy is - * identified by the Universal Unique IDentifier (UUID) - * 33DB4D5B-1FF7-401C-9657-7441C03DD766 - */ - UUID = aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766"); - ifctx = aml_if(aml_equal(aml_arg(0), UUID)); - aml_append(ifctx, - aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); - aml_append(ifctx, - aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); - aml_append(ifctx, aml_store(aml_name("CDW2"), aml_name("SUPP"))); - aml_append(ifctx, aml_store(aml_name("CDW3"), aml_name("CTRL"))); - - /* - * Allow OS control for all 5 features: - * PCIeHotplug SHPCHotplug PME AER PCIeCapability. - */ - aml_append(ifctx, aml_and(aml_name("CTRL"), aml_int(0x1F), - aml_name("CTRL"))); - - ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1)))); - aml_append(ifctx1, aml_or(aml_name("CDW1"), aml_int(0x08), - aml_name("CDW1"))); - aml_append(ifctx, ifctx1); - - ifctx1 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), aml_name("CTRL")))); - aml_append(ifctx1, aml_or(aml_name("CDW1"), aml_int(0x10), - aml_name("CDW1"))); - aml_append(ifctx, ifctx1); - - aml_append(ifctx, aml_store(aml_name("CTRL"), aml_name("CDW3"))); - aml_append(ifctx, aml_return(aml_arg(3))); - aml_append(method, ifctx); - - elsectx = aml_else(); - aml_append(elsectx, aml_or(aml_name("CDW1"), aml_int(4), - aml_name("CDW1"))); - aml_append(elsectx, aml_return(aml_arg(3))); - aml_append(method, elsectx); - aml_append(dev, method); - - method = aml_method("_DSM", 4, AML_NOTSERIALIZED); + Aml *method = aml_method("_DSM", 4, AML_NOTSERIALIZED); + Aml *UUID, *ifctx, *ifctx1, *buf; /* PCI Firmware Specification 3.0 * 4.6.1. _DSM for PCI Express Slot Information @@ -123,7 +73,16 @@ static void acpi_dsdt_add_pci_osc(Aml *dev) byte_list[0] = 0; buf = aml_buffer(1, byte_list); aml_append(method, aml_return(buf)); - aml_append(dev, method); + return method; +} + +static void acpi_dsdt_add_host_bridge_methods(Aml *dev, + bool enable_native_pcie_hotplug) +{ + /* Declare an _OSC (OS Control Handoff) method */ + aml_append(dev, + build_pci_host_bridge_osc_method(enable_native_pcie_hotplug)); + aml_append(dev, build_pci_host_bridge_dsm_method()); } void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) @@ -192,7 +151,8 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) if (is_cxl) { build_cxl_osc_method(dev); } else { - acpi_dsdt_add_pci_osc(dev); + /* pxb bridges do not have ACPI PCI Hot-plug enabled */ + acpi_dsdt_add_host_bridge_methods(dev, true); } aml_append(scope, dev); @@ -267,7 +227,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg) } aml_append(dev, aml_name_decl("_CRS", rbuf)); - acpi_dsdt_add_pci_osc(dev); + acpi_dsdt_add_host_bridge_methods(dev, cfg->pci_native_hotplug); Aml *dev_res0 = aml_device("%s", "RES0"); aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02"))); diff --git a/hw/pci-host/gt64120.c b/hw/pci-host/gt64120.c index b12a256..b1d96f6 100644 --- a/hw/pci-host/gt64120.c +++ b/hw/pci-host/gt64120.c @@ -28,6 +28,7 @@ #include "qapi/error.h" #include "qemu/units.h" #include "qemu/log.h" +#include "qemu/bswap.h" #include "hw/qdev-properties.h" #include "hw/registerfields.h" #include "hw/pci/pci_device.h" diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index a4335f4..5d8383f 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" #include "qemu/log.h" +#include "qemu/bswap.h" #include "qapi/visitor.h" 
#include "qapi/error.h" #include "hw/pci-host/pnv_phb3_regs.h" diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 77ea352..1899205 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" #include "qemu/log.h" +#include "qemu/bswap.h" #include "qapi/visitor.h" #include "qapi/error.h" #include "target/ppc/cpu.h" diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index 52269b0..975d191 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -20,7 +20,6 @@ #include "migration/vmstate.h" #include "hw/pci/pci_device.h" #include "hw/pci/pci_host.h" -#include "qemu/bswap.h" #include "hw/pci-host/ppce500.h" #include "qom/object.h" diff --git a/hw/pci-host/sh_pci.c b/hw/pci-host/sh_pci.c index de8f6a8..62fb945 100644 --- a/hw/pci-host/sh_pci.c +++ b/hw/pci-host/sh_pci.c @@ -28,7 +28,6 @@ #include "hw/irq.h" #include "hw/pci/pci_device.h" #include "hw/pci/pci_host.h" -#include "qemu/bswap.h" #include "qemu/module.h" #include "qom/object.h" diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 3ad1874..8a4bf0d 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -64,6 +64,27 @@ static void unregister_vfs(PCIDevice *dev) pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff); } +static void consume_config(PCIDevice *dev) +{ + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + + if (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); + } else { + uint8_t *wmask = dev->wmask + dev->exp.sriov_cap; + uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF); + uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI; + + unregister_vfs(dev); + + if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) { + wmask_val |= PCI_SRIOV_CTRL_VFE; + } + + pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val); + } +} + static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, uint16_t vf_dev_id, uint16_t init_vfs, uint16_t total_vfs, uint16_t vf_offset, @@ -416,30 +437,13 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), off, val, len); - if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { - if (val & PCI_SRIOV_CTRL_VFE) { - register_vfs(dev); - } else { - unregister_vfs(dev); - } - } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { - uint8_t *cfg = dev->config + sriov_cap; - uint8_t *wmask = dev->wmask + sriov_cap; - uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF); - uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI; - - if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) { - wmask_val |= PCI_SRIOV_CTRL_VFE; - } - - pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val); - } + consume_config(dev); } void pcie_sriov_pf_post_load(PCIDevice *dev) { if (dev->exp.sriov_cap) { - register_vfs(dev); + consume_config(dev); } } diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 4a49e9d..d84c906 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -2608,62 +2608,46 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj, GString *buf) } } -static int pnv_match_nvt(XiveFabric *xfb, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, - XiveTCTXMatch *match) +static bool pnv_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, + XiveTCTXMatch *match) { PnvMachineState *pnv = PNV_MACHINE(xfb); - 
int total_count = 0; int i; for (i = 0; i < pnv->num_chips; i++) { Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]); XivePresenter *xptr = XIVE_PRESENTER(&chip9->xive); XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); - int count; - count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, - cam_ignore, priority, logic_serv, match); - - if (count < 0) { - return count; - } - - total_count += count; + xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, + cam_ignore, priority, logic_serv, match); } - return total_count; + return !!match->count; } -static int pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, - XiveTCTXMatch *match) +static bool pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, + XiveTCTXMatch *match) { PnvMachineState *pnv = PNV_MACHINE(xfb); - int total_count = 0; int i; for (i = 0; i < pnv->num_chips; i++) { Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]); XivePresenter *xptr = XIVE_PRESENTER(&chip10->xive); XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); - int count; - - count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, - cam_ignore, priority, logic_serv, match); - - if (count < 0) { - return count; - } - total_count += count; + xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, + cam_ignore, priority, logic_serv, match); } - return total_count; + return !!match->count; } static int pnv10_xive_broadcast(XiveFabric *xfb, diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 702f774..1855a3c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -577,7 +577,7 @@ static int spapr_dt_dynamic_memory(SpaprMachineState *spapr, void *fdt, /* * Adds ibm,dynamic-reconfiguration-memory node. - * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation + * Refer to docs/specs/ppc-spapr-hotplug.rst for the documentation * of this device tree node. */ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, @@ -2518,7 +2518,7 @@ static void htab_save_cleanup(void *opaque) static SaveVMHandlers savevm_htab_handlers = { .save_setup = htab_save_setup, .save_live_iterate = htab_save_iterate, - .save_live_complete_precopy = htab_save_complete, + .save_complete = htab_save_complete, .save_cleanup = htab_save_cleanup, .load_state = htab_load, }; @@ -4468,21 +4468,14 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj, GString *buf) /* * This is a XIVE only operation */ -static int spapr_match_nvt(XiveFabric *xfb, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool crowd, bool cam_ignore, uint8_t priority, - uint32_t logic_serv, XiveTCTXMatch *match) +static bool spapr_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool crowd, bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) { SpaprMachineState *spapr = SPAPR_MACHINE(xfb); XivePresenter *xptr = XIVE_PRESENTER(spapr->active_intc); XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); - int count; - - count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, cam_ignore, - priority, logic_serv, match); - if (count < 0) { - return count; - } /* * When we implement the save and restore of the thread interrupt @@ -4493,12 +4486,14 @@ static int spapr_match_nvt(XiveFabric *xfb, uint8_t format, * Until this is done, the sPAPR machine should find at least one * matching context always. 
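* A lookup that matches nothing is therefore logged as a guest error * below rather than treated as fatal.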
*/ - if (count == 0) { + if (!xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, cam_ignore, + priority, logic_serv, match)) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is not dispatched\n", nvt_blk, nvt_idx); + return false; } - return count; + return true; } int spapr_get_vcpu_id(PowerPCCPU *cpu) diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index e6a0ac1..fc9c35b 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -119,3 +119,12 @@ config SPIKE select HTIF select RISCV_ACLINT select SIFIVE_PLIC + +config XIANGSHAN_KUNMINGHU + bool + default y + depends on RISCV64 + select RISCV_ACLINT + select RISCV_APLIC + select RISCV_IMSIC + select SERIAL_MM diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index c22f3a7..2a8d5b1 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -13,5 +13,6 @@ riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files( 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c')) riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c')) +riscv_ss.add(when: 'CONFIG_XIANGSHAN_KUNMINGHU', if_true: files('xiangshan_kmh.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h index 1017d73..47fe01b 100644 --- a/hw/riscv/riscv-iommu-bits.h +++ b/hw/riscv/riscv-iommu-bits.h @@ -79,6 +79,7 @@ struct riscv_iommu_pq_record { #define RISCV_IOMMU_CAP_SV39 BIT_ULL(9) #define RISCV_IOMMU_CAP_SV48 BIT_ULL(10) #define RISCV_IOMMU_CAP_SV57 BIT_ULL(11) +#define RISCV_IOMMU_CAP_SVRSW60T59B BIT_ULL(14) #define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16) #define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17) #define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18) diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index a877e5d..96a7fbd 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -1935,11 +1935,7 @@ static void riscv_iommu_process_dbg(RISCVIOMMUState *s) iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); } else { iova = iotlb.translated_addr & ~iotlb.addr_mask; - iova >>= TARGET_PAGE_BITS; - iova &= RISCV_IOMMU_TR_RESPONSE_PPN; - - /* We do not support superpages (> 4kbs) for now */ - iova &= ~RISCV_IOMMU_TR_RESPONSE_S; + iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova)); } riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); } @@ -2355,7 +2351,8 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp) } if (s->enable_g_stage) { s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | - RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; + RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 | + RISCV_IOMMU_CAP_SVRSW60T59B; } if (s->hpm_cntrs > 0) { diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c index 8b5683d..f1406cb 100644 --- a/hw/riscv/virt-acpi-build.c +++ b/hw/riscv/virt-acpi-build.c @@ -270,11 +270,8 @@ spcr_setup(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s) #define RHCT_NODE_ARRAY_OFFSET 56 /* - * ACPI spec, Revision 6.5+ - * 5.2.36 RISC-V Hart Capabilities Table (RHCT) - * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16 - * https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view - * https://drive.google.com/file/d/1sKbOa8m1UZw1JkquZYe3F1zQBN1xXsaf/view + * ACPI spec, Revision 6.6 + * 5.2.37 RISC-V Hart Capabilities Table (RHCT) */ static void build_rhct(GArray *table_data, BIOSLinker *linker, @@ -421,7 +418,10 @@ static void build_rhct(GArray *table_data, acpi_table_end(linker, 
&table); } -/* FADT */ +/* + * ACPI spec, Revision 6.6 + * 5.2.9 Fixed ACPI Description Table (FADT) + */ static void build_fadt_rev6(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s, @@ -429,7 +429,7 @@ { AcpiFadtData fadt = { .rev = 6, - .minor_ver = 5, + .minor_ver = 6, .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, .xdsdt_tbl_offset = &dsdt_tbl_offset, }; @@ -508,11 +508,8 @@ static void build_dsdt(GArray *table_data, } /* - * ACPI spec, Revision 6.5+ + * ACPI spec, Revision 6.6 * 5.2.12 Multiple APIC Description Table (MADT) - * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15 - * https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view - * https://drive.google.com/file/d/1oMGPyOD58JaPgMl1pKasT-VKsIKia7zR/view */ static void build_madt(GArray *table_data, BIOSLinker *linker, @@ -537,7 +534,7 @@ hart_index_bits = imsic_num_bits(imsic_max_hart_per_socket); - AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id, + AcpiTable table = { .sig = "APIC", .rev = 7, .oem_id = s->oem_id, .oem_table_id = s->oem_table_id }; acpi_table_begin(&table, table_data); @@ -812,10 +809,8 @@ static void build_rimt(GArray *table_data, BIOSLinker *linker, } /* - * ACPI spec, Revision 6.5+ + * ACPI spec, Revision 6.6 * 5.2.16 System Resource Affinity Table (SRAT) - * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/25 - * https://drive.google.com/file/d/1YTdDx2IPm5IeZjAW932EYU-tUtgS08tX/view */ static void build_srat(GArray *table_data, BIOSLinker *linker, RISCVVirtState *vms) @@ -894,7 +889,10 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) } acpi_add_table(table_offsets, tables_blob); - spcr_setup(tables_blob, tables->linker, s); + + if (ms->acpi_spcr_enabled) { + spcr_setup(tables_blob, tables->linker, s); + } acpi_add_table(table_offsets, tables_blob); { diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index cf280a9..47e573f 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -311,8 +311,7 @@ static void create_fdt_socket_memory(RISCVVirtState *s, int socket) size = riscv_socket_mem_size(ms, socket); mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr); qemu_fdt_add_subnode(ms->fdt, mem_name); - qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(ms->fdt, mem_name, "reg", 2, addr, 2, size); qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory"); riscv_socket_fdt_write_id(ms, mem_name, socket); } @@ -324,7 +323,7 @@ static void create_fdt_socket_clint(RISCVVirtState *s, int cpu; g_autofree char *clint_name = NULL; g_autofree uint32_t *clint_cells = NULL; - unsigned long clint_addr; + hwaddr clint_addr; MachineState *ms = MACHINE(s); static const char * const clint_compat[2] = { "sifive,clint0", "riscv,clint0" }; @@ -340,14 +339,14 @@ } clint_addr = s->memmap[VIRT_CLINT].base + - (s->memmap[VIRT_CLINT].size * socket); - clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + s->memmap[VIRT_CLINT].size * socket; + clint_name = g_strdup_printf("/soc/clint@%"HWADDR_PRIx, clint_addr); qemu_fdt_add_subnode(ms->fdt, clint_name); qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible", (char **)&clint_compat, ARRAY_SIZE(clint_compat)); - qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg", - 0x0, clint_addr, 0x0, s->memmap[VIRT_CLINT].size); + qemu_fdt_setprop_sized_cells(ms->fdt, clint_name, "reg", + 2,
clint_addr, 2, s->memmap[VIRT_CLINT].size); qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended", clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); riscv_socket_fdt_write_id(ms, clint_name, socket); @@ -388,8 +387,8 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, RISCV_ACLINT_SWI_SIZE); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -411,11 +410,11 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-mtimer"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, - 0x0, size - RISCV_ACLINT_DEFAULT_MTIME, - 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, - 0x0, RISCV_ACLINT_DEFAULT_MTIME); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr + RISCV_ACLINT_DEFAULT_MTIME, + 2, size - RISCV_ACLINT_DEFAULT_MTIME, + 2, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, + 2, RISCV_ACLINT_DEFAULT_MTIME); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_mtimer_cells, aclint_cells_size); riscv_socket_fdt_write_id(ms, name, socket); @@ -429,8 +428,8 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "riscv,aclint-sswi"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, 0x0, s->memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, addr, 2, s->memmap[VIRT_ACLINT_SSWI].size); qemu_fdt_setprop(ms->fdt, name, "interrupts-extended", aclint_sswi_cells, aclint_cells_size); qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0); @@ -494,8 +493,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s, s->soc[socket].num_harts * sizeof(uint32_t) * 4); } - qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg", - 0x0, plic_addr, 0x0, s->memmap[VIRT_PLIC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, plic_name, "reg", + 2, plic_addr, 2, s->memmap[VIRT_PLIC].size); qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev", VIRT_IRQCHIP_NUM_SOURCES - 1); riscv_socket_fdt_write_id(ms, plic_name, socket); @@ -656,8 +655,8 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); } - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, aplic_size); + qemu_fdt_setprop_sized_cells(ms->fdt, aplic_name, "reg", + 2, aplic_addr, 2, aplic_size); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", VIRT_IRQCHIP_NUM_SOURCES); @@ -857,9 +856,7 @@ static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle) qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, addr, - 0x0, size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_virtio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -897,8 +894,8 @@ static void create_fdt_pcie(RISCVVirtState *s, if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { 
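Aside: most of the virt.c churn above swaps qemu_fdt_setprop_cells() calls that hand-split 64-bit values ("addr >> 32, addr") for qemu_fdt_setprop_sized_cells(), which takes an explicit cell count per value. A sketch of the packing this implies, assuming #address-cells = #size-cells = 2; the cell layout is standard FDT, the helper below is a local stand-in:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>   /* htonl: FDT cells are big-endian */

/* Pack one 64-bit value into two 32-bit big-endian cells, which is what
 * a "2, value" pair passed to qemu_fdt_setprop_sized_cells() turns into. */
static void pack_u64(uint32_t *cells, uint64_t v)
{
    cells[0] = htonl((uint32_t)(v >> 32));
    cells[1] = htonl((uint32_t)v);
}

int main(void)
{
    uint32_t reg[4];

    pack_u64(&reg[0], 0x38000000ULL);  /* example base address */
    pack_u64(&reg[2], 0x10000ULL);     /* example size         */

    for (int i = 0; i < 4; i++) {
        printf("cell[%d] = 0x%08x\n", i, ntohl(reg[i]));
    }
    return 0;
}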
qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0, - s->memmap[VIRT_PCIE_ECAM].base, 0, s->memmap[VIRT_PCIE_ECAM].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, + s->memmap[VIRT_PCIE_ECAM].base, 2, s->memmap[VIRT_PCIE_ECAM].size); qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, 2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size, @@ -935,8 +932,9 @@ static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle) qemu_fdt_setprop_string_array(ms->fdt, name, "compatible", (char **)&compat, ARRAY_SIZE(compat)); } - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_TEST].base, 0x0, s->memmap[VIRT_TEST].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_TEST].base, + 2, s->memmap[VIRT_TEST].size); qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle); test_phandle = qemu_fdt_get_phandle(ms->fdt, name); g_free(name); @@ -968,9 +966,9 @@ static void create_fdt_uart(RISCVVirtState *s, s->memmap[VIRT_UART0].base); qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_UART0].base, - 0x0, s->memmap[VIRT_UART0].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_UART0].base, + 2, s->memmap[VIRT_UART0].size); qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -994,8 +992,9 @@ static void create_fdt_rtc(RISCVVirtState *s, qemu_fdt_add_subnode(ms->fdt, name); qemu_fdt_setprop_string(ms->fdt, name, "compatible", "google,goldfish-rtc"); - qemu_fdt_setprop_cells(ms->fdt, name, "reg", - 0x0, s->memmap[VIRT_RTC].base, 0x0, s->memmap[VIRT_RTC].size); + qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", + 2, s->memmap[VIRT_RTC].base, + 2, s->memmap[VIRT_RTC].size); qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle); if (s->aia_type == VIRT_AIA_TYPE_NONE) { @@ -1089,8 +1088,7 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip, qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1); qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle); - qemu_fdt_setprop_cells(fdt, iommu_node, "reg", - addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_sized_cells(fdt, iommu_node, "reg", 2, addr, 2, size); qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip); qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts", diff --git a/hw/riscv/xiangshan_kmh.c b/hw/riscv/xiangshan_kmh.c new file mode 100644 index 0000000..a95fd61 --- /dev/null +++ b/hw/riscv/xiangshan_kmh.c @@ -0,0 +1,220 @@ +/* + * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu + * FPGA prototype platform + * + * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC) + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Provides a board compatible with the Xiangshan Kunminghu + * FPGA prototype platform: + * + * 0) UART (16550A) + * 1) CLINT (Core-Local Interruptor) + * 2) IMSIC (Incoming MSI Controller) + * 3) APLIC (Advanced Platform-Level Interrupt Controller) + * + * More information can be found in our Github repository: + * https://github.com/OpenXiangShan/XiangShan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General 
Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "system/address-spaces.h" +#include "hw/boards.h" +#include "hw/char/serial-mm.h" +#include "hw/intc/riscv_aclint.h" +#include "hw/intc/riscv_aplic.h" +#include "hw/intc/riscv_imsic.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/boot.h" +#include "hw/riscv/xiangshan_kmh.h" +#include "hw/riscv/riscv_hart.h" +#include "system/system.h" + +static const MemMapEntry xiangshan_kmh_memmap[] = { + [XIANGSHAN_KMH_ROM] = { 0x1000, 0xF000 }, + [XIANGSHAN_KMH_UART0] = { 0x310B0000, 0x10000 }, + [XIANGSHAN_KMH_CLINT] = { 0x38000000, 0x10000 }, + [XIANGSHAN_KMH_APLIC_M] = { 0x31100000, 0x4000 }, + [XIANGSHAN_KMH_APLIC_S] = { 0x31120000, 0x4000 }, + [XIANGSHAN_KMH_IMSIC_M] = { 0x3A800000, 0x10000 }, + [XIANGSHAN_KMH_IMSIC_S] = { 0x3B000000, 0x80000 }, + [XIANGSHAN_KMH_DRAM] = { 0x80000000, 0x0 }, +}; + +static DeviceState *xiangshan_kmh_create_aia(uint32_t num_harts) +{ + int i; + const MemMapEntry *memmap = xiangshan_kmh_memmap; + hwaddr addr = 0; + DeviceState *aplic_m = NULL; + + /* M-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_M].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), i, true, + 1, XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* S-level IMSICs */ + addr = memmap[XIANGSHAN_KMH_IMSIC_S].base; + for (i = 0; i < num_harts; i++) { + riscv_imsic_create(addr + + i * IMSIC_HART_SIZE(XIANGSHAN_KMH_IMSIC_GUEST_BITS), + i, false, 1 + XIANGSHAN_KMH_IMSIC_GUEST_BITS, + XIANGSHAN_KMH_IMSIC_NUM_IDS); + } + + /* M-level APLIC */ + aplic_m = riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_M].base, + memmap[XIANGSHAN_KMH_APLIC_M].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, true, NULL); + + /* S-level APLIC */ + riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_S].base, + memmap[XIANGSHAN_KMH_APLIC_S].size, + 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES, + 1, true, false, aplic_m); + + return aplic_m; +} + +static void xiangshan_kmh_soc_realize(DeviceState *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(dev); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + uint32_t num_harts = ms->smp.cpus; + + qdev_prop_set_uint32(DEVICE(&s->cpus), "num-harts", num_harts); + qdev_prop_set_uint32(DEVICE(&s->cpus), "hartid-base", 0); + qdev_prop_set_string(DEVICE(&s->cpus), "cpu-type", + TYPE_RISCV_CPU_XIANGSHAN_KMH); + sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_fatal); + + /* AIA */ + s->irqchip = xiangshan_kmh_create_aia(num_harts); + + /* UART */ + serial_mm_init(system_memory, memmap[XIANGSHAN_KMH_UART0].base, 2, + qdev_get_gpio_in(s->irqchip, XIANGSHAN_KMH_UART0_IRQ), + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); + + /* CLINT */ + riscv_aclint_swi_create(memmap[XIANGSHAN_KMH_CLINT].base, + 0, num_harts, false); + riscv_aclint_mtimer_create(memmap[XIANGSHAN_KMH_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + 0, num_harts, RISCV_ACLINT_DEFAULT_MTIMECMP, + 
RISCV_ACLINT_DEFAULT_MTIME, + XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ, true); + + /* ROM */ + memory_region_init_rom(&s->rom, OBJECT(dev), "xiangshan.kunminghu.rom", + memmap[XIANGSHAN_KMH_ROM].size, &error_fatal); + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_ROM].base, &s->rom); +} + +static void xiangshan_kmh_soc_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = xiangshan_kmh_soc_realize; + dc->user_creatable = false; +} + +static void xiangshan_kmh_soc_instance_init(Object *obj) +{ + XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(obj); + + object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY); +} + +static const TypeInfo xiangshan_kmh_soc_info = { + .name = TYPE_XIANGSHAN_KMH_SOC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XiangshanKmhSoCState), + .instance_init = xiangshan_kmh_soc_instance_init, + .class_init = xiangshan_kmh_soc_class_init, +}; + +static void xiangshan_kmh_soc_register_types(void) +{ + type_register_static(&xiangshan_kmh_soc_info); +} +type_init(xiangshan_kmh_soc_register_types) + +static void xiangshan_kmh_machine_init(MachineState *machine) +{ + XiangshanKmhState *s = XIANGSHAN_KMH_MACHINE(machine); + const MemMapEntry *memmap = xiangshan_kmh_memmap; + MemoryRegion *system_memory = get_system_memory(); + hwaddr start_addr = memmap[XIANGSHAN_KMH_DRAM].base; + + /* Initialize SoC */ + object_initialize_child(OBJECT(machine), "soc", &s->soc, + TYPE_XIANGSHAN_KMH_SOC); + qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); + + /* Register RAM */ + memory_region_add_subregion(system_memory, + memmap[XIANGSHAN_KMH_DRAM].base, + machine->ram); + + /* ROM reset vector */ + riscv_setup_rom_reset_vec(machine, &s->soc.cpus, + start_addr, + memmap[XIANGSHAN_KMH_ROM].base, + memmap[XIANGSHAN_KMH_ROM].size, 0, 0); + if (machine->firmware) { + riscv_load_firmware(machine->firmware, &start_addr, NULL); + } + + /* Note: dtb has been integrated into firmware(OpenSBI) when compiling */ +} + +static void xiangshan_kmh_machine_class_init(ObjectClass *klass, const void *data) +{ + MachineClass *mc = MACHINE_CLASS(klass); + static const char *const valid_cpu_types[] = { + TYPE_RISCV_CPU_XIANGSHAN_KMH, + NULL + }; + + mc->desc = "RISC-V Board compatible with the Xiangshan " \ + "Kunminghu FPGA prototype platform"; + mc->init = xiangshan_kmh_machine_init; + mc->max_cpus = XIANGSHAN_KMH_MAX_CPUS; + mc->default_cpu_type = TYPE_RISCV_CPU_XIANGSHAN_KMH; + mc->valid_cpu_types = valid_cpu_types; + mc->default_ram_id = "xiangshan.kunminghu.ram"; +} + +static const TypeInfo xiangshan_kmh_machine_info = { + .name = TYPE_XIANGSHAN_KMH_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(XiangshanKmhState), + .class_init = xiangshan_kmh_machine_class_init, +}; + +static void xiangshan_kmh_machine_register_types(void) +{ + type_register_static(&xiangshan_kmh_machine_info); +} +type_init(xiangshan_kmh_machine_register_types) diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index e6aa445..f87d274 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -384,9 +384,9 @@ static uint64_t get_table_index(uint64_t iova, int8_t ett) return calc_sx(iova); case ZPCI_ETT_RT: return calc_rtx(iova); + default: + g_assert_not_reached(); } - - return -1; } static bool entry_isvalid(uint64_t entry, int8_t ett) @@ -397,22 +397,24 @@ static bool entry_isvalid(uint64_t entry, int8_t ett) case ZPCI_ETT_ST: case ZPCI_ETT_RT: return rt_entry_isvalid(entry); + default: + g_assert_not_reached(); } 
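Aside: the s390 PCI changes here convert switches whose default case can never legitimately run from returning sentinel values (-1, false, 0) to g_assert_not_reached(), so a corrupt entry-table type aborts loudly instead of being silently mistranslated. The shape of the pattern, with local stand-in names:

#include <glib.h>

typedef enum { ETT_PT, ETT_ST, ETT_RT } Ett;   /* stand-in for ZPCI_ETT_* */

static guint64 frame_size(Ett ett)
{
    switch (ett) {
    case ETT_PT: return 1ULL << 12;
    case ETT_ST: return 1ULL << 20;
    case ETT_RT: return 1ULL << 31;
    default:
        /* Every legal value is handled above; abort rather than return
         * a sentinel the callers would have to check. */
        g_assert_not_reached();
    }
}

int main(void)
{
    g_print("%" G_GUINT64_FORMAT "\n", frame_size(ETT_ST));
    return 0;
}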
- - return false; } /* Return true if address translation is done */ static bool translate_iscomplete(uint64_t entry, int8_t ett) { switch (ett) { - case 0: + case ZPCI_ETT_ST: return (entry & ZPCI_TABLE_FC) ? true : false; - case 1: + case ZPCI_ETT_RT: return false; + case ZPCI_ETT_PT: + return true; + default: + g_assert_not_reached(); } - - return true; } static uint64_t get_frame_size(int8_t ett) @@ -424,9 +426,9 @@ static uint64_t get_frame_size(int8_t ett) return 1ULL << 20; case ZPCI_ETT_RT: return 1ULL << 31; + default: + g_assert_not_reached(); } - - return 0; } static uint64_t get_next_table_origin(uint64_t entry, int8_t ett) @@ -438,9 +440,9 @@ static uint64_t get_next_table_origin(uint64_t entry, int8_t ett) return get_st_pto(entry); case ZPCI_ETT_RT: return get_rt_sto(entry); + default: + g_assert_not_reached(); } - - return 0; } /** diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index b5dddb2..a3bb5aa 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -16,6 +16,7 @@ #include "exec/target_page.h" #include "system/memory.h" #include "qemu/error-report.h" +#include "qemu/bswap.h" #include "system/hw_accel.h" #include "hw/boards.h" #include "hw/pci/pci_device.h" diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c index f74cf32..13a678a 100644 --- a/hw/s390x/s390-stattrib.c +++ b/hw/s390x/s390-stattrib.c @@ -338,7 +338,7 @@ static const TypeInfo qemu_s390_stattrib_info = { static SaveVMHandlers savevm_s390_stattrib_handlers = { .save_setup = cmma_save_setup, .save_live_iterate = cmma_save_iterate, - .save_live_complete_precopy = cmma_save_complete, + .save_complete = cmma_save_complete, .state_pending_exact = cmma_state_pending, .state_pending_estimate = cmma_state_pending, .save_cleanup = cmma_save_cleanup, diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index f24991f..1d264c4 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -275,6 +275,7 @@ static int esp_select(ESPState *s) if (!s->current_dev) { /* No such drive */ s->rregs[ESP_RSTAT] = 0; + s->asc_mode = ESP_ASC_MODE_DIS; s->rregs[ESP_RINTR] = INTR_DC; esp_raise_irq(s); return -1; @@ -284,6 +285,7 @@ static int esp_select(ESPState *s) * Note that we deliberately don't raise the IRQ here: this will be done * either in esp_transfer_data() or esp_command_complete() */ + s->asc_mode = ESP_ASC_MODE_INI; return 0; } @@ -308,6 +310,7 @@ static void do_command_phase(ESPState *s) if (!current_lun) { /* No such drive */ s->rregs[ESP_RSTAT] = 0; + s->asc_mode = ESP_ASC_MODE_DIS; s->rregs[ESP_RINTR] = INTR_DC; s->rregs[ESP_RSEQ] = SEQ_0; esp_raise_irq(s); @@ -487,8 +490,10 @@ static void esp_do_dma(ESPState *s) case STAT_MO: if (s->dma_memory_read) { len = MIN(len, fifo8_num_free(&s->cmdfifo)); - s->dma_memory_read(s->dma_opaque, buf, len); - esp_set_tc(s, esp_get_tc(s) - len); + if (len) { + s->dma_memory_read(s->dma_opaque, buf, len); + esp_set_tc(s, esp_get_tc(s) - len); + } } else { len = esp_fifo_pop_buf(s, buf, fifo8_num_used(&s->fifo)); len = MIN(fifo8_num_free(&s->cmdfifo), len); @@ -541,9 +546,11 @@ static void esp_do_dma(ESPState *s) trace_esp_do_dma(cmdlen, len); if (s->dma_memory_read) { len = MIN(len, fifo8_num_free(&s->cmdfifo)); - s->dma_memory_read(s->dma_opaque, buf, len); - fifo8_push_all(&s->cmdfifo, buf, len); - esp_set_tc(s, esp_get_tc(s) - len); + if (len) { + s->dma_memory_read(s->dma_opaque, buf, len); + fifo8_push_all(&s->cmdfifo, buf, len); + esp_set_tc(s, esp_get_tc(s) - len); + } } else { len = esp_fifo_pop_buf(s, buf, fifo8_num_used(&s->fifo)); len = 
MIN(fifo8_num_free(&s->cmdfifo), len); @@ -572,8 +579,10 @@ static void esp_do_dma(ESPState *s) switch (s->rregs[ESP_CMD]) { case CMD_TI | CMD_DMA: if (s->dma_memory_read) { - s->dma_memory_read(s->dma_opaque, s->async_buf, len); - esp_set_tc(s, esp_get_tc(s) - len); + if (len) { + s->dma_memory_read(s->dma_opaque, s->async_buf, len); + esp_set_tc(s, esp_get_tc(s) - len); + } } else { /* Copy FIFO data to device */ len = MIN(s->async_len, ESP_FIFO_SZ); @@ -625,7 +634,9 @@ static void esp_do_dma(ESPState *s) switch (s->rregs[ESP_CMD]) { case CMD_TI | CMD_DMA: if (s->dma_memory_write) { - s->dma_memory_write(s->dma_opaque, s->async_buf, len); + if (len) { + s->dma_memory_write(s->dma_opaque, s->async_buf, len); + } } else { /* Copy device data to FIFO */ len = MIN(len, fifo8_num_free(&s->fifo)); @@ -675,6 +686,7 @@ static void esp_do_dma(ESPState *s) buf[0] = s->status; if (s->dma_memory_write) { + /* Length already non-zero */ s->dma_memory_write(s->dma_opaque, buf, len); } else { esp_fifo_push_buf(s, buf, len); @@ -709,6 +721,7 @@ static void esp_do_dma(ESPState *s) buf[0] = 0; if (s->dma_memory_write) { + /* Length already non-zero */ s->dma_memory_write(s->dma_opaque, buf, len); } else { esp_fifo_push_buf(s, buf, len); @@ -1012,6 +1025,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) */ s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC; s->rregs[ESP_RSEQ] = SEQ_CD; + esp_raise_irq(s); break; case CMD_SELATNS | CMD_DMA: @@ -1022,20 +1036,21 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) */ s->rregs[ESP_RINTR] |= INTR_BS; s->rregs[ESP_RSEQ] = SEQ_MO; + esp_raise_irq(s); break; case CMD_TI | CMD_DMA: case CMD_TI: /* - * Bus service interrupt raised because of initial change to - * DATA phase + * If the final COMMAND phase data was transferred using a TI + * command, clear ESP_CMD to terminate the TI command and raise + * the completion interrupt */ s->rregs[ESP_CMD] = 0; s->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(s); break; } - - esp_raise_irq(s); } /* @@ -1090,6 +1105,7 @@ void esp_hard_reset(ESPState *s) fifo8_reset(&s->cmdfifo); s->dma = 0; s->dma_cb = NULL; + s->asc_mode = ESP_ASC_MODE_DIS; s->rregs[ESP_CFG1] = 7; } @@ -1113,6 +1129,38 @@ static void parent_esp_reset(ESPState *s, int irq, int level) } } +static bool esp_cmd_is_valid(ESPState *s, uint8_t cmd) +{ + uint8_t cmd_group = (cmd & CMD_GRP_MASK) >> 4; + + /* Always allow misc commands */ + if (cmd_group == CMD_GRP_MISC) { + return true; + } + + switch (s->asc_mode) { + case ESP_ASC_MODE_DIS: + /* Disconnected mode: only allow disconnected commands */ + if (cmd_group == CMD_GRP_DISC) { + return true; + } + break; + + case ESP_ASC_MODE_INI: + /* Initiator mode: allow initiator commands */ + if (cmd_group == CMD_GRP_INIT) { + return true; + } + break; + + default: + g_assert_not_reached(); + } + + trace_esp_invalid_cmd(cmd, s->asc_mode); + return false; +} + static void esp_run_cmd(ESPState *s) { uint8_t cmd = s->rregs[ESP_CMD]; @@ -1158,6 +1206,7 @@ static void esp_run_cmd(ESPState *s) break; case CMD_MSGACC: trace_esp_mem_writeb_cmd_msgacc(cmd); + s->asc_mode = ESP_ASC_MODE_DIS; s->rregs[ESP_RINTR] |= INTR_DC; s->rregs[ESP_RSEQ] = 0; s->rregs[ESP_RFLAGS] = 0; @@ -1268,6 +1317,11 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val) break; case ESP_CMD: s->rregs[saddr] = val; + if (!esp_cmd_is_valid(s, s->rregs[saddr])) { + s->rregs[ESP_RSTAT] |= INTR_IL; + esp_raise_irq(s); + break; + } esp_run_cmd(s); break; case ESP_WBUSID ... 
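Aside: esp_cmd_is_valid() above gates each written command on the chip's current mode: the group is encoded in the command byte, miscellaneous commands are always legal, disconnected-state commands only while no target is selected, and initiator commands only after a successful selection. A toy version of that dispatch (the group encodings below are invented; the real CMD_GRP_* values live in hw/scsi/esp.h):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { MODE_DIS, MODE_INI };                        /* like ESP_ASC_MODE_* */
enum { GRP_MISC = 0, GRP_INIT = 1, GRP_DISC = 4 };  /* invented values     */
#define CMD_GRP(cmd)  (((cmd) & 0x70) >> 4)

static bool cmd_is_valid(int asc_mode, uint8_t cmd)
{
    switch (CMD_GRP(cmd)) {
    case GRP_MISC:
        return true;                        /* always allowed        */
    case GRP_DISC:
        return asc_mode == MODE_DIS;        /* e.g. select commands  */
    case GRP_INIT:
        return asc_mode == MODE_INI;        /* e.g. transfer info    */
    default:
        return false;
    }
}

int main(void)
{
    printf("TI while disconnected: %d\n", cmd_is_valid(MODE_DIS, 0x10));
    printf("TI while initiator:    %d\n", cmd_is_valid(MODE_INI, 0x10));
    return 0;
}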
ESP_WSYNO: @@ -1325,6 +1379,14 @@ static bool esp_is_between_version_5_and_6(void *opaque, int version_id) return version_id >= 5 && version_id <= 6; } +static bool esp_is_version_8(void *opaque, int version_id) +{ + ESPState *s = ESP(opaque); + + version_id = MIN(version_id, s->mig_version_id); + return version_id >= 8; +} + int esp_pre_save(void *opaque) { ESPState *s = ESP(object_resolve_path_component( @@ -1356,13 +1418,18 @@ static int esp_post_load(void *opaque, int version_id) } } + if (version_id < 8) { + /* Assume initiator mode to allow all commands to continue */ + s->asc_mode = ESP_ASC_MODE_INI; + } + s->mig_version_id = vmstate_esp.version_id; return 0; } const VMStateDescription vmstate_esp = { .name = "esp", - .version_id = 7, + .version_id = 8, .minimum_version_id = 3, .post_load = esp_post_load, .fields = (const VMStateField[]) { @@ -1394,6 +1461,7 @@ const VMStateDescription vmstate_esp = { esp_is_between_version_5_and_6), VMSTATE_UINT8_TEST(lun, ESPState, esp_is_version_6), VMSTATE_BOOL(drq_state, ESPState), + VMSTATE_UINT8_TEST(asc_mode, ESPState, esp_is_version_8), VMSTATE_END_OF_LIST() }, }; diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events index f0f2a98..6c2788e 100644 --- a/hw/scsi/trace-events +++ b/hw/scsi/trace-events @@ -198,6 +198,7 @@ esp_mem_writeb_cmd_ensel(uint32_t val) "Enable selection (0x%2.2x)" esp_mem_writeb_cmd_dissel(uint32_t val) "Disable selection (0x%2.2x)" esp_mem_writeb_cmd_ti(uint32_t val) "Transfer Information (0x%2.2x)" esp_set_phase(const char *phase) "setting bus phase to %s" +esp_invalid_cmd(uint8_t cmd, uint8_t asc_mode) "command 0x%x asc_mode 0x%x" # esp-pci.c esp_pci_error_invalid_dma_direction(void) "invalid DMA transfer direction" diff --git a/hw/sensor/lsm303dlhc_mag.c b/hw/sensor/lsm303dlhc_mag.c index f9e501d..cd5773a 100644 --- a/hw/sensor/lsm303dlhc_mag.c +++ b/hw/sensor/lsm303dlhc_mag.c @@ -28,7 +28,6 @@ #include "qapi/visitor.h" #include "qemu/module.h" #include "qemu/log.h" -#include "qemu/bswap.h" enum LSM303DLHCMagReg { LSM303DLHC_MAG_REG_CRA = 0x00, diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index ad4cd67..1ac063c 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -17,6 +17,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qemu/bswap.h" #include "qapi/error.h" #include "qemu/config-file.h" #include "qemu/module.h" diff --git a/hw/uefi/trace.h b/hw/uefi/trace.h new file mode 100644 index 0000000..6aa1c93 --- /dev/null +++ b/hw/uefi/trace.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "trace/trace-hw_uefi.h" diff --git a/hw/uefi/var-service-core.c b/hw/uefi/var-service-core.c index 4836a0c..feec5a5 100644 --- a/hw/uefi/var-service-core.c +++ b/hw/uefi/var-service-core.c @@ -12,7 +12,7 @@ #include "hw/uefi/var-service-api.h" #include "hw/uefi/var-service-edk2.h" -#include "trace/trace-hw_uefi.h" +#include "trace.h" static int uefi_vars_pre_load(void *opaque) { diff --git a/hw/uefi/var-service-policy.c b/hw/uefi/var-service-policy.c index 3b1155f..58da4ad 100644 --- a/hw/uefi/var-service-policy.c +++ b/hw/uefi/var-service-policy.c @@ -14,7 +14,7 @@ #include "hw/uefi/var-service-api.h" #include "hw/uefi/var-service-edk2.h" -#include "trace/trace-hw_uefi.h" +#include "trace.h" static void calc_policy(uefi_var_policy *pol); diff --git a/hw/uefi/var-service-utils.c b/hw/uefi/var-service-utils.c index c9ef465..258013f 100644 --- a/hw/uefi/var-service-utils.c +++ b/hw/uefi/var-service-utils.c @@ -8,7 +8,7 @@ #include "hw/uefi/var-service.h" -#include 
"trace/trace-hw_uefi.h" +#include "trace.h" /* ------------------------------------------------------------------ */ diff --git a/hw/uefi/var-service-vars.c b/hw/uefi/var-service-vars.c index 7f98d77..37d05b7 100644 --- a/hw/uefi/var-service-vars.c +++ b/hw/uefi/var-service-vars.c @@ -12,7 +12,7 @@ #include "hw/uefi/var-service-api.h" #include "hw/uefi/var-service-edk2.h" -#include "trace/trace-hw_uefi.h" +#include "trace.h" #define EFI_VARIABLE_ATTRIBUTE_SUPPORTED \ (EFI_VARIABLE_NON_VOLATILE | \ diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c index 54d064e..96623aa 100644 --- a/hw/usb/dev-hid.c +++ b/hw/usb/dev-hid.c @@ -491,14 +491,14 @@ static const uint8_t qemu_tablet_hid_report_descriptor[] = { 0xa1, 0x00, /* Collection (Physical) */ 0x05, 0x09, /* Usage Page (Button) */ 0x19, 0x01, /* Usage Minimum (1) */ - 0x29, 0x03, /* Usage Maximum (3) */ + 0x29, 0x05, /* Usage Maximum (5) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x25, 0x01, /* Logical Maximum (1) */ - 0x95, 0x03, /* Report Count (3) */ + 0x95, 0x05, /* Report Count (5) */ 0x75, 0x01, /* Report Size (1) */ 0x81, 0x02, /* Input (Data, Variable, Absolute) */ 0x95, 0x01, /* Report Count (1) */ - 0x75, 0x05, /* Report Size (5) */ + 0x75, 0x03, /* Report Size (3) */ 0x81, 0x01, /* Input (Constant) */ 0x05, 0x01, /* Usage Page (Generic Desktop) */ 0x09, 0x30, /* Usage (X) */ diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c index 3133fef..d589dd9 100644 --- a/hw/vfio-user/container.c +++ b/hw/vfio-user/container.c @@ -13,7 +13,6 @@ #include "hw/vfio-user/container.h" #include "hw/vfio-user/device.h" #include "hw/vfio-user/trace.h" -#include "hw/vfio/vfio-cpr.h" #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-listener.h" #include "qapi/error.h" @@ -65,8 +64,6 @@ static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, 0, &local_err)) { error_report_err(local_err); ret = -EFAULT; - } else { - ret = 0; } } else { if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL, @@ -93,7 +90,7 @@ static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, bcontainer); int fd = memory_region_get_fd(mrp); Error *local_err = NULL; - int ret; + int ret = 0; VFIOUserFDs *fds = NULL; VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp)); @@ -136,8 +133,6 @@ static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, 0, &local_err)) { error_report_err(local_err); ret = -EFAULT; - } else { - ret = 0; } } else { VFIOUserFDs local_fds = { 1, 0, &fd }; @@ -225,14 +220,10 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, bcontainer = &container->bcontainer; - if (!vfio_cpr_register_container(bcontainer, errp)) { - goto free_container_exit; - } - ret = ram_block_uncoordinated_discard_disable(true); if (ret) { error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto unregister_container_exit; + goto free_container_exit; } vioc = VFIO_IOMMU_GET_CLASS(bcontainer); @@ -261,9 +252,6 @@ listener_release_exit: enable_discards_exit: ram_block_uncoordinated_discard_disable(false); -unregister_container_exit: - vfio_cpr_unregister_container(bcontainer); - free_container_exit: object_unref(container); @@ -286,7 +274,6 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container) vioc->release(bcontainer); } - vfio_cpr_unregister_container(bcontainer); object_unref(container); vfio_address_space_put(space); diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c index c418954..2275d3f 100644 --- a/hw/vfio-user/proxy.c +++ b/hw/vfio-user/proxy.c @@ 
-32,7 +32,6 @@ static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg); static void vfio_user_recv(void *opaque); static void vfio_user_send(void *opaque); -static void vfio_user_cb(void *opaque); static void vfio_user_request(void *opaque); @@ -492,7 +491,7 @@ static void vfio_user_send(void *opaque) } } -static void vfio_user_cb(void *opaque) +static void vfio_user_close_cb(void *opaque) { VFIOUserProxy *proxy = opaque; @@ -984,8 +983,11 @@ void vfio_user_disconnect(VFIOUserProxy *proxy) * handler to run after the proxy fd handlers were * deleted above. */ - aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy); - qemu_cond_wait(&proxy->close_cv, &proxy->lock); + aio_bh_schedule_oneshot(proxy->ctx, vfio_user_close_cb, proxy); + + while (proxy->state != VFIO_PROXY_CLOSED) { + qemu_cond_wait(&proxy->close_cv, &proxy->lock); + } /* we now hold the only ref to proxy */ qemu_mutex_unlock(&proxy->lock); diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 1df4438..7719f24 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -265,7 +265,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); } static void vfio_ap_unrealize(DeviceState *dev) @@ -275,7 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev) vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX); vfio_device_detach(&vapdev->vdev); - g_free(vapdev->vdev.name); + vfio_device_free_name(&vapdev->vdev); } static const Property vfio_ap_properties[] = { diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index cea9d6e..9560b8d 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -619,7 +619,7 @@ out_io_notifier_err: out_region_err: vfio_device_detach(vbasedev); out_attach_dev_err: - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); out_unrealize: if (cdc->unrealize) { cdc->unrealize(cdev); @@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev) vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); vfio_ccw_put_region(vcdev); vfio_device_detach(&vcdev->vdev); - g_free(vcdev->vdev.name); + vfio_device_free_name(&vcdev->vdev); if (cdc->unrealize) { cdc->unrealize(cdev); diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index d834bd4..5630497 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -78,7 +78,16 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, void *vaddr, bool readonly, MemoryRegion *mr) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + RAMBlock *rb = mr->ram_block; + int mfd = rb ? qemu_ram_get_fd(rb) : -1; + if (mfd >= 0 && vioc->dma_map_file) { + unsigned long start = vaddr - qemu_ram_get_host_addr(rb); + unsigned long offset = qemu_ram_get_fd_offset(rb); + + return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset, + readonly); + } g_assert(vioc->dma_map); return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr); } diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c new file mode 100644 index 0000000..148a06d --- /dev/null +++ b/hw/vfio/cpr-iommufd.c @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2024-2025 Oracle and/or its affiliates. 
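Aside: the proxy.c change above replaces a single qemu_cond_wait() with a loop re-checking proxy->state, the canonical guard against spurious or early wakeups: a condition-variable wait must always be wrapped in a while over its predicate. The same pattern in plain pthreads (compile with -pthread):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv   = PTHREAD_COND_INITIALIZER;
static bool closed;

void wait_for_close(void)
{
    pthread_mutex_lock(&lock);
    /* A bare "if (!closed)" would be wrong: the wait can return before
     * the predicate holds, so re-test it every time we wake. */
    while (!closed) {
        pthread_cond_wait(&cv, &lock);
    }
    pthread_mutex_unlock(&lock);
}

void signal_close(void)
{
    pthread_mutex_lock(&lock);
    closed = true;
    pthread_cond_broadcast(&cv);
    pthread_mutex_unlock(&lock);
}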
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/vfio/vfio-cpr.h" +#include "hw/vfio/vfio-device.h" +#include "migration/blocker.h" +#include "migration/cpr.h" +#include "migration/migration.h" +#include "migration/vmstate.h" +#include "system/iommufd.h" +#include "vfio-iommufd.h" +#include "trace.h" + +typedef struct CprVFIODevice { + char *name; + unsigned int namelen; + uint32_t ioas_id; + int devid; + uint32_t hwpt_id; + QLIST_ENTRY(CprVFIODevice) next; +} CprVFIODevice; + +static const VMStateDescription vmstate_cpr_vfio_device = { + .name = "cpr vfio device", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(namelen, CprVFIODevice), + VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen), + VMSTATE_INT32(devid, CprVFIODevice), + VMSTATE_UINT32(ioas_id, CprVFIODevice), + VMSTATE_UINT32(hwpt_id, CprVFIODevice), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_cpr_vfio_devices = { + .name = CPR_STATE "/vfio devices", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]){ + VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device, + CprVFIODevice, next), + VMSTATE_END_OF_LIST() + } +}; + +static void vfio_cpr_save_device(VFIODevice *vbasedev) +{ + CprVFIODevice *elem = g_new0(CprVFIODevice, 1); + + elem->name = g_strdup(vbasedev->name); + elem->namelen = strlen(vbasedev->name) + 1; + elem->ioas_id = vbasedev->cpr.ioas_id; + elem->devid = vbasedev->devid; + elem->hwpt_id = vbasedev->cpr.hwpt_id; + QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next); +} + +static CprVFIODevice *find_device(const char *name) +{ + CprVFIODeviceList *head = &cpr_state.vfio_devices; + CprVFIODevice *elem; + + QLIST_FOREACH(elem, head, next) { + if (!strcmp(elem->name, name)) { + return elem; + } + } + return NULL; +} + +static void vfio_cpr_delete_device(const char *name) +{ + CprVFIODevice *elem = find_device(name); + + if (elem) { + QLIST_REMOVE(elem, next); + g_free(elem->name); + g_free(elem); + } +} + +static bool vfio_cpr_find_device(VFIODevice *vbasedev) +{ + CprVFIODevice *elem = find_device(vbasedev->name); + + if (elem) { + vbasedev->cpr.ioas_id = elem->ioas_id; + vbasedev->devid = elem->devid; + vbasedev->cpr.hwpt_id = elem->hwpt_id; + trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id); + return true; + } + return false; +} + +static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp) +{ + if (!iommufd_change_process_capable(be)) { + if (errp) { + error_setg(errp, "vfio iommufd backend does not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + } + return false; + } + return true; +} + +static int iommufd_cpr_pre_save(void *opaque) +{ + IOMMUFDBackend *be = opaque; + + /* + * The process has not changed yet, but proactively try the ioctl, + * and it will fail if any DMA mappings are not supported. 
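Aside: iommufd_cpr_pre_save()/iommufd_cpr_post_load() here form a probe-then-commit pair: pre_save proactively checks that the process change will succeed and vetoes the save otherwise, while post_load performs the real transfer in the new process. A reduced model of that split, with stand-in types (the real hooks hang off a VMStateDescription):

#include <stdio.h>

typedef struct Backend { int supports_change; } Backend;

/* Fail at save time, where migration can still be aborted cleanly,
 * rather than at load time in the new process. */
static int pre_save(Backend *be)
{
    return be->supports_change ? 0 : -1;
}

static int post_load(Backend *be)
{
    (void)be;   /* the real code issues the change-process ioctl here */
    printf("changing process ownership of DMA mappings\n");
    return 0;
}

int main(void)
{
    Backend be = { .supports_change = 1 };
    if (pre_save(&be) == 0) {
        post_load(&be);
    }
    return 0;
}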
+ */ + if (!iommufd_change_process_capable(be)) { + error_report("some memory regions do not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + return -1; + } + return 0; +} + +static int iommufd_cpr_post_load(void *opaque, int version_id) +{ + IOMMUFDBackend *be = opaque; + Error *local_err = NULL; + + if (!iommufd_change_process(be, &local_err)) { + error_report_err(local_err); + return -1; + } + return 0; +} + +static const VMStateDescription iommufd_cpr_vmstate = { + .name = "iommufd", + .version_id = 0, + .minimum_version_id = 0, + .pre_save = iommufd_cpr_pre_save, + .post_load = iommufd_cpr_post_load, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp) +{ + Error **cpr_blocker = &be->cpr_blocker; + + if (!vfio_cpr_supported(be, cpr_blocker)) { + return migrate_add_blocker_modes(cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) == 0; + } + + vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be); + + return true; +} + +void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be) +{ + vmstate_unregister(NULL, &iommufd_cpr_vmstate, be); + migrate_del_blocker(&be->cpr_blocker); +} + +bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container, + Error **errp) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, + vfio_cpr_reboot_notifier, + MIG_MODE_CPR_REBOOT); + + vfio_cpr_add_kvm_notifier(); + + return true; +} + +void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_remove_notifier(&bcontainer->cpr_reboot_notifier); +} + +void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev) +{ + if (!cpr_is_incoming()) { + /* + * Beware fd may have already been saved by vfio_device_set_fd, + * so call resave to avoid a duplicate entry. + */ + cpr_resave_fd(vbasedev->name, 0, vbasedev->fd); + vfio_cpr_save_device(vbasedev); + } +} + +void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev) +{ + cpr_delete_fd(vbasedev->name, 0); + vfio_cpr_delete_device(vbasedev->name); +} + +void vfio_cpr_load_device(VFIODevice *vbasedev) +{ + if (cpr_is_incoming()) { + bool ret = vfio_cpr_find_device(vbasedev); + g_assert(ret); + + if (vbasedev->fd < 0) { + vbasedev->fd = cpr_find_fd(vbasedev->name, 0); + } + } +} diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c index a84c324..553b203 100644 --- a/hw/vfio/cpr-legacy.c +++ b/hw/vfio/cpr-legacy.c @@ -99,20 +99,21 @@ static int vfio_container_post_load(void *opaque, int version_id) { VFIOContainer *container = opaque; VFIOContainerBase *bcontainer = &container->bcontainer; - VFIOGroup *group; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + dma_map_fn saved_dma_map = vioc->dma_map; Error *local_err = NULL; + /* During incoming CPR, divert calls to dma_map. 
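Aside: vfio_container_post_load() now keeps the saved class dma_map pointer on the stack, installs the CPR variant for the duration of listener registration, and restores the original afterwards, instead of stashing the saved pointer in per-container state. The shape of that divert/restore, with stand-in types:

#include <stdio.h>

typedef int (*dma_map_fn)(const char *what);

static int dma_map(const char *what)     { printf("map %s\n", what);   return 0; }
static int cpr_dma_map(const char *what) { printf("reuse %s\n", what); return 0; }

typedef struct Class { dma_map_fn dma_map; } Class;

static void post_load(Class *vioc)
{
    dma_map_fn saved = vioc->dma_map;   /* keep on the stack ...     */

    vioc->dma_map = cpr_dma_map;        /* ... divert during replay  */
    vioc->dma_map("guest RAM");         /* listener replays mappings */
    vioc->dma_map = saved;              /* ... restore when done     */
}

int main(void)
{
    Class c = { .dma_map = dma_map };
    post_load(&c);
    c.dma_map("hotplugged RAM");
    return 0;
}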
*/ + vioc->dma_map = vfio_legacy_cpr_dma_map; + if (!vfio_listener_register(bcontainer, &local_err)) { error_report_err(local_err); return -1; } - QLIST_FOREACH(group, &container->group_list, container_next) { - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + /* Restore original dma_map function */ + vioc->dma_map = saved_dma_map; - /* Restore original dma_map function */ - vioc->dma_map = container->cpr.saved_dma_map; - } return 0; } @@ -148,6 +149,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, */ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + dma_map_fn saved_dma_map = vioc->dma_map; vioc->dma_map = vfio_legacy_cpr_dma_map; container->cpr.remap_listener = (MemoryListener) { @@ -158,7 +160,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, bcontainer->space->as); memory_listener_unregister(&container->cpr.remap_listener); container->cpr.vaddr_unmapped = false; - vioc->dma_map = container->cpr.saved_dma_map; + vioc->dma_map = saved_dma_map; } return 0; } @@ -177,14 +179,9 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) MIG_MODE_CPR_TRANSFER, -1) == 0; } - vmstate_register(NULL, -1, &vfio_container_vmstate, container); + vfio_cpr_add_kvm_notifier(); - /* During incoming CPR, divert calls to dma_map. */ - if (cpr_is_incoming()) { - VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); - container->cpr.saved_dma_map = vioc->dma_map; - vioc->dma_map = vfio_legacy_cpr_dma_map; - } + vmstate_register(NULL, -1, &vfio_container_vmstate, container); migration_add_notifier_mode(&container->cpr.transfer_notifier, vfio_cpr_fail_notifier, diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c index fdbb58e..384b56c 100644 --- a/hw/vfio/cpr.c +++ b/hw/vfio/cpr.c @@ -9,6 +9,8 @@ #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-cpr.h" #include "hw/vfio/pci.h" +#include "hw/pci/msix.h" +#include "hw/pci/msi.h" #include "migration/cpr.h" #include "qapi/error.h" #include "system/runstate.h" @@ -27,17 +29,67 @@ int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, return 0; } -bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp) +#define STRDUP_VECTOR_FD_NAME(vdev, name) \ + g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name)) + +void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr, + int fd) +{ + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + cpr_save_fd(fdname, nr, fd); +} + +int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr) { - migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, - vfio_cpr_reboot_notifier, - MIG_MODE_CPR_REBOOT); - return true; + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + return cpr_find_fd(fdname, nr); } -void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer) +void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr) { - migration_remove_notifier(&bcontainer->cpr_reboot_notifier); + g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name); + cpr_delete_fd(fdname, nr); +} + +static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors, + bool msix) +{ + int i, fd; + bool pending = false; + PCIDevice *pdev = &vdev->pdev; + + vdev->nr_vectors = nr_vectors; + vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors); + vdev->interrupt = msix ? 
VFIO_INT_MSIX : VFIO_INT_MSI; + + vfio_pci_prepare_kvm_msi_virq_batch(vdev); + + for (i = 0; i < nr_vectors; i++) { + VFIOMSIVector *vector = &vdev->msi_vectors[i]; + + fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i); + if (fd >= 0) { + vfio_pci_vector_init(vdev, i); + vfio_pci_msi_set_handler(vdev, i); + } + + if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) { + vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix); + } else { + vdev->msi_vectors[i].virq = -1; + } + + if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) { + set_bit(i, vdev->msix->pending); + pending = true; + } + } + + vfio_pci_commit_kvm_msi_virq_batch(vdev); + + if (msix) { + memory_region_set_enabled(&pdev->msix_pba_mmio, pending); + } } /* @@ -58,13 +110,93 @@ static int vfio_cpr_pci_pre_load(void *opaque) return 0; } +static int vfio_cpr_pci_post_load(void *opaque, int version_id) +{ + VFIOPCIDevice *vdev = opaque; + PCIDevice *pdev = &vdev->pdev; + int nr_vectors; + + vfio_sub_page_bar_update_mappings(vdev); + + if (msix_enabled(pdev)) { + vfio_pci_msix_set_notifiers(vdev); + nr_vectors = vdev->msix->entries; + vfio_cpr_claim_vectors(vdev, nr_vectors, true); + + } else if (msi_enabled(pdev)) { + nr_vectors = msi_nr_vectors_allocated(pdev); + vfio_cpr_claim_vectors(vdev, nr_vectors, false); + + } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) { + Error *local_err = NULL; + if (!vfio_pci_intx_enable(vdev, &local_err)) { + error_report_err(local_err); + return -1; + } + } + + return 0; +} + +static bool pci_msix_present(void *opaque, int version_id) +{ + PCIDevice *pdev = opaque; + + return msix_present(pdev); +} + +static const VMStateDescription vfio_intx_vmstate = { + .name = "vfio-cpr-intx", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_BOOL(pending, VFIOINTx), + VMSTATE_UINT32(route.mode, VFIOINTx), + VMSTATE_INT32(route.irq, VFIOINTx), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_VFIO_INTX(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(VFIOINTx), \ + .vmsd = &vfio_intx_vmstate, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, VFIOINTx), \ +} + const VMStateDescription vfio_cpr_pci_vmstate = { .name = "vfio-cpr-pci", .version_id = 0, .minimum_version_id = 0, .pre_load = vfio_cpr_pci_pre_load, + .post_load = vfio_cpr_pci_post_load, .needed = cpr_incoming_needed, .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), + VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present), + VMSTATE_VFIO_INTX(intx, VFIOPCIDevice), VMSTATE_END_OF_LIST() } }; + +static NotifierWithReturn kvm_close_notifier; + +static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, + Error **errp) +{ + if (e->type == MIG_EVENT_PRECOPY_DONE) { + vfio_kvm_device_close(); + } + return 0; +} + +void vfio_cpr_add_kvm_notifier(void) +{ + if (!kvm_close_notifier.notify) { + migration_add_notifier_mode(&kvm_close_notifier, + vfio_cpr_kvm_close_notifier, + MIG_MODE_CPR_TRANSFER); + } +} diff --git a/hw/vfio/device.c b/hw/vfio/device.c index d91c695..52a1996 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -28,6 +28,8 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/units.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "monitor/monitor.h" #include "vfio-helpers.h" @@ -316,28 +318,40 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) error_setg(errp, "Use FD passing only with iommufd backend"); return false; } - 
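Aside: the vector helpers above key each preserved file descriptor by device name plus vector name plus index, so an MSI-X interrupt eventfd survives the CPR exec and can be found again in the new process. A toy model of that keyed store (cpr_save_fd()/cpr_find_fd() are the real QEMU helpers; the flat table below is invented):

#include <stdio.h>
#include <string.h>

static struct { char key[64]; int nr; int fd; } tbl[16];
static int n;

static void save_fd(const char *dev, const char *name, int nr, int fd)
{
    snprintf(tbl[n].key, sizeof(tbl[n].key), "%s_%s", dev, name);
    tbl[n].nr = nr;
    tbl[n].fd = fd;
    n++;
}

static int find_fd(const char *dev, const char *name, int nr)
{
    char key[64];

    snprintf(key, sizeof(key), "%s_%s", dev, name);
    for (int i = 0; i < n; i++) {
        if (!strcmp(tbl[i].key, key) && tbl[i].nr == nr) {
            return tbl[i].fd;
        }
    }
    return -1;
}

int main(void)
{
    save_fd("vfio0", "kvm_interrupt", 3, 42);
    printf("fd = %d\n", find_fd("vfio0", "kvm_interrupt", 3));
    return 0;
}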
/* - * Give a name with fd so any function printing out vbasedev->name - * will not break. - */ if (!vbasedev->name) { - vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + + if (vbasedev->dev->id) { + vbasedev->name = g_strdup(vbasedev->dev->id); + return true; + } else { + /* + * Assign a name so any function printing it will not break. + * The fd number changes across processes, so this cannot be + * used as an invariant name for CPR. + */ + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + error_setg(&vbasedev->cpr.id_blocker, + "vfio device with fd=%d needs an id property", + vbasedev->fd); + return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker, + errp, MIG_MODE_CPR_TRANSFER, + -1) == 0; + } } } return true; } -void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) +void vfio_device_free_name(VFIODevice *vbasedev) { - ERRP_GUARD(); - int fd = monitor_fd_param(monitor_cur(), str, errp); + g_clear_pointer(&vbasedev->name, g_free); + migrate_del_blocker(&vbasedev->cpr.id_blocker); +} - if (fd < 0) { - error_prepend(errp, "Could not parse remote object fd %s:", str); - return; - } - vbasedev->fd = fd; +void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) +{ + vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp); } static VFIODeviceIOOps vfio_device_io_ops_ioctl; @@ -449,6 +463,8 @@ void vfio_device_detach(VFIODevice *vbasedev) void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, struct vfio_device_info *info) { + int i; + vbasedev->num_irqs = info->num_irqs; vbasedev->num_regions = info->num_regions; vbasedev->flags = info->flags; @@ -463,6 +479,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, vbasedev->num_regions); if (vbasedev->use_region_fds) { vbasedev->region_fds = g_new0(int, vbasedev->num_regions); + for (i = 0; i < vbasedev->num_regions; i++) { + vbasedev->region_fds[i] = -1; + } } } @@ -475,7 +494,6 @@ void vfio_device_unprepare(VFIODevice *vbasedev) if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) { close(vbasedev->region_fds[i]); } - } g_clear_pointer(&vbasedev->reginfo, g_free); diff --git a/hw/vfio/display.c b/hw/vfio/display.c index 9c6f5aa..faacd90 100644 --- a/hw/vfio/display.c +++ b/hw/vfio/display.c @@ -365,7 +365,7 @@ static bool vfio_display_dmabuf_init(VFIOPCIDevice *vdev, Error **errp) &vfio_display_dmabuf_ops, vdev); if (vdev->enable_ramfb) { - vdev->dpy->ramfb = ramfb_setup(errp); + vdev->dpy->ramfb = ramfb_setup(vdev->use_legacy_x86_rom, errp); if (!vdev->dpy->ramfb) { return false; } @@ -494,7 +494,7 @@ static bool vfio_display_region_init(VFIOPCIDevice *vdev, Error **errp) &vfio_display_region_ops, vdev); if (vdev->enable_ramfb) { - vdev->dpy->ramfb = ramfb_setup(errp); + vdev->dpy->ramfb = ramfb_setup(vdev->use_legacy_x86_rom, errp); if (!vdev->dpy->ramfb) { return false; } diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index d0dbab1..23d13e5 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, int vfio_kvm_device_fd = -1; #endif +void vfio_kvm_device_close(void) +{ +#ifdef CONFIG_KVM + kvm_close(); + if (vfio_kvm_device_fd != -1) { + close(vfio_kvm_device_fd); + vfio_kvm_device_fd = -1; + } +#endif +} + int vfio_kvm_device_add_fd(int fd, Error **errp) { #ifdef CONFIG_KVM @@ -198,3 +209,20 @@ retry: return info; } + +bool vfio_arch_wants_loading_config_after_iter(void) +{ + /* + * Starting the config load 
only after all iterables were loaded (during + * non-iterables loading phase) is required for ARM64 due to this platform + * VFIO dependency on interrupt controller being loaded first. + * + * See commit d329f5032e17 ("vfio: Move the saving of the config space to + * the right place in VFIO migration"). + */ +#if defined(TARGET_ARM) + return true; +#else + return false; +#endif +} diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index e7a9d1f..ee0767b 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -113,6 +113,7 @@ static int igd_gen(VFIOPCIDevice *vdev) #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ +#define IGD_GMCH_VGA_DISABLE BIT(1) #define IGD_GMCH_GEN6_GMS_SHIFT 3 /* SNB_GMCH in i915 */ #define IGD_GMCH_GEN6_GMS_MASK 0x1f #define IGD_GMCH_GEN8_GMS_SHIFT 8 /* BDW_GMCH in i915 */ @@ -533,12 +534,14 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) /* * For backward compatibility, enable legacy mode when * - Device geneation is 6 to 9 (including both) + * - IGD claims VGA cycles on host * - Machine type is i440fx (pc_piix) * - IGD device is at guest BDF 00:02.0 * - Not manually disabled by x-igd-legacy-mode=off */ if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) && (gen >= 6 && gen <= 9) && + !(gmch & IGD_GMCH_VGA_DISABLE) && !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") && (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev), 0, PCI_DEVFN(0x2, 0)))) { @@ -568,14 +571,16 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) } /* - * If IGD VGA Disable is clear (expected) and VGA is not already - * enabled, try to enable it. Probably shouldn't be using legacy mode - * without VGA, but also no point in us enabling VGA if disabled in - * hardware. + * If VGA is not already enabled, try to enable it. We shouldn't be + * using legacy mode without VGA. */ - if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) { - error_setg(&err, "Unable to enable VGA access"); - goto error; + if (!vdev->vga) { + if (vfio_populate_vga(vdev, &err)) { + vfio_pci_config_register_vga(vdev); + } else { + error_setg(&err, "Unable to enable VGA access"); + goto error; + } } /* Enable OpRegion and LPC bridge quirk */ diff --git a/hw/vfio/iommufd-stubs.c b/hw/vfio/iommufd-stubs.c new file mode 100644 index 0000000..0be5276 --- /dev/null +++ b/hw/vfio/iommufd-stubs.c @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2025 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "migration/cpr.h" +#include "migration/vmstate.h" + +const VMStateDescription vmstate_cpr_vfio_devices = { + .name = CPR_STATE "/vfio devices", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]){ + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index d3efef7..48c590b 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -25,6 +25,7 @@ #include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" +#include "migration/cpr.h" #include "pci.h" #include "vfio-iommufd.h" #include "vfio-helpers.h" @@ -45,6 +46,18 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, iova, size, vaddr, readonly); } +static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + int fd, unsigned long start, bool readonly) +{ + const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + return iommufd_backend_map_file_dma(container->be, + container->ioas_id, + iova, size, fd, start, readonly); +} + static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all) @@ -109,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) goto err_kvm_device_add; } + if (cpr_is_incoming()) { + goto skip_bind; + } + /* Bind device to iommufd */ bind.iommufd = iommufd->fd; if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) { @@ -120,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) vbasedev->devid = bind.out_devid; trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, vbasedev->fd, vbasedev->devid); + +skip_bind: return true; err_bind: iommufd_cdev_kvm_device_del(vbasedev); @@ -313,7 +332,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, /* Try to find a domain */ QLIST_FOREACH(hwpt, &container->hwpt_list, next) { - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + if (!cpr_is_incoming()) { + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); + } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) { + ret = 0; + } else { + continue; + } + if (ret) { /* -EINVAL means the domain is incompatible with the device. 
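Aside: the iommufd attach paths here branch on cpr_is_incoming(): on a normal start the ioas/hwpt ids are allocated and attached, while on incoming CPR the ids recorded by the previous process are simply adopted, since the kernel objects survived the exec. Reduced to its skeleton, with stand-in calls:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool cpr_incoming;              /* stand-in for cpr_is_incoming() */
static uint32_t saved_hwpt_id = 7;     /* id recorded by the old process */

static uint32_t alloc_hwpt(void)        { return 42; }  /* pretend ioctl */
static void     attach_hwpt(uint32_t i) { printf("attach %u\n", i); }

static uint32_t get_hwpt(void)
{
    uint32_t id;

    if (cpr_incoming) {
        id = saved_hwpt_id;            /* already attached before exec */
    } else {
        id = alloc_hwpt();
        attach_hwpt(id);
    }
    return id;
}

int main(void)
{
    cpr_incoming = true;
    printf("hwpt_id = %u\n", get_hwpt());
    return 0;
}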
*/ if (ret == -EINVAL) { @@ -330,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } else { vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); return true; @@ -352,6 +379,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; } + if (cpr_is_incoming()) { + hwpt_id = vbasedev->cpr.hwpt_id; + goto skip_alloc; + } + if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid, container->ioas_id, flags, IOMMU_HWPT_DATA_NONE, 0, NULL, @@ -359,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev, return false; } + ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); + if (ret) { + iommufd_backend_free_id(container->be, hwpt_id); + return false; + } + +skip_alloc: hwpt = g_malloc0(sizeof(*hwpt)); hwpt->hwpt_id = hwpt_id; hwpt->hwpt_flags = flags; QLIST_INIT(&hwpt->device_list); - ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp); - if (ret) { - iommufd_backend_free_id(container->be, hwpt->hwpt_id); - g_free(hwpt); - return false; - } - vbasedev->hwpt = hwpt; + vbasedev->cpr.hwpt_id = hwpt->hwpt_id; vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt); QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next); QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next); @@ -409,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev, return iommufd_cdev_autodomains_get(vbasedev, container, errp); } - return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); + /* If CPR, we are already attached to ioas_id. */ + return cpr_is_incoming() || + !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); } static void iommufd_cdev_detach_container(VFIODevice *vbasedev, @@ -434,7 +469,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) if (!QLIST_EMPTY(&bcontainer->device_list)) { return; } - vfio_cpr_unregister_container(bcontainer); + vfio_iommufd_cpr_unregister_container(container); vfio_listener_unregister(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); object_unref(container); @@ -498,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, VFIOAddressSpace *space; struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; int ret, devfd; + bool res; uint32_t ioas_id; Error *err = NULL; const VFIOIOMMUClass *iommufd_vioc = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + vfio_cpr_load_device(vbasedev); + if (vbasedev->fd < 0) { devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); if (devfd < 0) { @@ -526,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, vbasedev->iommufd != container->be) { continue; } - if (!iommufd_cdev_attach_container(vbasedev, container, &err)) { + + if (!cpr_is_incoming()) { + res = iommufd_cdev_attach_container(vbasedev, container, &err); + } else if (vbasedev->cpr.ioas_id == container->ioas_id) { + res = true; + } else { + continue; + } + + if (!res) { const char *msg = error_get_pretty(err); trace_iommufd_cdev_fail_attach_existing_container(msg); @@ -543,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, } } + if (cpr_is_incoming()) { + ioas_id = vbasedev->cpr.ioas_id; + goto skip_ioas_alloc; + } + /* Need to allocate a new dedicated container */ if 
(!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) { goto err_alloc_ioas; @@ -550,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); +skip_ioas_alloc: container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; QLIST_INIT(&container->hwpt_list); + vbasedev->cpr.ioas_id = ioas_id; bcontainer = &container->bcontainer; vfio_address_space_insert(space, bcontainer); @@ -580,7 +634,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, goto err_listener_register; } - if (!vfio_cpr_register_container(bcontainer, errp)) { + if (!vfio_iommufd_cpr_register_container(container, errp)) { goto err_listener_register; } @@ -611,6 +665,7 @@ found_container: } vfio_device_prepare(vbasedev, bcontainer, &dev_info); + vfio_iommufd_cpr_register_device(vbasedev); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -648,6 +703,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) iommufd_cdev_container_destroy(container); vfio_address_space_put(space); + vfio_iommufd_cpr_unregister_device(vbasedev); iommufd_cdev_unbind_and_disconnect(vbasedev); close(vbasedev->fd); } @@ -807,6 +863,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); vioc->dma_map = iommufd_cdev_map; + vioc->dma_map_file = iommufd_cdev_map_file; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; vioc->detach_device = iommufd_cdev_detach; diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index 63ea393..bfaf6be 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -31,7 +31,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( )) system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( 'iommufd.c', + 'cpr-iommufd.c', )) +system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c')) system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( 'display.c', )) diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c index 850a319..e478503 100644 --- a/hw/vfio/migration-multifd.c +++ b/hw/vfio/migration-multifd.c @@ -13,7 +13,6 @@ #include "hw/vfio/vfio-device.h" #include "migration/misc.h" #include "qapi/error.h" -#include "qemu/bswap.h" #include "qemu/error-report.h" #include "qemu/lockable.h" #include "qemu/main-loop.h" @@ -23,6 +22,7 @@ #include "migration-multifd.h" #include "vfio-migration-internal.h" #include "trace.h" +#include "vfio-helpers.h" #define VFIO_DEVICE_STATE_CONFIG_STATE (1) @@ -35,6 +35,18 @@ typedef struct VFIODeviceStatePacket { uint8_t data[0]; } QEMU_PACKED VFIODeviceStatePacket; +bool vfio_load_config_after_iter(VFIODevice *vbasedev) +{ + if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_ON) { + return true; + } else if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_OFF) { + return false; + } + + assert(vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_AUTO); + return vfio_arch_wants_loading_config_after_iter(); +} + /* type safety */ typedef struct VFIOStateBuffers { GArray *array; @@ -50,12 +62,16 @@ typedef struct VFIOMultifd { bool load_bufs_thread_running; bool load_bufs_thread_want_exit; + bool load_bufs_iter_done; + QemuCond load_bufs_iter_done_cond; + VFIOStateBuffers load_bufs; QemuCond load_bufs_buffer_ready_cond; QemuCond load_bufs_thread_finished_cond; 
QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ uint32_t load_buf_idx; uint32_t load_buf_idx_last; + size_t load_buf_queued_pending_buffers_size; } VFIOMultifd; static void vfio_state_buffer_clear(gpointer data) @@ -112,6 +128,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, VFIOMigration *migration = vbasedev->migration; VFIOMultifd *multifd = migration->multifd; VFIOStateBuffer *lb; + size_t data_size = packet_total_size - sizeof(*packet); vfio_state_buffers_assert_init(&multifd->load_bufs); if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { @@ -127,8 +144,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, assert(packet->idx >= multifd->load_buf_idx); - lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); - lb->len = packet_total_size - sizeof(*packet); + multifd->load_buf_queued_pending_buffers_size += data_size; + if (multifd->load_buf_queued_pending_buffers_size > + vbasedev->migration_max_queued_buffers_size) { + error_setg(errp, + "%s: queuing state buffer %" PRIu32 + " would exceed the maximum size of %" PRIu64, + vbasedev->name, packet->idx, + vbasedev->migration_max_queued_buffers_size); + return false; + } + + lb->data = g_memdup2(&packet->data, data_size); + lb->len = data_size; lb->is_present = true; return true; @@ -312,6 +340,9 @@ static bool vfio_load_state_buffer_write(VFIODevice *vbasedev, assert(wr_ret <= buf_len); buf_len -= wr_ret; buf_cur += wr_ret; + + assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret); + multifd->load_buf_queued_pending_buffers_size -= wr_ret; } trace_vfio_load_state_device_buffer_load_end(vbasedev->name, @@ -394,6 +425,22 @@ static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp) multifd->load_buf_idx++; } + if (vfio_load_config_after_iter(vbasedev)) { + while (!multifd->load_bufs_iter_done) { + qemu_cond_wait(&multifd->load_bufs_iter_done_cond, + &multifd->load_bufs_mutex); + + /* + * Need to re-check cancellation immediately after the wait in case + * the cond was signalled by vfio_load_cleanup_load_bufs_thread().
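+ * Otherwise an exit request signalled there could be missed and + * this thread would block on the cond forever.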
+ */ + if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) { + error_setg(errp, "operation cancelled"); + goto thread_exit; + } + } + } + if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) { goto thread_exit; } @@ -413,6 +460,48 @@ thread_exit: return ret; } +int vfio_load_state_config_load_ready(VFIODevice *vbasedev) +{ + VFIOMigration *migration = vbasedev->migration; + VFIOMultifd *multifd = migration->multifd; + int ret = 0; + + if (!vfio_multifd_transfer_enabled(vbasedev)) { + error_report("%s: got DEV_CONFIG_LOAD_READY outside multifd transfer", + vbasedev->name); + return -EINVAL; + } + + if (!vfio_load_config_after_iter(vbasedev)) { + error_report("%s: got DEV_CONFIG_LOAD_READY but was disabled", + vbasedev->name); + return -EINVAL; + } + + assert(multifd); + + /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */ + bql_unlock(); + WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) { + if (multifd->load_bufs_iter_done) { + /* Can't print error here as we're outside BQL */ + ret = -EINVAL; + break; + } + + multifd->load_bufs_iter_done = true; + qemu_cond_signal(&multifd->load_bufs_iter_done_cond); + } + bql_lock(); + + if (ret) { + error_report("%s: duplicate DEV_CONFIG_LOAD_READY", + vbasedev->name); + } + + return ret; +} + static VFIOMultifd *vfio_multifd_new(void) { VFIOMultifd *multifd = g_new(VFIOMultifd, 1); @@ -423,8 +512,12 @@ static VFIOMultifd *vfio_multifd_new(void) multifd->load_buf_idx = 0; multifd->load_buf_idx_last = UINT32_MAX; + multifd->load_buf_queued_pending_buffers_size = 0; qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); + multifd->load_bufs_iter_done = false; + qemu_cond_init(&multifd->load_bufs_iter_done_cond); + multifd->load_bufs_thread_running = false; multifd->load_bufs_thread_want_exit = false; qemu_cond_init(&multifd->load_bufs_thread_finished_cond); @@ -448,6 +541,7 @@ static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd) multifd->load_bufs_thread_want_exit = true; qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); + qemu_cond_signal(&multifd->load_bufs_iter_done_cond); qemu_cond_wait(&multifd->load_bufs_thread_finished_cond, &multifd->load_bufs_mutex); } @@ -460,6 +554,7 @@ static void vfio_multifd_free(VFIOMultifd *multifd) vfio_load_cleanup_load_bufs_thread(multifd); qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond); + qemu_cond_destroy(&multifd->load_bufs_iter_done_cond); vfio_state_buffers_destroy(&multifd->load_bufs); qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); qemu_mutex_destroy(&multifd->load_bufs_mutex); @@ -583,7 +678,7 @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, /* * This thread is spawned by the migration core directly via - * .save_live_complete_precopy_thread SaveVMHandler. + * .save_complete_precopy_thread SaveVMHandler. * * It exits after either: * * completing saving the remaining device state and device config, OR: @@ -592,7 +687,7 @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, * multifd_device_state_save_thread_should_exit() returning true. 
*/ bool -vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, +vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d, Error **errp) { VFIODevice *vbasedev = d->handler_opaque; diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h index 0bab632..82d2d3a 100644 --- a/hw/vfio/migration-multifd.h +++ b/hw/vfio/migration-multifd.h @@ -20,13 +20,16 @@ void vfio_multifd_cleanup(VFIODevice *vbasedev); bool vfio_multifd_transfer_supported(void); bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); +bool vfio_load_config_after_iter(VFIODevice *vbasedev); bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size, Error **errp); +int vfio_load_state_config_load_ready(VFIODevice *vbasedev); + void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f); bool -vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, +vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d, Error **errp); int vfio_multifd_switchover_start(VFIODevice *vbasedev); diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index b76697bd..4c06e3d 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -675,7 +675,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque) int ret; if (vfio_multifd_transfer_enabled(vbasedev)) { - vfio_multifd_emit_dummy_eos(vbasedev, f); + if (vfio_load_config_after_iter(vbasedev)) { + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY); + } else { + vfio_multifd_emit_dummy_eos(vbasedev, f); + } return; } @@ -784,6 +788,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) return ret; } + case VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY: + { + return vfio_load_state_config_load_ready(vbasedev); + } default: error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); return -EINVAL; @@ -824,7 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { .state_pending_exact = vfio_state_pending_exact, .is_active_iterate = vfio_is_active_iterate, .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_save_complete_precopy, + .save_complete = vfio_save_complete_precopy, .save_state = vfio_save_state, .load_setup = vfio_load_setup, .load_cleanup = vfio_load_cleanup, @@ -835,7 +843,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { */ .load_state_buffer = vfio_multifd_load_state_buffer, .switchover_start = vfio_switchover_start, - .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread, + .save_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread, }; /* ---------------------------------------------------------------------- */ diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index fa25bde..4fa692c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -29,6 +29,7 @@ #include "hw/pci/pci_bridge.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "hw/vfio/vfio-cpr.h" #include "migration/vmstate.h" #include "migration/cpr.h" #include "qobject/qdict.h" @@ -48,8 +49,6 @@ #include "vfio-migration-internal.h" #include "vfio-helpers.h" -#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" - /* Protected by BQL */ static KVMRouteChange vfio_route_change; @@ -57,20 +56,33 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +/* Create new or reuse existing eventfd */ static bool vfio_notifier_init(VFIOPCIDevice *vdev, 
EventNotifier *e, const char *name, int nr, Error **errp) { - int ret = event_notifier_init(e, 0); + int fd, ret; + + fd = vfio_cpr_load_vector_fd(vdev, name, nr); + if (fd >= 0) { + event_notifier_init_fd(e, fd); + return true; + } + ret = event_notifier_init(e, 0); if (ret) { error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name); + return false; } - return !ret; + + fd = event_notifier_get_fd(e); + vfio_cpr_save_vector_fd(vdev, name, nr, fd); + return true; } static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e, const char *name, int nr) { + vfio_cpr_delete_vector_fd(vdev, name, nr); event_notifier_cleanup(e); } @@ -196,6 +208,36 @@ fail: #endif } +static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) +{ +#ifdef CONFIG_KVM + if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || + vdev->intx.route.mode != PCI_INTX_ENABLED || + !kvm_resamplefds_enabled()) { + return true; + } + + if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { + return false; + } + + if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, + &vdev->intx.interrupt, + &vdev->intx.unmask, + vdev->intx.route.irq)) { + error_setg_errno(errp, errno, "failed to setup resample irqfd"); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); + return false; + } + + vdev->intx.kvm_accel = true; + trace_vfio_intx_enable_kvm(vdev->vbasedev.name); + return true; +#else + return true; +#endif +} + static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM @@ -291,7 +333,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) return true; } - vfio_disable_interrupts(vdev); + /* + * Do not alter interrupt state during vfio_realize and cpr load. + * The incoming state is cleared thereafter. + */ + if (!cpr_is_incoming()) { + vfio_disable_interrupts(vdev); + } vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ pci_config_set_interrupt_pin(vdev->pdev.config, pin); @@ -314,6 +362,14 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) fd = event_notifier_get_fd(&vdev->intx.interrupt); qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev); + + if (cpr_is_incoming()) { + if (!vfio_cpr_intx_enable_kvm(vdev, &err)) { + warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); + } + goto skip_signaling; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { qemu_set_fd_handler(fd, NULL, NULL, vdev); @@ -325,6 +381,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); } +skip_signaling: vdev->interrupt = VFIO_INT_INTx; trace_vfio_intx_enable(vdev->vbasedev.name); @@ -394,6 +451,14 @@ static void vfio_msi_interrupt(void *opaque) notify(&vdev->pdev, nr); } +void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr) +{ + VFIOMSIVector *vector = &vdev->msi_vectors[nr]; + int fd = event_notifier_get_fd(&vector->interrupt); + + qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector); +} + /* * Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid * fd to kernel. @@ -656,6 +721,15 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, static int vfio_msix_vector_use(PCIDevice *pdev, unsigned int nr, MSIMessage msg) { + /* + * Ignore the callback from msix_set_vector_notifiers during resume. + * The necessary subset of these actions is called from + * vfio_cpr_claim_vectors during post load. 
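+ * (That path re-uses the vector eventfds preserved in CPR state; + * see vfio_notifier_init above.)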
+ */ + if (cpr_is_incoming()) { + return 0; + } + return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt); } @@ -686,6 +760,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } +void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev) +{ + msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, + vfio_msix_vector_release, NULL); +} + void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { assert(!vdev->defer_kvm_irq_routing); @@ -2744,6 +2824,20 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) return ret; } +void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev) +{ + PCIDevice *pdev = &vdev->pdev; + int page_size = qemu_real_host_page_size(); + int bar; + + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + PCIIORegion *r = &pdev->io_regions[bar]; + if (r->addr != PCI_BAR_UNMAPPED && r->size > 0 && r->size < page_size) { + vfio_sub_page_bar_update_mapping(pdev, bar); + } + } +} + static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, @@ -2811,10 +2905,6 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) "vfio-vga-io@0x3c0", QEMU_PCI_VGA_IO_HI_SIZE); - pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, - &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, - &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); - return true; } @@ -2914,7 +3004,7 @@ void vfio_pci_put_device(VFIOPCIDevice *vdev) vfio_device_detach(&vdev->vbasedev); - g_free(vdev->vbasedev.name); + vfio_device_free_name(&vdev->vbasedev); g_free(vdev->msix); } @@ -2965,6 +3055,11 @@ void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->err_notifier); qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev); + /* Do not alter irq_signaling during vfio_realize for cpr */ + if (cpr_is_incoming()) { + return; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); @@ -3032,6 +3127,12 @@ void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->req_notifier); qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev); + /* Do not alter irq_signaling during vfio_realize for cpr */ + if (cpr_is_incoming()) { + vdev->req_enabled = true; + return; + } + if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); @@ -3061,6 +3162,15 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } +void vfio_pci_config_register_vga(VFIOPCIDevice *vdev) +{ + assert(vdev->vga != NULL); + + pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, + &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); +} + bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -3135,6 +3245,23 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) vdev->sub_device_id); } + /* + * Class code is a 24-bit value at config space 0x09. Allow overriding it + * with any 24-bit value. 
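+ * For example, x-pci-class-code=0x030000 presents the device to + * the guest as a VGA display controller.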
+ */ + if (vdev->class_code != PCI_ANY_ID) { + if (vdev->class_code > 0xffffff) { + error_setg(errp, "invalid PCI class code provided"); + return false; + } + /* Higher 24 bits of PCI_CLASS_REVISION are class code */ + vfio_add_emulated_long(vdev, PCI_CLASS_REVISION, + vdev->class_code << 8, ~0xff); + trace_vfio_pci_emulated_class_code(vbasedev->name, vdev->class_code); + } else { + vdev->class_code = pci_get_long(pdev->config + PCI_CLASS_REVISION) >> 8; + } + /* QEMU can change multi-function devices to single function, or reverse */ vdev->emulated_config_bits[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_MULTI_FUNCTION; @@ -3164,6 +3291,10 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) vfio_bars_register(vdev); + if (vdev->vga && vfio_is_vga(vdev)) { + vfio_pci_config_register_vga(vdev); + } + return true; } @@ -3189,7 +3320,13 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) vfio_intx_routing_notifier); vdev->irqchip_change_notifier.notify = vfio_irqchip_change; kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); - if (!vfio_intx_enable(vdev, errp)) { + + /* + * During CPR, do not call vfio_intx_enable at this time. Instead, + * call it from vfio_pci_post_load after the intx routing data has + * been loaded from vmstate. + */ + if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) { timer_free(vdev->intx.mmap_timer); pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); @@ -3524,6 +3661,11 @@ static const Property vfio_pci_dev_properties[] = { vbasedev.migration_multifd_transfer, vfio_pci_migration_multifd_transfer_prop, OnOffAuto, .set_default = true, .defval.i = ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice, + vbasedev.migration_load_config_after_iter, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice, + vbasedev.migration_max_queued_buffers_size, UINT64_MAX), DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice, vbasedev.migration_events, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), @@ -3544,6 +3686,8 @@ static const Property vfio_pci_dev_properties[] = { sub_vendor_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, sub_device_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-class-code", VFIOPCIDevice, + class_code, PCI_ANY_ID), DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0), DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice, nv_gpudirect_clique, @@ -3698,6 +3842,20 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) "x-migration-multifd-transfer", "Transfer this device state via " "multifd channels when live migrating it"); + object_class_property_set_description(klass, /* 10.1 */ + "x-migration-load-config-after-iter", + "Start the config load only after " + "all iterables were loaded (during " + "non-iterables loading phase) when " + "doing live migration of device state " + "via multifd channels"); + object_class_property_set_description(klass, /* 10.1 */ + "x-migration-max-queued-buffers-size", + "Maximum size of in-flight VFIO " + "device state buffers queued at the " + "destination when doing live " + "migration of device state via " + "multifd channels"); } static const TypeInfo vfio_pci_dev_info = { @@ -3710,6 +3868,8 @@ static const TypeInfo vfio_pci_dev_info = { static const Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, 
enable_ramfb, false), + DEFINE_PROP_BOOL("use-legacy-x86-rom", VFIOPCIDevice, + use_legacy_x86_rom, false), DEFINE_PROP_ON_OFF_AUTO("x-ramfb-migrate", VFIOPCIDevice, ramfb_migrate, ON_OFF_AUTO_AUTO), }; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 5ba7330..81465a8 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -14,6 +14,7 @@ #include "system/memory.h" #include "hw/pci/pci_device.h" +#include "hw/vfio/types.h" #include "hw/vfio/vfio-device.h" #include "hw/vfio/vfio-region.h" #include "qemu/event_notifier.h" @@ -119,17 +120,8 @@ typedef struct VFIOMSIXInfo { MemoryRegion *pba_region; } VFIOMSIXInfo; -/* - * TYPE_VFIO_PCI_BASE is an abstract type used to share code - * between VFIO implementations that use a kernel driver - * with those that use user sockets. - */ -#define TYPE_VFIO_PCI_BASE "vfio-pci-base" OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_BASE) -#define TYPE_VFIO_PCI "vfio-pci" -/* TYPE_VFIO_PCI shares struct VFIOPCIDevice. */ - struct VFIOPCIDevice { PCIDevice pdev; VFIODevice vbasedev; @@ -157,6 +149,7 @@ struct VFIOPCIDevice { uint32_t device_id; uint32_t sub_vendor_id; uint32_t sub_device_id; + uint32_t class_code; uint32_t features; #define VFIO_FEATURE_ENABLE_VGA_BIT 0 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) @@ -188,6 +181,7 @@ struct VFIOPCIDevice { bool no_kvm_ioeventfd; bool no_vfio_ioeventfd; bool enable_ramfb; + bool use_legacy_x86_rom; OnOffAuto ramfb_migrate; bool defer_kvm_irq_routing; bool clear_parent_atomics_on_exit; @@ -205,10 +199,7 @@ static inline bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t de static inline bool vfio_is_vga(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; - uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE); - - return class == PCI_CLASS_DISPLAY_VGA; + return (vdev->class_code >> 8) == PCI_CLASS_DISPLAY_VGA; } /* MSI/MSI-X/INTx */ @@ -218,6 +209,8 @@ void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev); void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev); bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev); +void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr); uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); void vfio_pci_write_config(PCIDevice *pdev, @@ -226,6 +219,7 @@ void vfio_pci_write_config(PCIDevice *pdev, uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); +void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev); bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev); bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp); void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); @@ -259,6 +253,7 @@ extern const VMStateDescription vfio_display_vmstate; void vfio_pci_bars_exit(VFIOPCIDevice *vdev); bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_config_register_vga(VFIOPCIDevice *vdev); bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp); bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp); void vfio_pci_intx_eoi(VFIODevice *vbasedev); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 9a21f2e..5c1795a 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp) { /* @fd takes precedence over @sysfsdev which takes precedence over @host 
*/ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); } else if (vbasedev->fd < 0) { if (!vbasedev->name || strchr(vbasedev->name, '/')) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index e1728c4..fc6ed23 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -48,6 +48,7 @@ vfio_pci_emulated_vendor_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_device_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s 0x%04x" vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x" +vfio_pci_emulated_class_code(const char *name, uint32_t val) "%s 0x%06x" # pci-quirks.c vfio_quirk_rom_in_denylist(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x" @@ -197,6 +198,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" +# cpr-iommufd.c +vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u" + # device.c vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" vfio_device_reset_handler(void) "" diff --git a/hw/vfio/types.h b/hw/vfio/types.h new file mode 100644 index 0000000..c19334f --- /dev/null +++ b/hw/vfio/types.h @@ -0,0 +1,23 @@ +/* + * VFIO types definition + * + * Copyright Red Hat, Inc. 2025 + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_VFIO_VFIO_TYPES_H +#define HW_VFIO_VFIO_TYPES_H + +/* + * TYPE_VFIO_PCI_BASE is an abstract type used to share code + * between VFIO implementations that use a kernel driver + * with those that use user sockets. + */ +#define TYPE_VFIO_PCI_BASE "vfio-pci-base" + +#define TYPE_VFIO_PCI "vfio-pci" +/* TYPE_VFIO_PCI shares struct VFIOPCIDevice. 
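+ * It is a subtype of TYPE_VFIO_PCI_BASE and adds no instance + * fields of its own.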
*/ + +#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + +#endif /* HW_VFIO_VFIO_TYPES_H */ diff --git a/hw/vfio/vfio-helpers.h b/hw/vfio/vfio-helpers.h index 54a327f..ce31758 100644 --- a/hw/vfio/vfio-helpers.h +++ b/hw/vfio/vfio-helpers.h @@ -32,4 +32,6 @@ struct vfio_device_info *vfio_get_device_info(int fd); int vfio_kvm_device_add_fd(int fd, Error **errp); int vfio_kvm_device_del_fd(int fd, Error **errp); +bool vfio_arch_wants_loading_config_after_iter(void); + #endif /* HW_VFIO_VFIO_HELPERS_H */ diff --git a/hw/vfio/vfio-migration-internal.h b/hw/vfio/vfio-migration-internal.h index a8b456b..814fbd9 100644 --- a/hw/vfio/vfio-migration-internal.h +++ b/hw/vfio/vfio-migration-internal.h @@ -13,7 +13,6 @@ #include <linux/vfio.h> #endif -#include "qemu/typedefs.h" #include "qemu/notify.h" /* @@ -32,6 +31,7 @@ #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) #define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) +#define VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY (0xffffffffef100006ULL) typedef struct VFIODevice VFIODevice; typedef struct VFIOMultifd VFIOMultifd; diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 164f6fd..3ea7b3c 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -1,6 +1,7 @@ system_virtio_ss = ss.source_set() system_virtio_ss.add(files('virtio-bus.c')) system_virtio_ss.add(files('iothread-vq-mapping.c')) +system_virtio_ss.add(files('virtio-config-io.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) @@ -10,11 +11,11 @@ system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c') specific_virtio_ss = ss.source_set() specific_virtio_ss.add(files('virtio.c')) -specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c')) +specific_virtio_ss.add(files('virtio-qmp.c')) if have_vhost system_virtio_ss.add(files('vhost.c')) - specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) + system_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c')) if have_vhost_user # fixme - this really should be generic specific_virtio_ss.add(files('vhost-user.c')) @@ -43,22 +44,22 @@ if have_vhost endif if have_vhost_vdpa system_virtio_ss.add(files('vhost-vdpa.c')) - specific_virtio_ss.add(files('vhost-shadow-virtqueue.c')) + system_virtio_ss.add(files('vhost-shadow-virtqueue.c')) endif else system_virtio_ss.add(files('vhost-stub.c')) endif +system_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) -specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) -specific_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('virtio-nsm.c', 'cbor-helpers.c'), libcbor]) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) -specific_virtio_ss.add(when: 
'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) -specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: files('virtio-nsm.c')) +system_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('cbor-helpers.c'), libcbor]) +system_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) @@ -67,6 +68,7 @@ virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk- virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host-pci.c')) @@ -85,7 +87,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MD', if_true: files('virtio-md-pci.c')) -specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) +system_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: system_virtio_ss) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index fc43853..6557c58 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -47,12 +47,6 @@ static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX]; static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX]; static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX]; -/* Memslots used by backends that support private memslots (without an fd). */ -static unsigned int used_memslots; - -/* Memslots used by backends that only support shared memslots (with an fd). 
*/ -static unsigned int used_shared_memslots; - static QLIST_HEAD(, vhost_dev) vhost_devices = QLIST_HEAD_INITIALIZER(vhost_devices); @@ -74,15 +68,15 @@ unsigned int vhost_get_free_memslots(void) QLIST_FOREACH(hdev, &vhost_devices, entry) { unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); - unsigned int cur_free; + unsigned int cur_free = r - hdev->mem->nregions; - if (hdev->vhost_ops->vhost_backend_no_private_memslots && - hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { - cur_free = r - used_shared_memslots; + if (unlikely(r < hdev->mem->nregions)) { + warn_report_once("used (%u) vhost backend memory slots exceed" + " the device limit (%u).", hdev->mem->nregions, r); + free = 0; } else { - cur_free = r - used_memslots; + free = MIN(free, cur_free); } - free = MIN(free, cur_free); } return free; } @@ -666,13 +660,6 @@ static void vhost_commit(MemoryListener *listener) dev->mem = g_realloc(dev->mem, regions_size); dev->mem->nregions = dev->n_mem_sections; - if (dev->vhost_ops->vhost_backend_no_private_memslots && - dev->vhost_ops->vhost_backend_no_private_memslots(dev)) { - used_shared_memslots = dev->mem->nregions; - } else { - used_memslots = dev->mem->nregions; - } - for (i = 0; i < dev->n_mem_sections; i++) { struct vhost_memory_region *cur_vmr = dev->mem->regions + i; struct MemoryRegionSection *mrs = dev->mem_sections + i; @@ -1123,7 +1110,8 @@ static bool vhost_log_global_start(MemoryListener *listener, Error **errp) r = vhost_migration_log(listener, true); if (r < 0) { - abort(); + error_setg_errno(errp, -r, "vhost: Failed to start logging"); + return false; } return true; } @@ -1134,7 +1122,8 @@ static void vhost_log_global_stop(MemoryListener *listener) r = vhost_migration_log(listener, false); if (r < 0) { - abort(); + /* Not fatal, so report it, but take no further action */ + warn_report("vhost: Failed to stop logging"); } } @@ -1367,25 +1356,30 @@ fail_alloc_desc: return r; } -int vhost_virtqueue_stop(struct vhost_dev *dev, - struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, - unsigned idx) +static int do_vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx, bool force) { int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { .index = vhost_vq_index, }; - int r; + int r = 0; if (virtio_queue_get_desc_addr(vdev, idx) == 0) { /* Don't stop the virtqueue which might have not been started */ return 0; } - r = dev->vhost_ops->vhost_get_vring_base(dev, &state); - if (r < 0) { - VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); + if (!force) { + r = dev->vhost_ops->vhost_get_vring_base(dev, &state); + if (r < 0) { + VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r); + } + } + + if (r < 0 || force) { /* Connection to the backend is broken, so let's sync internal * last avail idx to the device used idx. */ @@ -1414,6 +1408,14 @@ int vhost_virtqueue_stop(struct vhost_dev *dev, return r; } +int vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) +{ + return do_vhost_virtqueue_stop(dev, vdev, vq, idx, false); +} + static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, int n, uint32_t timeout) { @@ -1619,15 +1621,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); /* - * The listener we registered properly updated the corresponding counter. 
- * So we can trust that these values are accurate. + * The listener we registered properly set up the number of required + * memslots in vhost_commit(). */ - if (hdev->vhost_ops->vhost_backend_no_private_memslots && - hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { - used = used_shared_memslots; - } else { - used = used_memslots; - } + used = hdev->mem->nregions; + /* * We assume that all reserved memslots actually require a real memslot * in our vhost backend. This might not be true, for example, if the @@ -2136,7 +2134,8 @@ fail_features: } /* Host notifiers must be enabled at this point. */ -int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +static int do_vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, + bool vrings, bool force) { int i; int rc = 0; @@ -2158,10 +2157,11 @@ int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) vhost_dev_set_vring_enable(hdev, false); } for (i = 0; i < hdev->nvqs; ++i) { - rc |= vhost_virtqueue_stop(hdev, - vdev, - hdev->vqs + i, - hdev->vq_index + i); + rc |= do_vhost_virtqueue_stop(hdev, + vdev, + hdev->vqs + i, + hdev->vq_index + i, + force); } if (hdev->vhost_ops->vhost_reset_status) { hdev->vhost_ops->vhost_reset_status(hdev); @@ -2181,6 +2181,17 @@ int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) return rc; } +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +{ + return do_vhost_dev_stop(hdev, vdev, vrings, false); +} + +int vhost_dev_force_stop(struct vhost_dev *hdev, VirtIODevice *vdev, + bool vrings) +{ + return do_vhost_dev_stop(hdev, vdev, vrings, true); +} + int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file) { diff --git a/hw/virtio/virtio-config-io.c b/hw/virtio/virtio-config-io.c index ad78e0b..f58d90b 100644 --- a/hw/virtio/virtio-config-io.c +++ b/hw/virtio/virtio-config-io.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "hw/virtio/virtio.h" -#include "cpu.h" uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr) { diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index fba2372..767216d 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -30,6 +30,7 @@ #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/module.h" +#include "qemu/bswap.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/loader.h" diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 82a285a..9a81ad9 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -20,7 +20,7 @@ #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" -#include "exec/tswap.h" +#include "qemu/target-info.h" #include "qom/object_interfaces.h" #include "hw/core/cpu.h" #include "hw/virtio/virtio.h" @@ -938,18 +938,18 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { - unsigned int i, steps, max_steps; + unsigned int i, steps, max_steps, ndescs; i = vq->used_idx % vq->vring.num; steps = 0; /* - * We shouldn't need to increase 'i' by more than the distance - * between used_idx and last_avail_idx. + * We shouldn't need to advance 'i' by max_steps or more, where + * max_steps is the distance between used_idx and last_avail_idx.
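+ * The search loop below therefore stops once steps reaches + * max_steps.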
*/ max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num; /* Search for element in vq->used_elems */ - while (steps <= max_steps) { + while (steps < max_steps) { /* Found element, set length and mark as filled */ if (vq->used_elems[i].index == elem->index) { vq->used_elems[i].len = len; @@ -957,8 +957,18 @@ static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, break; } - i += vq->used_elems[i].ndescs; - steps += vq->used_elems[i].ndescs; + ndescs = vq->used_elems[i].ndescs; + + /* Defensive sanity check */ + if (unlikely(ndescs == 0 || ndescs > vq->vring.num)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: %s invalid ndescs %u at position %u\n", + __func__, vq->vdev->name, ndescs, i); + return; + } + + i += ndescs; + steps += ndescs; if (i >= vq->vring.num) { i -= vq->vring.num; @@ -3270,13 +3280,6 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) config_len--; } - if (vdc->pre_load_queues) { - ret = vdc->pre_load_queues(vdev); - if (ret) { - return ret; - } - } - num = qemu_get_be32(f); if (num > VIRTIO_QUEUE_MAX) { @@ -3284,6 +3287,13 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) return -1; } + if (vdc->pre_load_queues) { + ret = vdc->pre_load_queues(vdev, num); + if (ret) { + return ret; + } + } + for (i = 0; i < num; i++) { vdev->vq[i].vring.num = qemu_get_be32(f); if (k->has_variable_vring_alignment) { diff --git a/hw/vmapple/virtio-blk.c b/hw/vmapple/virtio-blk.c index 532b564..9de9aaa 100644 --- a/hw/vmapple/virtio-blk.c +++ b/hw/vmapple/virtio-blk.c @@ -19,7 +19,6 @@ #include "hw/vmapple/vmapple.h" #include "hw/virtio/virtio-blk.h" #include "hw/virtio/virtio-pci.h" -#include "qemu/bswap.h" #include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 9d16644..006b5b5 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -54,6 +54,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include <sys/ioctl.h> #include "hw/pci/pci.h"