diff options
-rw-r--r-- | hw/ppc/spapr.c | 239 | ||||
-rw-r--r-- | hw/ppc/spapr_drc.c | 405 | ||||
-rw-r--r-- | hw/ppc/spapr_events.c | 98 | ||||
-rw-r--r-- | hw/ppc/spapr_hcall.c | 439 | ||||
-rw-r--r-- | hw/ppc/spapr_pci.c | 17 | ||||
-rw-r--r-- | hw/ppc/trace-events | 5 | ||||
-rw-r--r-- | include/hw/ppc/pnv_psi.h | 2 | ||||
-rw-r--r-- | include/hw/ppc/spapr.h | 24 | ||||
-rw-r--r-- | include/hw/ppc/spapr_drc.h | 74 | ||||
-rw-r--r-- | include/hw/ppc/spapr_ovec.h | 1 | ||||
-rw-r--r-- | target/ppc/kvm.c | 76 | ||||
-rw-r--r-- | target/ppc/kvm_ppc.h | 26 | ||||
-rw-r--r-- | target/ppc/mmu-hash64.h | 4 | ||||
-rw-r--r-- | target/ppc/translate_init.c | 10 |
14 files changed, 1067 insertions, 353 deletions
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 16638ce..970093e 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -98,8 +98,6 @@ #define PHANDLE_XICP 0x00001111 -#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) - static ICSState *spapr_ics_create(sPAPRMachineState *spapr, const char *type_ics, int nr_irqs, Error **errp) @@ -874,6 +872,11 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { add_str(hypertas, "hcall-multi-tce"); } + + if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) { + add_str(hypertas, "hcall-hpt-resize"); + } + _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions", hypertas->str, hypertas->len)); g_string_free(hypertas, TRUE); @@ -1264,7 +1267,7 @@ static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex, } } -static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) +int spapr_hpt_shift_for_ramsize(uint64_t ramsize) { int shift; @@ -1285,8 +1288,8 @@ void spapr_free_hpt(sPAPRMachineState *spapr) close_htab_fd(spapr); } -static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, - Error **errp) +void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, + Error **errp) { long rc; @@ -1334,9 +1337,17 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr) { - spapr_reallocate_hpt(spapr, - spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size), - &error_fatal); + int hpt_shift; + + if ((spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) + || (spapr->cas_reboot + && !spapr_ovec_test(spapr->ov5_cas, OV5_HPT_RESIZE))) { + hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size); + } else { + hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->ram_size); + } + spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal); + if (spapr->vrma_adjust) { spapr->rma_size = kvmppc_rma_size(spapr_node0_size(), spapr->htab_shift); @@ -1517,6 +1528,37 @@ static bool version_before_3(void *opaque, int version_id) return version_id < 3; } +static bool spapr_pending_events_needed(void *opaque) +{ + sPAPRMachineState *spapr = (sPAPRMachineState *)opaque; + return !QTAILQ_EMPTY(&spapr->pending_events); +} + +static const VMStateDescription vmstate_spapr_event_entry = { + .name = "spapr_event_log_entry", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(summary, sPAPREventLogEntry), + VMSTATE_UINT32(extended_length, sPAPREventLogEntry), + VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, sPAPREventLogEntry, 0, + NULL, extended_length), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_spapr_pending_events = { + .name = "spapr_pending_events", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_pending_events_needed, + .fields = (VMStateField[]) { + VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1, + vmstate_spapr_event_entry, sPAPREventLogEntry, next), + VMSTATE_END_OF_LIST() + }, +}; + static bool spapr_ov5_cas_needed(void *opaque) { sPAPRMachineState *spapr = opaque; @@ -1615,6 +1657,7 @@ static const VMStateDescription vmstate_spapr = { .subsections = (const VMStateDescription*[]) { &vmstate_spapr_ov5_cas, &vmstate_spapr_patb_entry, + &vmstate_spapr_pending_events, NULL } }; @@ -2116,12 +2159,41 @@ static void ppc_spapr_init(MachineState *machine) hwaddr node0_size = spapr_node0_size(); long load_limit, fw_size; char *filename; + Error *resize_hpt_err = NULL; msi_nonbroken = true; QLIST_INIT(&spapr->phbs); QTAILQ_INIT(&spapr->pending_dimm_unplugs); + /* Check HPT resizing availability */ + kvmppc_check_papr_resize_hpt(&resize_hpt_err); + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) { + /* + * If the user explicitly requested a mode we should either + * supply it, or fail completely (which we do below). But if + * it's not set explicitly, we reset our mode to something + * that works + */ + if (resize_hpt_err) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED; + error_free(resize_hpt_err); + resize_hpt_err = NULL; + } else { + spapr->resize_hpt = smc->resize_hpt_default; + } + } + + assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT); + + if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) { + /* + * User requested HPT resize, but this host can't supply it. Bail out + */ + error_report_err(resize_hpt_err); + exit(1); + } + /* Allocate RMA if necessary */ rma_alloc_size = kvmppc_alloc_rma(&rma); @@ -2190,6 +2262,11 @@ static void ppc_spapr_init(MachineState *machine) spapr_ovec_set(spapr->ov5, OV5_HP_EVT); } + /* advertise support for HPT resizing */ + if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) { + spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE); + } + /* init CPUs */ if (machine->cpu_model == NULL) { machine->cpu_model = kvm_enabled() ? "host" : smc->tcg_default_cpu; @@ -2547,6 +2624,40 @@ static void spapr_set_modern_hotplug_events(Object *obj, bool value, spapr->use_hotplug_event_source = value; } +static char *spapr_get_resize_hpt(Object *obj, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + switch (spapr->resize_hpt) { + case SPAPR_RESIZE_HPT_DEFAULT: + return g_strdup("default"); + case SPAPR_RESIZE_HPT_DISABLED: + return g_strdup("disabled"); + case SPAPR_RESIZE_HPT_ENABLED: + return g_strdup("enabled"); + case SPAPR_RESIZE_HPT_REQUIRED: + return g_strdup("required"); + } + g_assert_not_reached(); +} + +static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + if (strcmp(value, "default") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT; + } else if (strcmp(value, "disabled") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED; + } else if (strcmp(value, "enabled") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED; + } else if (strcmp(value, "required") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED; + } else { + error_setg(errp, "Bad value for \"resize-hpt\" property"); + } +} + static void spapr_machine_initfn(Object *obj) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); @@ -2571,6 +2682,12 @@ static void spapr_machine_initfn(Object *obj) ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr, "Maximum permitted CPU compatibility mode", &error_fatal); + + object_property_add_str(obj, "resize-hpt", + spapr_get_resize_hpt, spapr_set_resize_hpt, NULL); + object_property_set_description(obj, "resize-hpt", + "Resizing of the Hash Page Table (enabled, disabled, required)", + NULL); } static void spapr_machine_finalizefn(Object *obj) @@ -2604,6 +2721,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, int i, fdt_offset, fdt_size; void *fdt; uint64_t addr = addr_start; + bool hotplugged = spapr_drc_hotplugged(dev); Error *local_err = NULL; for (i = 0; i < nr_lmbs; i++) { @@ -2621,18 +2739,21 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, addr -= SPAPR_MEMORY_BLOCK_SIZE; drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, addr / SPAPR_MEMORY_BLOCK_SIZE); - spapr_drc_detach(drc, dev, NULL); + spapr_drc_detach(drc); } g_free(fdt); error_propagate(errp, local_err); return; } + if (!hotplugged) { + spapr_drc_reset(drc); + } addr += SPAPR_MEMORY_BLOCK_SIZE; } /* send hotplug notification to the * guest only in case of hotplugged memory */ - if (dev->hotplugged) { + if (hotplugged) { if (dedicated_hp_event_source) { drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, addr_start / SPAPR_MEMORY_BLOCK_SIZE); @@ -2780,8 +2901,10 @@ static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms, /* Callback to be called during DRC release. */ void spapr_lmb_release(DeviceState *dev) { - HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); - sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_hotplug_handler(dev)); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); /* This information will get lost if a migration occurs @@ -2802,18 +2925,7 @@ void spapr_lmb_release(DeviceState *dev) * Now that all the LMBs have been removed by the guest, call the * pc-dimm unplug handler to cleanup up the pc-dimm device. */ - hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); -} - -static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) -{ - sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); - PCDIMMDevice *dimm = PC_DIMM(dev); - PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); - - pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); + pc_dimm_memory_unplug(dev, &spapr->hotplug_memory, mr); object_unparent(OBJECT(dev)); } @@ -2849,7 +2961,7 @@ static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, addr / SPAPR_MEMORY_BLOCK_SIZE); g_assert(drc); - spapr_drc_detach(drc, dev, errp); + spapr_drc_detach(drc); addr += SPAPR_MEMORY_BLOCK_SIZE; } @@ -2882,10 +2994,10 @@ static void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, return fdt; } -static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) +/* Callback to be called during DRC release. */ +void spapr_core_release(DeviceState *dev) { - MachineState *ms = MACHINE(qdev_get_machine()); + MachineState *ms = MACHINE(qdev_get_hotplug_handler(dev)); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms); CPUCore *cc = CPU_CORE(dev); CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); @@ -2909,22 +3021,12 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, object_unparent(OBJECT(dev)); } -/* Callback to be called during DRC release. */ -void spapr_core_release(DeviceState *dev) -{ - HotplugHandler *hotplug_ctrl; - - hotplug_ctrl = qdev_get_hotplug_handler(dev); - hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); -} - static void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { int index; sPAPRDRConnector *drc; - Error *local_err = NULL; CPUCore *cc = CPU_CORE(dev); int smt = kvmppc_smt_threads(); @@ -2941,11 +3043,7 @@ void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index * smt); g_assert(drc); - spapr_drc_detach(drc, dev, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + spapr_drc_detach(drc); spapr_hotplug_req_remove_by_index(drc); } @@ -2961,11 +3059,10 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, CPUState *cs = CPU(core->threads); sPAPRDRConnector *drc; Error *local_err = NULL; - void *fdt = NULL; - int fdt_offset = 0; int smt = kvmppc_smt_threads(); CPUArchId *core_slot; int index; + bool hotplugged = spapr_drc_hotplugged(dev); core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); if (!core_slot) { @@ -2977,24 +3074,30 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, g_assert(drc || !mc->has_hotpluggable_cpus); - fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); - if (drc) { + void *fdt; + int fdt_offset; + + fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); + spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err); if (local_err) { g_free(fdt); error_propagate(errp, local_err); return; } - } - if (dev->hotplugged) { - /* - * Send hotplug notification interrupt to the guest only in case - * of hotplugged CPUs. - */ - spapr_hotplug_req_add_by_index(drc); + if (hotplugged) { + /* + * Send hotplug notification interrupt to the guest only + * in case of hotplugged CPUs. + */ + spapr_hotplug_req_add_by_index(drc); + } else { + spapr_drc_reset(drc); + } } + core_slot->cpu = OBJECT(dev); if (smc->pre_2_10_has_unused_icps) { @@ -3047,9 +3150,9 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, * total vcpus not a multiple of threads-per-core. */ if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) { - error_setg(errp, "invalid nr-threads %d, must be %d", + error_setg(&local_err, "invalid nr-threads %d, must be %d", cc->nr_threads, smp_threads); - return; + goto out; } core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); @@ -3119,27 +3222,6 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, } } -static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine()); - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { - spapr_memory_unplug(hotplug_dev, dev, errp); - } else { - error_setg(errp, "Memory hot unplug not supported for this guest"); - } - } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { - if (!mc->has_hotpluggable_cpus) { - error_setg(errp, "CPU hot unplug not supported on this machine"); - return; - } - spapr_core_unplug(hotplug_dev, dev, errp); - } -} - static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -3357,7 +3439,6 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->get_hotplug_handler = spapr_get_hotplug_handler; hc->pre_plug = spapr_machine_device_pre_plug; hc->plug = spapr_machine_device_plug; - hc->unplug = spapr_machine_device_unplug; mc->cpu_index_to_instance_props = spapr_cpu_index_to_props; mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids; hc->unplug_request = spapr_machine_device_unplug_request; @@ -3365,6 +3446,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->dr_lmb_enabled = true; smc->tcg_default_cpu = "POWER8"; mc->has_hotpluggable_cpus = true; + smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; smc->phb_placement = spapr_phb_placement; @@ -3471,6 +3553,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc) SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9); mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; smc->pre_2_10_has_unused_icps = true; + smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; } DEFINE_SPAPR_MACHINE(2_9, "2.9", false); diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index f34355d..0ffcec6 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -48,40 +48,40 @@ uint32_t spapr_drc_index(sPAPRDRConnector *drc) static uint32_t drc_isolate_physical(sPAPRDRConnector *drc) { - /* if the guest is configuring a device attached to this DRC, we - * should reset the configuration state at this point since it may - * no longer be reliable (guest released device and needs to start - * over, or unplug occurred so the FDT is no longer valid) - */ - g_free(drc->ccs); - drc->ccs = NULL; + switch (drc->state) { + case SPAPR_DRC_STATE_PHYSICAL_POWERON: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED: + break; /* see below */ + case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE: + return RTAS_OUT_PARAM_ERROR; /* not allowed */ + default: + g_assert_not_reached(); + } - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + drc->state = SPAPR_DRC_STATE_PHYSICAL_POWERON; - /* if we're awaiting release, but still in an unconfigured state, - * it's likely the guest is still in the process of configuring - * the device and is transitioning the devices to an ISOLATED - * state as a part of that process. so we only complete the - * removal when this transition happens for a device in a - * configured state, as suggested by the state diagram from PAPR+ - * 2.7, 13.4 - */ - if (drc->awaiting_release) { + if (drc->unplug_requested) { uint32_t drc_index = spapr_drc_index(drc); - if (drc->configured) { - trace_spapr_drc_set_isolation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } else { - trace_spapr_drc_set_isolation_state_deferring(drc_index); - } + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_detach(drc); } - drc->configured = false; return RTAS_OUT_SUCCESS; } static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc) { + switch (drc->state) { + case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE: + case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_PHYSICAL_POWERON: + break; /* see below */ + default: + g_assert_not_reached(); + } + /* cannot unisolate a non-existent resource, and, or resources * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, * 13.5.3.5) @@ -90,20 +90,26 @@ static uint32_t drc_unisolate_physical(sPAPRDRConnector *drc) return RTAS_OUT_NO_SUCH_INDICATOR; } - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + drc->state = SPAPR_DRC_STATE_PHYSICAL_UNISOLATE; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; return RTAS_OUT_SUCCESS; } static uint32_t drc_isolate_logical(sPAPRDRConnector *drc) { - /* if the guest is configuring a device attached to this DRC, we - * should reset the configuration state at this point since it may - * no longer be reliable (guest released device and needs to start - * over, or unplug occurred so the FDT is no longer valid) - */ - g_free(drc->ccs); - drc->ccs = NULL; + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + return RTAS_OUT_PARAM_ERROR; /* not allowed */ + default: + g_assert_not_reached(); + } /* * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't @@ -116,11 +122,11 @@ static uint32_t drc_isolate_logical(sPAPRDRConnector *drc) * actually being unplugged, fail the isolation request here. */ if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB - && !drc->awaiting_release) { + && !drc->unplug_requested) { return RTAS_OUT_HW_ERROR; } - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; + drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE; /* if we're awaiting release, but still in an unconfigured state, * it's likely the guest is still in the process of configuring @@ -130,38 +136,51 @@ static uint32_t drc_isolate_logical(sPAPRDRConnector *drc) * configured state, as suggested by the state diagram from PAPR+ * 2.7, 13.4 */ - if (drc->awaiting_release) { + if (drc->unplug_requested) { uint32_t drc_index = spapr_drc_index(drc); - if (drc->configured) { - trace_spapr_drc_set_isolation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); - } else { - trace_spapr_drc_set_isolation_state_deferring(drc_index); - } + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_detach(drc); } - drc->configured = false; - return RTAS_OUT_SUCCESS; } static uint32_t drc_unisolate_logical(sPAPRDRConnector *drc) { - /* cannot unisolate a non-existent resource, and, or resources - * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, - * 13.5.3.5) - */ - if (!drc->dev || - drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - return RTAS_OUT_NO_SUCH_INDICATOR; + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */ + default: + g_assert_not_reached(); } - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; + /* Move to AVAILABLE state should have ensured device was present */ + g_assert(drc->dev); + + drc->state = SPAPR_DRC_STATE_LOGICAL_UNISOLATE; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; return RTAS_OUT_SUCCESS; } static uint32_t drc_set_usable(sPAPRDRConnector *drc) { + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + break; /* see below */ + default: + g_assert_not_reached(); + } + /* if there's no resource/device associated with the DRC, there's * no way for us to put it in an allocation state consistent with * being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should @@ -170,30 +189,36 @@ static uint32_t drc_set_usable(sPAPRDRConnector *drc) if (!drc->dev) { return RTAS_OUT_NO_SUCH_INDICATOR; } - if (drc->awaiting_release && drc->awaiting_allocation) { - /* kernel is acknowledging a previous hotplug event - * while we are already removing it. - * it's safe to ignore awaiting_allocation here since we know the - * situation is predicated on the guest either already having done - * so (boot-time hotplug), or never being able to acquire in the - * first place (hotplug followed by immediate unplug). - */ + if (drc->unplug_requested) { + /* Don't allow the guest to move a device away from UNUSABLE + * state when we want to unplug it */ return RTAS_OUT_NO_SUCH_INDICATOR; } - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; - drc->awaiting_allocation = false; + drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE; return RTAS_OUT_SUCCESS; } static uint32_t drc_set_unusable(sPAPRDRConnector *drc) { - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE; - if (drc->awaiting_release) { + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */ + default: + g_assert_not_reached(); + } + + drc->state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE; + if (drc->unplug_requested) { uint32_t drc_index = spapr_drc_index(drc); trace_spapr_drc_set_allocation_state_finalizing(drc_index); - spapr_drc_detach(drc, DEVICE(drc->dev), NULL); + spapr_drc_detach(drc); } return RTAS_OUT_SUCCESS; @@ -247,11 +272,16 @@ static sPAPRDREntitySense physical_entity_sense(sPAPRDRConnector *drc) static sPAPRDREntitySense logical_entity_sense(sPAPRDRConnector *drc) { - if (drc->dev - && (drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE)) { - return SPAPR_DR_ENTITY_SENSE_PRESENT; - } else { + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: return SPAPR_DR_ENTITY_SENSE_UNUSABLE; + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + g_assert(drc->dev); + return SPAPR_DR_ENTITY_SENSE_PRESENT; + default: + g_assert_not_reached(); } } @@ -344,23 +374,18 @@ void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, { trace_spapr_drc_attach(spapr_drc_index(drc)); - if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { + if (drc->dev) { error_setg(errp, "an attached device is still awaiting release"); return; } - if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) { - g_assert(drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE); - } + g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) + || (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON)); g_assert(fdt); drc->dev = d; drc->fdt = fdt; drc->fdt_start_offset = fdt_start_offset; - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->awaiting_allocation = true; - } - object_property_add_link(OBJECT(drc), "device", object_get_typename(OBJECT(drc->dev)), (Object **)(&drc->dev), @@ -373,85 +398,65 @@ static void spapr_drc_release(sPAPRDRConnector *drc) drck->release(drc->dev); - drc->awaiting_release = false; + drc->unplug_requested = false; g_free(drc->fdt); drc->fdt = NULL; drc->fdt_start_offset = 0; - object_property_del(OBJECT(drc), "device", NULL); + object_property_del(OBJECT(drc), "device", &error_abort); drc->dev = NULL; } -void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp) +void spapr_drc_detach(sPAPRDRConnector *drc) { + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + trace_spapr_drc_detach(spapr_drc_index(drc)); - if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { - trace_spapr_drc_awaiting_isolated(spapr_drc_index(drc)); - drc->awaiting_release = true; - return; - } + g_assert(drc->dev); - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI && - drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - trace_spapr_drc_awaiting_unusable(spapr_drc_index(drc)); - drc->awaiting_release = true; - return; - } + drc->unplug_requested = true; - if (drc->awaiting_allocation) { - drc->awaiting_release = true; - trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc)); + if (drc->state != drck->empty_state) { + trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc)); return; } spapr_drc_release(drc); } -static bool release_pending(sPAPRDRConnector *drc) +void spapr_drc_reset(sPAPRDRConnector *drc) { - return drc->awaiting_release; -} - -static void drc_reset(void *opaque) -{ - sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(opaque); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); trace_spapr_drc_reset(spapr_drc_index(drc)); - g_free(drc->ccs); - drc->ccs = NULL; - /* immediately upon reset we can safely assume DRCs whose devices * are pending removal can be safely removed. */ - if (drc->awaiting_release) { + if (drc->unplug_requested) { spapr_drc_release(drc); } - drc->awaiting_allocation = false; - if (drc->dev) { - /* A device present at reset is coldplugged */ - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED; - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; - } - drc->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE; + /* A device present at reset is ready to go, same as coldplugged */ + drc->state = drck->ready_state; } else { - /* Otherwise device is absent, but might be hotplugged */ - drc->isolation_state = SPAPR_DR_ISOLATION_STATE_ISOLATED; - if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_UNUSABLE; - } - drc->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE; + drc->state = drck->empty_state; } + + drc->ccs_offset = -1; + drc->ccs_depth = -1; +} + +static void drc_reset(void *opaque) +{ + spapr_drc_reset(SPAPR_DR_CONNECTOR(opaque)); } static bool spapr_drc_needed(void *opaque) { sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque; sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - bool rc = false; sPAPRDREntitySense value = drck->dr_entity_sense(drc); /* If no dev is plugged in there is no need to migrate the DRC state */ @@ -460,23 +465,10 @@ static bool spapr_drc_needed(void *opaque) } /* - * If there is dev plugged in, we need to migrate the DRC state when - * it is different from cold-plugged state - */ - switch (spapr_drc_type(drc)) { - case SPAPR_DR_CONNECTOR_TYPE_PCI: - case SPAPR_DR_CONNECTOR_TYPE_CPU: - case SPAPR_DR_CONNECTOR_TYPE_LMB: - rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) && - (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) && - drc->configured && !drc->awaiting_release); - break; - case SPAPR_DR_CONNECTOR_TYPE_PHB: - case SPAPR_DR_CONNECTOR_TYPE_VIO: - default: - g_assert_not_reached(); - } - return rc; + * We need to migrate the state if it's not equal to the expected + * long-term state, which is the same as the coldplugged initial + * state */ + return (drc->state != drck->ready_state); } static const VMStateDescription vmstate_spapr_drc = { @@ -485,12 +477,7 @@ static const VMStateDescription vmstate_spapr_drc = { .minimum_version_id = 1, .needed = spapr_drc_needed, .fields = (VMStateField []) { - VMSTATE_UINT32(isolation_state, sPAPRDRConnector), - VMSTATE_UINT32(allocation_state, sPAPRDRConnector), - VMSTATE_UINT32(dr_indicator, sPAPRDRConnector), - VMSTATE_BOOL(configured, sPAPRDRConnector), - VMSTATE_BOOL(awaiting_release, sPAPRDRConnector), - VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector), + VMSTATE_UINT32(state, sPAPRDRConnector), VMSTATE_END_OF_LIST() } }; @@ -559,46 +546,96 @@ sPAPRDRConnector *spapr_dr_connector_new(Object *owner, const char *type, object_property_set_bool(OBJECT(drc), true, "realized", NULL); g_free(prop_name); - /* PCI slot always start in a USABLE state, and stay there */ - if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) { - drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE; - } - return drc; } static void spapr_dr_connector_instance_init(Object *obj) { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); object_property_add_uint32_ptr(obj, "id", &drc->id, NULL); object_property_add(obj, "index", "uint32", prop_get_index, NULL, NULL, NULL, NULL); object_property_add(obj, "fdt", "struct", prop_get_fdt, NULL, NULL, NULL, NULL); + drc->state = drck->empty_state; } static void spapr_dr_connector_class_init(ObjectClass *k, void *data) { DeviceClass *dk = DEVICE_CLASS(k); - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); dk->realize = realize; dk->unrealize = unrealize; - drck->release_pending = release_pending; /* * Reason: it crashes FIXME find and document the real reason */ dk->user_creatable = false; } +static bool drc_physical_needed(void *opaque) +{ + sPAPRDRCPhysical *drcp = (sPAPRDRCPhysical *)opaque; + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(drcp); + + if ((drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_ACTIVE)) + || (!drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_INACTIVE))) { + return false; + } + return true; +} + +static const VMStateDescription vmstate_spapr_drc_physical = { + .name = "spapr_drc/physical", + .version_id = 1, + .minimum_version_id = 1, + .needed = drc_physical_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(dr_indicator, sPAPRDRCPhysical), + VMSTATE_END_OF_LIST() + } +}; + +static void drc_physical_reset(void *opaque) +{ + sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(opaque); + sPAPRDRCPhysical *drcp = SPAPR_DRC_PHYSICAL(drc); + + if (drc->dev) { + drcp->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE; + } else { + drcp->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE; + } +} + +static void realize_physical(DeviceState *d, Error **errp) +{ + sPAPRDRCPhysical *drcp = SPAPR_DRC_PHYSICAL(d); + Error *local_err = NULL; + + realize(d, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + vmstate_register(DEVICE(drcp), spapr_drc_index(SPAPR_DR_CONNECTOR(drcp)), + &vmstate_spapr_drc_physical, drcp); + qemu_register_reset(drc_physical_reset, drcp); +} + static void spapr_drc_physical_class_init(ObjectClass *k, void *data) { + DeviceClass *dk = DEVICE_CLASS(k); sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + dk->realize = realize_physical; drck->dr_entity_sense = physical_entity_sense; drck->isolate = drc_isolate_physical; drck->unisolate = drc_unisolate_physical; + drck->ready_state = SPAPR_DRC_STATE_PHYSICAL_CONFIGURED; + drck->empty_state = SPAPR_DRC_STATE_PHYSICAL_POWERON; } static void spapr_drc_logical_class_init(ObjectClass *k, void *data) @@ -608,6 +645,8 @@ static void spapr_drc_logical_class_init(ObjectClass *k, void *data) drck->dr_entity_sense = logical_entity_sense; drck->isolate = drc_isolate_logical; drck->unisolate = drc_unisolate_logical; + drck->ready_state = SPAPR_DRC_STATE_LOGICAL_CONFIGURED; + drck->empty_state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE; } static void spapr_drc_cpu_class_init(ObjectClass *k, void *data) @@ -653,7 +692,7 @@ static const TypeInfo spapr_dr_connector_info = { static const TypeInfo spapr_drc_physical_info = { .name = TYPE_SPAPR_DRC_PHYSICAL, .parent = TYPE_SPAPR_DR_CONNECTOR, - .instance_size = sizeof(sPAPRDRConnector), + .instance_size = sizeof(sPAPRDRCPhysical), .class_init = spapr_drc_physical_class_init, .abstract = true, }; @@ -661,7 +700,6 @@ static const TypeInfo spapr_drc_physical_info = { static const TypeInfo spapr_drc_logical_info = { .name = TYPE_SPAPR_DRC_LOGICAL, .parent = TYPE_SPAPR_DR_CONNECTOR, - .instance_size = sizeof(sPAPRDRConnector), .class_init = spapr_drc_logical_class_init, .abstract = true, }; @@ -669,21 +707,18 @@ static const TypeInfo spapr_drc_logical_info = { static const TypeInfo spapr_drc_cpu_info = { .name = TYPE_SPAPR_DRC_CPU, .parent = TYPE_SPAPR_DRC_LOGICAL, - .instance_size = sizeof(sPAPRDRConnector), .class_init = spapr_drc_cpu_class_init, }; static const TypeInfo spapr_drc_pci_info = { .name = TYPE_SPAPR_DRC_PCI, .parent = TYPE_SPAPR_DRC_PHYSICAL, - .instance_size = sizeof(sPAPRDRConnector), .class_init = spapr_drc_pci_class_init, }; static const TypeInfo spapr_drc_lmb_info = { .name = TYPE_SPAPR_DRC_LMB, .parent = TYPE_SPAPR_DRC_LOGICAL, - .instance_size = sizeof(sPAPRDRConnector), .class_init = spapr_drc_lmb_class_init, }; @@ -896,12 +931,18 @@ static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state) { sPAPRDRConnector *drc = spapr_drc_by_index(idx); - if (!drc) { - return RTAS_OUT_PARAM_ERROR; + if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_PHYSICAL)) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + if ((state != SPAPR_DR_INDICATOR_INACTIVE) + && (state != SPAPR_DR_INDICATOR_ACTIVE) + && (state != SPAPR_DR_INDICATOR_IDENTIFY) + && (state != SPAPR_DR_INDICATOR_ACTION)) { + return RTAS_OUT_PARAM_ERROR; /* bad state parameter */ } trace_spapr_drc_set_dr_indicator(idx, state); - drc->dr_indicator = state; + SPAPR_DRC_PHYSICAL(drc)->dr_indicator = state; return RTAS_OUT_SUCCESS; } @@ -1011,7 +1052,7 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, uint64_t wa_offset; uint32_t drc_index; sPAPRDRConnector *drc; - sPAPRConfigureConnectorState *ccs; + sPAPRDRConnectorClass *drck; sPAPRDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE; int rc; @@ -1030,18 +1071,16 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, goto out; } - if (!drc->fdt) { - trace_spapr_rtas_ibm_configure_connector_missing_fdt(drc_index); + if ((drc->state != SPAPR_DRC_STATE_LOGICAL_UNISOLATE) + && (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE)) { + /* Need to unisolate the device before configuring */ rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE; goto out; } - ccs = drc->ccs; - if (!ccs) { - ccs = g_new0(sPAPRConfigureConnectorState, 1); - ccs->fdt_offset = drc->fdt_start_offset; - drc->ccs = ccs; - } + g_assert(drc->fdt); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); do { uint32_t tag; @@ -1049,12 +1088,12 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, const struct fdt_property *prop; int fdt_offset_next, prop_len; - tag = fdt_next_tag(drc->fdt, ccs->fdt_offset, &fdt_offset_next); + tag = fdt_next_tag(drc->fdt, drc->ccs_offset, &fdt_offset_next); switch (tag) { case FDT_BEGIN_NODE: - ccs->fdt_depth++; - name = fdt_get_name(drc->fdt, ccs->fdt_offset, NULL); + drc->ccs_depth++; + name = fdt_get_name(drc->fdt, drc->ccs_offset, NULL); /* provide the name of the next OF node */ wa_offset = CC_VAL_DATA_OFFSET; @@ -1063,30 +1102,22 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD; break; case FDT_END_NODE: - ccs->fdt_depth--; - if (ccs->fdt_depth == 0) { - sPAPRDRIsolationState state = drc->isolation_state; + drc->ccs_depth--; + if (drc->ccs_depth == 0) { uint32_t drc_index = spapr_drc_index(drc); - /* done sending the device tree, don't need to track - * the state anymore - */ + + /* done sending the device tree, move to configured state */ trace_spapr_drc_set_configured(drc_index); - if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) { - drc->configured = true; - } else { - /* guest should be not configuring an isolated device */ - trace_spapr_drc_set_configured_skipping(drc_index); - } - g_free(ccs); - drc->ccs = NULL; - ccs = NULL; + drc->state = drck->ready_state; + drc->ccs_offset = -1; + drc->ccs_depth = -1; resp = SPAPR_DR_CC_RESPONSE_SUCCESS; } else { resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT; } break; case FDT_PROP: - prop = fdt_get_property_by_offset(drc->fdt, ccs->fdt_offset, + prop = fdt_get_property_by_offset(drc->fdt, drc->ccs_offset, &prop_len); name = fdt_string(drc->fdt, fdt32_to_cpu(prop->nameoff)); @@ -1111,8 +1142,8 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, /* keep seeking for an actionable tag */ break; } - if (ccs) { - ccs->fdt_offset = fdt_offset_next; + if (drc->ccs_offset >= 0) { + drc->ccs_offset = fdt_offset_next; } } while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE); diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 587a3da..f952b78 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -42,8 +42,6 @@ #include "hw/ppc/spapr_ovec.h" #include <libfdt.h> -struct rtas_error_log { - uint32_t summary; #define RTAS_LOG_VERSION_MASK 0xff000000 #define RTAS_LOG_VERSION_6 0x06000000 #define RTAS_LOG_SEVERITY_MASK 0x00e00000 @@ -85,6 +83,9 @@ struct rtas_error_log { #define RTAS_LOG_TYPE_ECC_CORR 0x0000000a #define RTAS_LOG_TYPE_EPOW 0x00000040 #define RTAS_LOG_TYPE_HOTPLUG 0x000000e5 + +struct rtas_error_log { + uint32_t summary; uint32_t extended_length; } QEMU_PACKED; @@ -166,8 +167,7 @@ struct rtas_event_log_v6_epow { uint64_t reason_code; } QEMU_PACKED; -struct epow_log_full { - struct rtas_error_log hdr; +struct epow_extended_log { struct rtas_event_log_v6 v6hdr; struct rtas_event_log_v6_maina maina; struct rtas_event_log_v6_mainb mainb; @@ -205,8 +205,7 @@ struct rtas_event_log_v6_hp { union drc_identifier drc_id; } QEMU_PACKED; -struct hp_log_full { - struct rtas_error_log hdr; +struct hp_extended_log { struct rtas_event_log_v6 v6hdr; struct rtas_event_log_v6_maina maina; struct rtas_event_log_v6_mainb mainb; @@ -341,25 +340,26 @@ static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type) return source->irq; } -static void rtas_event_log_queue(int log_type, void *data) +static uint32_t spapr_event_log_entry_type(sPAPREventLogEntry *entry) { - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); + return entry->summary & RTAS_LOG_TYPE_MASK; +} - g_assert(data); - entry->log_type = log_type; - entry->data = data; +static void rtas_event_log_queue(sPAPRMachineState *spapr, + sPAPREventLogEntry *entry) +{ QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); } -static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask) +static sPAPREventLogEntry *rtas_event_log_dequeue(sPAPRMachineState *spapr, + uint32_t event_mask) { - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; QTAILQ_FOREACH(entry, &spapr->pending_events, next) { const sPAPREventSource *source = - rtas_event_log_to_source(spapr, entry->log_type); + rtas_event_log_to_source(spapr, + spapr_event_log_entry_type(entry)); if (source->mask & event_mask) { break; @@ -380,7 +380,8 @@ static bool rtas_event_log_contains(uint32_t event_mask) QTAILQ_FOREACH(entry, &spapr->pending_events, next) { const sPAPREventSource *source = - rtas_event_log_to_source(spapr, entry->log_type); + rtas_event_log_to_source(spapr, + spapr_event_log_entry_type(entry)); if (source->mask & event_mask) { return true; @@ -428,27 +429,28 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, static void spapr_powerdown_req(Notifier *n, void *opaque) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - struct rtas_error_log *hdr; + sPAPREventLogEntry *entry; struct rtas_event_log_v6 *v6hdr; struct rtas_event_log_v6_maina *maina; struct rtas_event_log_v6_mainb *mainb; struct rtas_event_log_v6_epow *epow; - struct epow_log_full *new_epow; + struct epow_extended_log *new_epow; + entry = g_new(sPAPREventLogEntry, 1); new_epow = g_malloc0(sizeof(*new_epow)); - hdr = &new_epow->hdr; + entry->extended_log = new_epow; + v6hdr = &new_epow->v6hdr; maina = &new_epow->maina; mainb = &new_epow->mainb; epow = &new_epow->epow; - hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6 - | RTAS_LOG_SEVERITY_EVENT - | RTAS_LOG_DISPOSITION_NOT_RECOVERED - | RTAS_LOG_OPTIONAL_PART_PRESENT - | RTAS_LOG_TYPE_EPOW); - hdr->extended_length = cpu_to_be32(sizeof(*new_epow) - - sizeof(new_epow->hdr)); + entry->summary = RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_TYPE_EPOW; + entry->extended_length = sizeof(*new_epow); spapr_init_v6hdr(v6hdr); spapr_init_maina(maina, 3 /* Main-A, Main-B and EPOW */); @@ -468,7 +470,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; - rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow); + rtas_event_log_queue(spapr, entry); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -480,28 +482,29 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, union drc_identifier *drc_id) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - struct hp_log_full *new_hp; - struct rtas_error_log *hdr; + sPAPREventLogEntry *entry; + struct hp_extended_log *new_hp; struct rtas_event_log_v6 *v6hdr; struct rtas_event_log_v6_maina *maina; struct rtas_event_log_v6_mainb *mainb; struct rtas_event_log_v6_hp *hp; - new_hp = g_malloc0(sizeof(struct hp_log_full)); - hdr = &new_hp->hdr; + entry = g_new(sPAPREventLogEntry, 1); + new_hp = g_malloc0(sizeof(struct hp_extended_log)); + entry->extended_log = new_hp; + v6hdr = &new_hp->v6hdr; maina = &new_hp->maina; mainb = &new_hp->mainb; hp = &new_hp->hp; - hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6 - | RTAS_LOG_SEVERITY_EVENT - | RTAS_LOG_DISPOSITION_NOT_RECOVERED - | RTAS_LOG_OPTIONAL_PART_PRESENT - | RTAS_LOG_INITIATOR_HOTPLUG - | RTAS_LOG_TYPE_HOTPLUG); - hdr->extended_length = cpu_to_be32(sizeof(*new_hp) - - sizeof(new_hp->hdr)); + entry->summary = RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_INITIATOR_HOTPLUG + | RTAS_LOG_TYPE_HOTPLUG; + entry->extended_length = sizeof(*new_hp); spapr_init_v6hdr(v6hdr); spapr_init_maina(maina, 3 /* Main-A, Main-B, HP */); @@ -551,7 +554,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, cpu_to_be32(drc_id->count_indexed.index); } - rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp); + rtas_event_log_queue(spapr, entry); qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, @@ -628,7 +631,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t mask, buf, len, event_len; uint64_t xinfo; sPAPREventLogEntry *event; - struct rtas_error_log *hdr; + struct rtas_error_log header; int i; if ((nargs < 6) || (nargs > 7) || nret != 1) { @@ -644,21 +647,24 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, xinfo |= (uint64_t)rtas_ld(args, 6) << 32; } - event = rtas_event_log_dequeue(mask); + event = rtas_event_log_dequeue(spapr, mask); if (!event) { goto out_no_events; } - hdr = event->data; - event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr); + event_len = event->extended_length + sizeof(header); if (event_len < len) { len = event_len; } - cpu_physical_memory_write(buf, event->data, len); + header.summary = cpu_to_be32(event->summary); + header.extended_length = cpu_to_be32(event->extended_length); + cpu_physical_memory_write(buf, &header, sizeof(header)); + cpu_physical_memory_write(buf + sizeof(header), event->extended_log, + event->extended_length); rtas_st(rets, 0, RTAS_OUT_SUCCESS); - g_free(event->data); + g_free(event->extended_log); g_free(event); /* according to PAPR+, the IRQ must be left asserted, or re-asserted, if diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 8624ce8..72ea5a8 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -3,6 +3,7 @@ #include "sysemu/hw_accel.h" #include "sysemu/sysemu.h" #include "qemu/log.h" +#include "qemu/error-report.h" #include "cpu.h" #include "exec/exec-all.h" #include "helper_regs.h" @@ -354,6 +355,401 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, return H_SUCCESS; } +struct sPAPRPendingHPT { + /* These fields are read-only after initialization */ + int shift; + QemuThread thread; + + /* These fields are protected by the BQL */ + bool complete; + + /* These fields are private to the preparation thread if + * !complete, otherwise protected by the BQL */ + int ret; + void *hpt; +}; + +static void free_pending_hpt(sPAPRPendingHPT *pending) +{ + if (pending->hpt) { + qemu_vfree(pending->hpt); + } + + g_free(pending); +} + +static void *hpt_prepare_thread(void *opaque) +{ + sPAPRPendingHPT *pending = opaque; + size_t size = 1ULL << pending->shift; + + pending->hpt = qemu_memalign(size, size); + if (pending->hpt) { + memset(pending->hpt, 0, size); + pending->ret = H_SUCCESS; + } else { + pending->ret = H_NO_MEM; + } + + qemu_mutex_lock_iothread(); + + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { + /* Ready to go */ + pending->complete = true; + } else { + /* We've been cancelled, clean ourselves up */ + free_pending_hpt(pending); + } + + qemu_mutex_unlock_iothread(); + return NULL; +} + +/* Must be called with BQL held */ +static void cancel_hpt_prepare(sPAPRMachineState *spapr) +{ + sPAPRPendingHPT *pending = spapr->pending_hpt; + + /* Let the thread know it's cancelled */ + spapr->pending_hpt = NULL; + + if (!pending) { + /* Nothing to do */ + return; + } + + if (!pending->complete) { + /* thread will clean itself up */ + return; + } + + free_pending_hpt(pending); +} + +/* Convert a return code from the KVM ioctl()s implementing resize HPT + * into a PAPR hypercall return code */ +static target_ulong resize_hpt_convert_rc(int ret) +{ + if (ret >= 100000) { + return H_LONG_BUSY_ORDER_100_SEC; + } else if (ret >= 10000) { + return H_LONG_BUSY_ORDER_10_SEC; + } else if (ret >= 1000) { + return H_LONG_BUSY_ORDER_1_SEC; + } else if (ret >= 100) { + return H_LONG_BUSY_ORDER_100_MSEC; + } else if (ret >= 10) { + return H_LONG_BUSY_ORDER_10_MSEC; + } else if (ret > 0) { + return H_LONG_BUSY_ORDER_1_MSEC; + } + + switch (ret) { + case 0: + return H_SUCCESS; + case -EPERM: + return H_AUTHORITY; + case -EINVAL: + return H_PARAMETER; + case -ENXIO: + return H_CLOSED; + case -ENOSPC: + return H_PTEG_FULL; + case -EBUSY: + return H_BUSY; + case -ENOMEM: + return H_NO_MEM; + default: + return H_HARDWARE; + } +} + +static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + int shift = args[1]; + sPAPRPendingHPT *pending = spapr->pending_hpt; + uint64_t current_ram_size = MACHINE(spapr)->ram_size; + int rc; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + return H_AUTHORITY; + } + + if (!spapr->htab_shift) { + /* Radix guest, no HPT */ + return H_NOT_AVAILABLE; + } + + trace_spapr_h_resize_hpt_prepare(flags, shift); + + if (flags != 0) { + return H_PARAMETER; + } + + if (shift && ((shift < 18) || (shift > 46))) { + return H_PARAMETER; + } + + current_ram_size = pc_existing_dimms_capacity(&error_fatal); + + /* We only allow the guest to allocate an HPT one order above what + * we'd normally give them (to stop a small guest claiming a huge + * chunk of resources in the HPT */ + if (shift > (spapr_hpt_shift_for_ramsize(current_ram_size) + 1)) { + return H_RESOURCE; + } + + rc = kvmppc_resize_hpt_prepare(cpu, flags, shift); + if (rc != -ENOSYS) { + return resize_hpt_convert_rc(rc); + } + + if (pending) { + /* something already in progress */ + if (pending->shift == shift) { + /* and it's suitable */ + if (pending->complete) { + return pending->ret; + } else { + return H_LONG_BUSY_ORDER_100_MSEC; + } + } + + /* not suitable, cancel and replace */ + cancel_hpt_prepare(spapr); + } + + if (!shift) { + /* nothing to do */ + return H_SUCCESS; + } + + /* start new prepare */ + + pending = g_new0(sPAPRPendingHPT, 1); + pending->shift = shift; + pending->ret = H_HARDWARE; + + qemu_thread_create(&pending->thread, "sPAPR HPT prepare", + hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); + + spapr->pending_hpt = pending; + + /* In theory we could estimate the time more accurately based on + * the new size, but there's not much point */ + return H_LONG_BUSY_ORDER_100_MSEC; +} + +static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + return ldq_p(addr); +} + +static void new_hpte_store(void *htab, uint64_t pteg, int slot, + uint64_t pte0, uint64_t pte1) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + + stq_p(addr, pte0); + stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1); +} + +static int rehash_hpte(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize, + uint64_t pteg, int slot) +{ + uint64_t old_hash_mask = (oldsize >> 7) - 1; + uint64_t new_hash_mask = (newsize >> 7) - 1; + target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); + target_ulong pte1; + uint64_t avpn; + unsigned base_pg_shift; + uint64_t hash, new_pteg, replace_pte0; + + if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { + return H_SUCCESS; + } + + pte1 = ppc_hash64_hpte1(cpu, hptes, slot); + + base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); + assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ + avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); + + if (pte0 & HPTE64_V_SECONDARY) { + pteg = ~pteg; + } + + if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { + uint64_t offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (base_pg_shift < 23) { + offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; + } + + hash = vsid ^ (offset >> base_pg_shift); + } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { + uint64_t offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (base_pg_shift < 23) { + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) + << base_pg_shift; + } + + hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); + } else { + error_report("rehash_pte: Bad segment size in HPTE"); + return H_HARDWARE; + } + + new_pteg = hash & new_hash_mask; + if (pte0 & HPTE64_V_SECONDARY) { + assert(~pteg == (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + assert(pteg == (hash & old_hash_mask)); + } + assert((oldsize != newsize) || (pteg == new_pteg)); + replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); + /* + * Strictly speaking, we don't need all these tests, since we only + * ever rehash bolted HPTEs. We might in future handle non-bolted + * HPTEs, though so make the logic correct for those cases as + * well. + */ + if (replace_pte0 & HPTE64_V_VALID) { + assert(newsize < oldsize); + if (replace_pte0 & HPTE64_V_BOLTED) { + if (pte0 & HPTE64_V_BOLTED) { + /* Bolted collision, nothing we can do */ + return H_PTEG_FULL; + } else { + /* Discard this hpte */ + return H_SUCCESS; + } + } + } + + new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); + return H_SUCCESS; +} + +static int rehash_hpt(PowerPCCPU *cpu, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize) +{ + uint64_t n_ptegs = oldsize >> 7; + uint64_t pteg; + int slot; + int rc; + + for (pteg = 0; pteg < n_ptegs; pteg++) { + hwaddr ptex = pteg * HPTES_PER_GROUP; + const ppc_hash_pte64_t *hptes + = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + + if (!hptes) { + return H_HARDWARE; + } + + for (slot = 0; slot < HPTES_PER_GROUP; slot++) { + rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, + pteg, slot); + if (rc != H_SUCCESS) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + return rc; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + } + + return H_SUCCESS; +} + +static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong shift = args[1]; + sPAPRPendingHPT *pending = spapr->pending_hpt; + int rc; + size_t newsize; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + return H_AUTHORITY; + } + + trace_spapr_h_resize_hpt_commit(flags, shift); + + rc = kvmppc_resize_hpt_commit(cpu, flags, shift); + if (rc != -ENOSYS) { + return resize_hpt_convert_rc(rc); + } + + if (flags != 0) { + return H_PARAMETER; + } + + if (!pending || (pending->shift != shift)) { + /* no matching prepare */ + return H_CLOSED; + } + + if (!pending->complete) { + /* prepare has not completed */ + return H_BUSY; + } + + /* Shouldn't have got past PREPARE without an HPT */ + g_assert(spapr->htab_shift); + + newsize = 1ULL << pending->shift; + rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), + pending->hpt, newsize); + if (rc == H_SUCCESS) { + qemu_vfree(spapr->htab); + spapr->htab = pending->hpt; + spapr->htab_shift = pending->shift; + + if (kvm_enabled()) { + /* For KVM PR, update the HPT pointer */ + target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab + | (spapr->htab_shift - 18); + kvmppc_update_sdr1(sdr1); + } + + pending->hpt = NULL; /* so it's not free()d */ + } + + /* Clean up */ + spapr->pending_hpt = NULL; + free_pending_hpt(pending); + + return rc; +} + static target_ulong h_set_sprg0(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -1133,6 +1529,45 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300); spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300); + /* + * HPT resizing is a bit of a special case, because when enabled + * we assume an HPT guest will support it until it says it + * doesn't, instead of assuming it won't support it until it says + * it does. Strictly speaking that approach could break for + * guests which don't make a CAS call, but those are so old we + * don't care about them. Without that assumption we'd have to + * make at least a temporary allocation of an HPT sized for max + * memory, which could be impossibly difficult under KVM HV if + * maxram is large. + */ + if (!guest_radix && !spapr_ovec_test(ov5_guest, OV5_HPT_RESIZE)) { + int maxshift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size); + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) { + error_report( + "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required"); + exit(1); + } + + if (spapr->htab_shift < maxshift) { + CPUState *cs; + + /* Guest doesn't know about HPT resizing, so we + * pre-emptively resize for the maximum permitted RAM. At + * the point this is called, nothing should have been + * entered into the existing HPT */ + spapr_reallocate_hpt(spapr, maxshift, &error_fatal); + CPU_FOREACH(cs) { + if (kvm_enabled()) { + /* For KVM PR, update the HPT pointer */ + target_ulong sdr1 = (target_ulong)(uintptr_t)spapr->htab + | (spapr->htab_shift - 18); + kvmppc_update_sdr1(sdr1); + } + } + } + } + /* NOTE: there are actually a number of ov5 bits where input from the * guest is always zero, and the platform/QEMU enables them independently * of guest input. To model these properly we'd want some sort of mask, @@ -1246,6 +1681,10 @@ static void hypercall_register_types(void) /* hcall-bulk */ spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + /* hcall-hpt-resize */ + spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare); + spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit); + /* hcall-splpar */ spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa); spapr_register_hypercall(H_CEDE, h_cede); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a52dcf8..6ecdf29 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1443,7 +1443,9 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, /* If this is function 0, signal hotplug for all the device functions. * Otherwise defer sending the hotplug event. */ - if (plugged_dev->hotplugged && PCI_FUNC(pdev->devfn) == 0) { + if (!spapr_drc_hotplugged(plugged_dev)) { + spapr_drc_reset(drc); + } else if (PCI_FUNC(pdev->devfn) == 0) { int i; for (i = 0; i < 8; i++) { @@ -1474,9 +1476,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, { sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); PCIDevice *pdev = PCI_DEVICE(plugged_dev); - sPAPRDRConnectorClass *drck; sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); - Error *local_err = NULL; if (!phb->dr_enabled) { error_setg(errp, QERR_BUS_NO_HOTPLUG, @@ -1487,8 +1487,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, g_assert(drc); g_assert(drc->dev == plugged_dev); - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - if (!drck->release_pending(drc)) { + if (!spapr_drc_unplug_requested(drc)) { PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); uint32_t slotnr = PCI_SLOT(pdev->devfn); sPAPRDRConnector *func_drc; @@ -1504,7 +1503,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); state = func_drck->dr_entity_sense(func_drc); if (state == SPAPR_DR_ENTITY_SENSE_PRESENT - && !func_drck->release_pending(func_drc)) { + && !spapr_drc_unplug_requested(func_drc)) { error_setg(errp, "PCI: slot %d, function %d still present. " "Must unplug all non-0 functions first.", @@ -1514,11 +1513,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, } } - spapr_drc_detach(drc, DEVICE(pdev), &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + spapr_drc_detach(drc); /* if this isn't func 0, defer unplug event. otherwise signal removal * for all present functions diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index 3e8e3cf..0f7d9be 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -16,6 +16,8 @@ spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes" # hw/ppc/spapr_hcall.c spapr_cas_pvr_try(uint32_t pvr) "%x" spapr_cas_pvr(uint32_t cur_pvr, bool explicit_match, uint32_t new_pvr) "current=%x, explicit_match=%u, new=%x" +spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64 +spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64 # hw/ppc/spapr_iommu.c spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 @@ -46,8 +48,7 @@ spapr_drc_set_configured(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_set_configured_skipping(uint32_t index) "drc: 0x%"PRIx32", isolated device" spapr_drc_attach(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_detach(uint32_t index) "drc: 0x%"PRIx32 -spapr_drc_awaiting_isolated(uint32_t index) "drc: 0x%"PRIx32 -spapr_drc_awaiting_unusable(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_awaiting_quiesce(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_awaiting_allocation(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_reset(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_realize(uint32_t index) "drc: 0x%"PRIx32 diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h index 11d83e4..f6af5ea 100644 --- a/include/hw/ppc/pnv_psi.h +++ b/include/hw/ppc/pnv_psi.h @@ -28,8 +28,6 @@ #define PSIHB_XSCOM_MAX 0x20 -typedef struct XICSState XICSState; - typedef struct PnvPsi { SysBusDevice parent; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 5f708ee..2a303a7 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -13,6 +13,7 @@ struct sPAPRPHBState; struct sPAPRNVRAM; typedef struct sPAPREventLogEntry sPAPREventLogEntry; typedef struct sPAPREventSource sPAPREventSource; +typedef struct sPAPRPendingHPT sPAPRPendingHPT; #define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL #define SPAPR_ENTRY_POINT 0x100 @@ -42,6 +43,13 @@ typedef struct sPAPRMachineClass sPAPRMachineClass; #define SPAPR_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE) +typedef enum { + SPAPR_RESIZE_HPT_DEFAULT = 0, + SPAPR_RESIZE_HPT_DISABLED, + SPAPR_RESIZE_HPT_ENABLED, + SPAPR_RESIZE_HPT_REQUIRED, +} sPAPRResizeHPT; + /** * sPAPRMachineClass: */ @@ -58,6 +66,7 @@ struct sPAPRMachineClass { uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, unsigned n_dma, uint32_t *liobns, Error **errp); + sPAPRResizeHPT resize_hpt_default; }; /** @@ -73,9 +82,12 @@ struct sPAPRMachineState { ICSState *ics; sPAPRRTCState rtc; + sPAPRResizeHPT resize_hpt; void *htab; uint32_t htab_shift; uint64_t patb_entry; /* Process tbl registed in H_REGISTER_PROCESS_TABLE */ + sPAPRPendingHPT *pending_hpt; /* in-progress resize */ + hwaddr rma_size; int vrma_adjust; ssize_t rtas_size; @@ -367,6 +379,8 @@ struct sPAPRMachineState { #define H_XIRR_X 0x2FC #define H_RANDOM 0x300 #define H_SET_MODE 0x31C +#define H_RESIZE_HPT_PREPARE 0x36C +#define H_RESIZE_HPT_COMMIT 0x370 #define H_CLEAN_SLB 0x374 #define H_INVALIDATE_PID 0x378 #define H_REGISTER_PROC_TBL 0x37C @@ -607,8 +621,9 @@ struct sPAPRTCETable { sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn); struct sPAPREventLogEntry { - int log_type; - void *data; + uint32_t summary; + uint32_t extended_length; + void *extended_log; QTAILQ_ENTRY(sPAPREventLogEntry) next; }; @@ -644,6 +659,9 @@ void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type, void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, uint32_t count, uint32_t index); void spapr_cpu_parse_features(sPAPRMachineState *spapr); +int spapr_hpt_shift_for_ramsize(uint64_t ramsize); +void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, + Error **errp); /* CPU and LMB DRC release callbacks. */ void spapr_core_release(DeviceState *dev); @@ -684,4 +702,6 @@ int spapr_rng_populate_dt(void *fdt); void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg); +#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) + #endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index d15e9eb..a7958d0 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -15,6 +15,7 @@ #include <libfdt.h> #include "qom/object.h" +#include "sysemu/sysemu.h" #include "hw/qdev.h" #define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector" @@ -32,7 +33,7 @@ #define SPAPR_DRC_PHYSICAL_CLASS(klass) \ OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, \ TYPE_SPAPR_DRC_PHYSICAL) -#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \ +#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(sPAPRDRCPhysical, (obj), \ TYPE_SPAPR_DRC_PHYSICAL) #define TYPE_SPAPR_DRC_LOGICAL "spapr-drc-logical" @@ -172,11 +173,23 @@ typedef enum { SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003, } sPAPRDRCCResponse; -/* rtas-configure-connector state */ -typedef struct sPAPRConfigureConnectorState { - int fdt_offset; - int fdt_depth; -} sPAPRConfigureConnectorState; +typedef enum { + /* + * Values come from Fig. 12 in LoPAPR section 13.4 + * + * These are exposed in the migration stream, so don't change + * them. + */ + SPAPR_DRC_STATE_INVALID = 0, + SPAPR_DRC_STATE_LOGICAL_UNUSABLE = 1, + SPAPR_DRC_STATE_LOGICAL_AVAILABLE = 2, + SPAPR_DRC_STATE_LOGICAL_UNISOLATE = 3, + SPAPR_DRC_STATE_LOGICAL_CONFIGURED = 4, + SPAPR_DRC_STATE_PHYSICAL_AVAILABLE = 5, + SPAPR_DRC_STATE_PHYSICAL_POWERON = 6, + SPAPR_DRC_STATE_PHYSICAL_UNISOLATE = 7, + SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8, +} sPAPRDRCState; typedef struct sPAPRDRConnector { /*< private >*/ @@ -185,29 +198,25 @@ typedef struct sPAPRDRConnector { uint32_t id; Object *owner; - /* DR-indicator */ - uint32_t dr_indicator; + uint32_t state; - /* sensor/indicator states */ - uint32_t isolation_state; - uint32_t allocation_state; - - /* configure-connector state */ - void *fdt; - int fdt_start_offset; - bool configured; - sPAPRConfigureConnectorState *ccs; - - bool awaiting_release; - bool awaiting_allocation; + /* RTAS ibm,configure-connector state */ + /* (only valid in UNISOLATE state) */ + int ccs_offset; + int ccs_depth; /* device pointer, via link property */ DeviceState *dev; + bool unplug_requested; + void *fdt; + int fdt_start_offset; } sPAPRDRConnector; typedef struct sPAPRDRConnectorClass { /*< private >*/ DeviceClass parent; + sPAPRDRCState empty_state; + sPAPRDRCState ready_state; /*< public >*/ sPAPRDRConnectorTypeShift typeshift; @@ -218,11 +227,23 @@ typedef struct sPAPRDRConnectorClass { uint32_t (*isolate)(sPAPRDRConnector *drc); uint32_t (*unisolate)(sPAPRDRConnector *drc); void (*release)(DeviceState *dev); - - /* QEMU interfaces for managing hotplug operations */ - bool (*release_pending)(sPAPRDRConnector *drc); } sPAPRDRConnectorClass; +typedef struct sPAPRDRCPhysical { + /*< private >*/ + sPAPRDRConnector parent; + + /* DR-indicator */ + uint32_t dr_indicator; +} sPAPRDRCPhysical; + +static inline bool spapr_drc_hotplugged(DeviceState *dev) +{ + return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE); +} + +void spapr_drc_reset(sPAPRDRConnector *drc); + uint32_t spapr_drc_index(sPAPRDRConnector *drc); sPAPRDRConnectorType spapr_drc_type(sPAPRDRConnector *drc); @@ -235,6 +256,11 @@ int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, int fdt_start_offset, Error **errp); -void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState *d, Error **errp); +void spapr_drc_detach(sPAPRDRConnector *drc); + +static inline bool spapr_drc_unplug_requested(sPAPRDRConnector *drc) +{ + return drc->unplug_requested; +} #endif /* HW_SPAPR_DRC_H */ diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h index 0b464e2..9edfa5f 100644 --- a/include/hw/ppc/spapr_ovec.h +++ b/include/hw/ppc/spapr_ovec.h @@ -50,6 +50,7 @@ typedef struct sPAPROptionVector sPAPROptionVector; #define OV5_DRCONF_MEMORY OV_BIT(2, 2) #define OV5_FORM1_AFFINITY OV_BIT(5, 0) #define OV5_HP_EVT OV_BIT(6, 5) +#define OV5_HPT_RESIZE OV_BIT(6, 7) #define OV5_XIVE_EXPLOIT OV_BIT(23, 7) /* ISA 3.00 MMU features: */ diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index f7a7ea5..8571379 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -22,6 +22,7 @@ #include <linux/kvm.h> #include "qemu-common.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "cpu.h" #include "cpu-models.h" @@ -88,6 +89,7 @@ static int cap_fixup_hcalls; static int cap_htm; /* Hardware transactional memory support */ static int cap_mmu_radix; static int cap_mmu_hash_v3; +static int cap_resize_hpt; static uint32_t debug_inst_opcode; @@ -144,6 +146,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); if (!cap_interrupt_level) { fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " @@ -2709,3 +2712,76 @@ int kvmppc_enable_hwrng(void) return kvmppc_enable_hcall(kvm_state, H_RANDOM); } + +void kvmppc_check_papr_resize_hpt(Error **errp) +{ + if (!kvm_enabled()) { + return; /* No KVM, we're good */ + } + + if (cap_resize_hpt) { + return; /* Kernel has explicit support, we're good */ + } + + /* Otherwise fallback on looking for PR KVM */ + if (kvmppc_is_pr(kvm_state)) { + return; + } + + error_setg(errp, + "Hash page table resizing not available with this KVM version"); +} + +int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift) +{ + CPUState *cs = CPU(cpu); + struct kvm_ppc_resize_hpt rhpt = { + .flags = flags, + .shift = shift, + }; + + if (!cap_resize_hpt) { + return -ENOSYS; + } + + return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt); +} + +int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift) +{ + CPUState *cs = CPU(cpu); + struct kvm_ppc_resize_hpt rhpt = { + .flags = flags, + .shift = shift, + }; + + if (!cap_resize_hpt) { + return -ENOSYS; + } + + return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt); +} + +static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg) +{ + target_ulong sdr1 = arg.target_ptr; + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + /* This is just for the benefit of PR KVM */ + cpu_synchronize_state(cs); + env->spr[SPR_SDR1] = sdr1; + if (kvmppc_put_books_sregs(cpu) < 0) { + error_report("Unable to update SDR1 in KVM"); + exit(1); + } +} + +void kvmppc_update_sdr1(target_ulong sdr1) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1)); + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index eab7c8f..6bc6fb3 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -63,6 +63,10 @@ bool kvmppc_has_cap_mmu_hash_v3(void); int kvmppc_enable_hwrng(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); +void kvmppc_check_papr_resize_hpt(Error **errp); +int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift); +int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift); +void kvmppc_update_sdr1(target_ulong sdr1); bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path); @@ -297,6 +301,28 @@ static inline PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) return NULL; } +static inline void kvmppc_check_papr_resize_hpt(Error **errp) +{ + return; +} + +static inline int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, + target_ulong flags, int shift) +{ + return -ENOSYS; +} + +static inline int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, + target_ulong flags, int shift) +{ + return -ENOSYS; +} + +static inline void kvmppc_update_sdr1(target_ulong sdr1) +{ + abort(); +} + #endif #ifndef CONFIG_KVM diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index 54f1e37..d297b97 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -63,11 +63,15 @@ void ppc_hash64_update_rmls(CPUPPCState *env); #define HASH_PTE_SIZE_64 16 #define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP) +#define HPTE64_V_SSIZE SLB_VSID_B +#define HPTE64_V_SSIZE_256M SLB_VSID_B_256M +#define HPTE64_V_SSIZE_1T SLB_VSID_B_1T #define HPTE64_V_SSIZE_SHIFT 62 #define HPTE64_V_AVPN_SHIFT 7 #define HPTE64_V_AVPN 0x3fffffffffffff80ULL #define HPTE64_V_AVPN_VAL(x) (((x) & HPTE64_V_AVPN) >> HPTE64_V_AVPN_SHIFT) #define HPTE64_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff83ULL)) +#define HPTE64_V_BOLTED 0x0000000000000010ULL #define HPTE64_V_LARGE 0x0000000000000004ULL #define HPTE64_V_SECONDARY 0x0000000000000002ULL #define HPTE64_V_VALID 0x0000000000000001ULL diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index ae25faf..b325c2c 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -9011,8 +9011,16 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) /* By default we choose legacy mode and switch to new hash or radix * when a register process table hcall is made. So disable process * tables and guest translation shootdown by default + * + * Hot-plugged CPUs inherit from the guest radix setting under + * KVM but not under TCG. Update the default LPCR to keep new + * CPUs in sync when radix is enabled. */ - lpcr->default_value &= ~(LPCR_UPRT | LPCR_GTSE); + if (ppc64_radix_guest(cpu)) { + lpcr->default_value |= LPCR_UPRT | LPCR_GTSE; + } else { + lpcr->default_value &= ~(LPCR_UPRT | LPCR_GTSE); + } lpcr->default_value |= LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | LPCR_OEE; break; |