diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-07-07 21:16:06 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-07-07 21:16:06 +0100 |
commit | 7ce0f7dc87e50ebf58ac756ff6be17ec97d3ba4e (patch) | |
tree | c0f8795f087145f41c494460438ba874077b41c5 | |
parent | 1a632032d1ea09a09dc424ac2b10a4a11cd52ab9 (diff) | |
parent | 6319b1dad04e66f450fb3ac6c31d2bf3940068b8 (diff) | |
download | qemu-7ce0f7dc87e50ebf58ac756ff6be17ec97d3ba4e.zip qemu-7ce0f7dc87e50ebf58ac756ff6be17ec97d3ba4e.tar.gz qemu-7ce0f7dc87e50ebf58ac756ff6be17ec97d3ba4e.tar.bz2 |
Merge remote-tracking branch 'remotes/agraf/tags/signed-ppc-for-upstream' into staging
Patch queue for ppc - 2015-07-07
A few last minute PPC changes for 2.4:
- spapr: Update SLOF
- spapr: Fix a few bugs
- spapr: Preparation for hotplug
- spapr: Minor code cleanups
- linux-user: Add mftb handling
- kvm: Enable hugepage support with memory-backend-file
- mac99: Remove nonexistent interrupt pin (Mac OS 9 fix)
# gpg: Signature made Tue Jul 7 16:48:41 2015 BST using RSA key ID 03FEDC60
# gpg: Good signature from "Alexander Graf <agraf@suse.de>"
# gpg: aka "Alexander Graf <alex@csgraf.de>"
* remotes/agraf/tags/signed-ppc-for-upstream: (30 commits)
sPAPR: Clear stale MSIx table during EEH reset
sPAPR: Reenable EEH functionality on reboot
sPAPR: Don't enable EEH on emulated PCI devices
spapr-vty: Use TYPE_ definition instead of hardcoding
spapr_vty: lookup should only return valid VTY objects
spapr_pci: drop redundant args in spapr_[populate, create]_pci_child_dt
spapr_pci: populate ibm,loc-code
spapr_pci: enumerate and add PCI device tree
xics_kvm: Don't enable KVM_CAP_IRQ_XICS if already enabled
ppc: Update cpu_model in MachineState
spapr: Consolidate cpu init code into a routine
spapr: Reorganize CPU dt generation code
cpus: Add a macro to walk CPUs in reverse
spapr: Support ibm, lrdr-capacity device tree property
spapr: Consider max_cpus during xics initialization
Revert "hw/ppc/spapr_pci.c: Avoid functions not in glib 2.12 (g_hash_table_iter_*)"
spapr_iommu: translate sPAPRTCEAccess to IOMMUAccessFlags
spapr_iommu: drop erroneous check in h_put_tce_indirect()
spapr_pci: set device node unit address as hex
spapr_pci: encode class code including Prog IF register
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
31 files changed, 835 insertions, 410 deletions
diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.txt index d35771c..46e0719 100644 --- a/docs/specs/ppc-spapr-hotplug.txt +++ b/docs/specs/ppc-spapr-hotplug.txt @@ -284,4 +284,22 @@ struct rtas_event_log_v6_hp { } drc; } QEMU_PACKED; +== ibm,lrdr-capacity == + +ibm,lrdr-capacity is a property in the /rtas device tree node that identifies +the dynamic reconfiguration capabilities of the guest. It consists of a triple +consisting of <phys>, <size> and <maxcpus>. + + <phys>, encoded in BE format represents the maximum address in bytes and + hence the maximum memory that can be allocated to the guest. + + <size>, encoded in BE format represents the size increments in which + memory can be hot-plugged to the guest. + + <maxcpus>, a BE-encoded integer, represents the maximum number of + processors that the guest can have. + +pseries guests use this property to note the maximum allowed CPUs for the +guest. + [1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867 diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c index 4e464bd..36b328b 100644 --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -74,7 +74,7 @@ static void spapr_vty_realize(VIOsPAPRDevice *sdev, Error **errp) } /* Forward declaration */ -static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -101,7 +101,7 @@ static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -193,7 +193,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) DeviceState *iter = kid->child; /* Only look at VTY devices */ - if (!object_dynamic_cast(OBJECT(iter), "spapr-vty")) { + if (!object_dynamic_cast(OBJECT(iter), TYPE_VIO_SPAPR_VTY_DEVICE)) { continue; } @@ -214,7 +214,7 @@ VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus) return selected; } -VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg) +VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg) { VIOsPAPRDevice *sdev; @@ -228,6 +228,10 @@ VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg) return spapr_vty_get_default(spapr->vio_bus); } + if (!object_dynamic_cast(OBJECT(sdev), TYPE_VIO_SPAPR_VTY_DEVICE)) { + return NULL; + } + return sdev; } diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 0fd2a84..924b1ae 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -806,7 +806,7 @@ void xics_free(XICSState *icp, int irq, int num) * Guest interfaces */ -static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -816,7 +816,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong server = get_cpu_index_by_dt_id(args[0]); @@ -830,7 +830,7 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -840,7 +840,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -852,7 +852,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -862,7 +862,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -874,7 +874,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -902,7 +902,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -927,7 +927,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority); } -static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -953,7 +953,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index c15453f..d58729c 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -331,6 +331,15 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu) abort(); } + /* + * If we are reusing a parked vCPU fd corresponding to the CPU + * which was hot-removed earlier we don't have to renable + * KVM_CAP_IRQ_XICS capability again. + */ + if (ss->cap_irq_xics_enabled) { + return; + } + if (icpkvm->kernel_xics_fd != -1) { int ret; @@ -343,6 +352,7 @@ static void xics_kvm_cpu_setup(XICSState *icp, PowerPCCPU *cpu) kvm_arch_vcpu_id(cs), strerror(errno)); exit(1); } + ss->cap_irq_xics_enabled = true; } } @@ -368,7 +378,7 @@ static void xics_kvm_set_nr_servers(XICSState *icp, uint32_t nr_servers, } } -static void rtas_dummy(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index e9037b0..e3c0242 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -132,8 +132,6 @@ static void macio_common_realize(PCIDevice *d, Error **errp) SysBusDevice *sysbus_dev; Error *err = NULL; - d->config[0x3d] = 0x01; // interrupt on pin 1 - object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err); if (err) { error_propagate(errp, err); diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 2dd5ec1..1ca5e9c 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -284,7 +284,7 @@ static int check_bd(VIOsPAPRVLANDevice *dev, vlan_bd_t bd, } static target_ulong h_register_logical_lan(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -349,7 +349,8 @@ static target_ulong h_register_logical_lan(PowerPCCPU *cpu, } -static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_free_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -371,7 +372,7 @@ static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -421,7 +422,8 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, return H_SUCCESS; } -static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_send_logical_lan(PowerPCCPU *cpu, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -490,7 +492,7 @@ static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 11332d1..fcaa77d 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -45,7 +45,7 @@ typedef struct sPAPRNVRAM { #define DEFAULT_NVRAM_SIZE 65536 #define MAX_NVRAM_SIZE 1048576 -static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -86,7 +86,7 @@ static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 1, len); } -static void rtas_nvram_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 0f3e341..77d5c81 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -145,7 +145,6 @@ static void ppc_core99_reset(void *opaque) static void ppc_core99_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -182,14 +181,15 @@ static void ppc_core99_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) + if (machine->cpu_model == NULL) { #ifdef TARGET_PPC64 - cpu_model = "970fx"; + machine->cpu_model = "970fx"; #else - cpu_model = "G4"; + machine->cpu_model = "G4"; #endif + } for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 99879dd..06fdbaf 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -75,7 +75,6 @@ static void ppc_heathrow_reset(void *opaque) static void ppc_heathrow_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -107,10 +106,10 @@ static void ppc_heathrow_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) - cpu_model = "G3"; + if (machine->cpu_model == NULL) + machine->cpu_model = "G3"; for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index 778970a..032fa80 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -159,7 +159,6 @@ static void main_cpu_reset(void *opaque) static void bamboo_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -184,10 +183,10 @@ static void bamboo_init(MachineState *machine) int i; /* Setup CPU. */ - if (cpu_model == NULL) { - cpu_model = "440EP"; + if (machine->cpu_model == NULL) { + machine->cpu_model = "440EP"; } - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to initialize CPU!\n"); exit(1); diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 998ee2d..45b5f62 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -506,7 +506,6 @@ static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size, static void ppc_prep_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -536,10 +535,10 @@ static void ppc_prep_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - if (cpu_model == NULL) - cpu_model = "602"; + if (machine->cpu_model == NULL) + machine->cpu_model = "602"; for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index bcf5f0f..a6f1947 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -91,25 +91,6 @@ #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) -typedef struct sPAPRMachineState sPAPRMachineState; - -#define TYPE_SPAPR_MACHINE "spapr-machine" -#define SPAPR_MACHINE(obj) \ - OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE) - -/** - * sPAPRMachineState: - */ -struct sPAPRMachineState { - /*< private >*/ - MachineState parent_obj; - - /*< public >*/ - char *kvm_type; -}; - -sPAPREnvironment *spapr; - static XICSState *try_create_xics(const char *type, int nr_servers, int nr_irqs, Error **errp) { @@ -185,7 +166,28 @@ static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, return ret; } -static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) +static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs) +{ + int ret = 0; + PowerPCCPU *cpu = POWERPC_CPU(cs); + int index = ppc_get_vcpu_dt_id(cpu); + uint32_t associativity[] = {cpu_to_be32(0x5), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(cs->numa_node), + cpu_to_be32(index)}; + + /* Advertise NUMA via ibm,associativity */ + if (nb_numa_nodes > 1) { + ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, + sizeof(associativity)); + } + + return ret; +} + +static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr) { int ret = 0, offset, cpus_offset; CPUState *cs; @@ -197,12 +199,6 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) PowerPCCPU *cpu = POWERPC_CPU(cs); DeviceClass *dc = DEVICE_GET_CLASS(cs); int index = ppc_get_vcpu_dt_id(cpu); - uint32_t associativity[] = {cpu_to_be32(0x5), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(cs->numa_node), - cpu_to_be32(index)}; if ((index % smt) != 0) { continue; @@ -226,20 +222,17 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr) } } - if (nb_numa_nodes > 1) { - ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, - sizeof(associativity)); - if (ret < 0) { - return ret; - } - } - ret = fdt_setprop(fdt, offset, "ibm,pft-size", pft_size_prop, sizeof(pft_size_prop)); if (ret < 0) { return ret; } + ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs); + if (ret < 0) { + return ret; + } + ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, ppc_get_compat_smt_threads(cpu)); if (ret < 0) { @@ -285,15 +278,18 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, static hwaddr spapr_node0_size(void) { + MachineState *machine = MACHINE(qdev_get_machine()); + if (nb_numa_nodes) { int i; for (i = 0; i < nb_numa_nodes; ++i) { if (numa_info[i].node_mem) { - return MIN(pow2floor(numa_info[i].node_mem), ram_size); + return MIN(pow2floor(numa_info[i].node_mem), + machine->ram_size); } } } - return ram_size; + return machine->ram_size; } #define _FDT(exp) \ @@ -319,18 +315,13 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, uint32_t epow_irq) { void *fdt; - CPUState *cs; uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; - uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; - int smt = kvmppc_smt_threads(); + uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)}; unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; - QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); - unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; char *buf; add_str(hypertas, "hcall-pft"); @@ -416,107 +407,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_end_node(fdt))); - /* cpus */ - _FDT((fdt_begin_node(fdt, "cpus"))); - - _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); - _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); - - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - DeviceClass *dc = DEVICE_GET_CLASS(cs); - PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); - int index = ppc_get_vcpu_dt_id(cpu); - char *nodename; - uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), - 0xffffffff, 0xffffffff}; - uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; - uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; - uint32_t page_sizes_prop[64]; - size_t page_sizes_prop_size; - - if ((index % smt) != 0) { - continue; - } - - nodename = g_strdup_printf("%s@%x", dc->fw_name, index); - - _FDT((fdt_begin_node(fdt, nodename))); - - g_free(nodename); - - _FDT((fdt_property_cell(fdt, "reg", index))); - _FDT((fdt_property_string(fdt, "device_type", "cpu"))); - - _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR]))); - _FDT((fdt_property_cell(fdt, "d-cache-block-size", - env->dcache_line_size))); - _FDT((fdt_property_cell(fdt, "d-cache-line-size", - env->dcache_line_size))); - _FDT((fdt_property_cell(fdt, "i-cache-block-size", - env->icache_line_size))); - _FDT((fdt_property_cell(fdt, "i-cache-line-size", - env->icache_line_size))); - - if (pcc->l1_dcache_size) { - _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size))); - } else { - fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); - } - if (pcc->l1_icache_size) { - _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size))); - } else { - fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); - } - - _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq))); - _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq))); - _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr))); - _FDT((fdt_property_string(fdt, "status", "okay"))); - _FDT((fdt_property(fdt, "64-bit", NULL, 0))); - - if (env->spr_cb[SPR_PURR].oea_read) { - _FDT((fdt_property(fdt, "ibm,purr", NULL, 0))); - } - - if (env->mmu_model & POWERPC_MMU_1TSEG) { - _FDT((fdt_property(fdt, "ibm,processor-segment-sizes", - segs, sizeof(segs)))); - } - - /* Advertise VMX/VSX (vector extensions) if available - * 0 / no property == no vector extensions - * 1 == VMX / Altivec available - * 2 == VSX available */ - if (env->insns_flags & PPC_ALTIVEC) { - uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; - - _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx))); - } - - /* Advertise DFP (Decimal Floating Point) if available - * 0 / no property == no DFP - * 1 == DFP available */ - if (env->insns_flags2 & PPC2_DFP) { - _FDT((fdt_property_cell(fdt, "ibm,dfp", 1))); - } - - page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, - sizeof(page_sizes_prop)); - if (page_sizes_prop_size) { - _FDT((fdt_property(fdt, "ibm,segment-page-sizes", - page_sizes_prop, page_sizes_prop_size))); - } - - _FDT((fdt_property_cell(fdt, "ibm,chip-id", - cs->cpu_index / cpus_per_socket))); - - _FDT((fdt_end_node(fdt))); - } - - _FDT((fdt_end_node(fdt))); - /* RTAS */ _FDT((fdt_begin_node(fdt, "rtas"))); @@ -605,7 +495,8 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, return fdt; } -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size) +int spapr_h_cas_compose_response(sPAPRMachineState *spapr, + target_ulong addr, target_ulong size) { void *fdt, *fdt_skel; sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; @@ -666,8 +557,9 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, sizeof(associativity)))); } -static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) +static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) { + MachineState *machine = MACHINE(spapr); hwaddr mem_start, node_size; int i, nb_nodes = nb_numa_nodes; NodeInfo *nodes = numa_info; @@ -676,7 +568,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) /* No NUMA nodes, assume there is just one node with whole RAM */ if (!nb_numa_nodes) { nb_nodes = 1; - ramnode.node_mem = ram_size; + ramnode.node_mem = machine->ram_size; nodes = &ramnode; } @@ -684,12 +576,12 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) if (!nodes[i].node_mem) { continue; } - if (mem_start >= ram_size) { + if (mem_start >= machine->ram_size) { node_size = 0; } else { node_size = nodes[i].node_mem; - if (node_size > ram_size - mem_start) { - node_size = ram_size - mem_start; + if (node_size > machine->ram_size - mem_start) { + node_size = machine->ram_size - mem_start; } } if (!mem_start) { @@ -715,7 +607,138 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) return 0; } -static void spapr_finalize_fdt(sPAPREnvironment *spapr, +static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, + sPAPRMachineState *spapr) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + int index = ppc_get_vcpu_dt_id(cpu); + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), + 0xffffffff, 0xffffffff}; + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; + uint32_t page_sizes_prop[64]; + size_t page_sizes_prop_size; + QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); + unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; + uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; + + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); + + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", + env->icache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", + env->icache_line_size))); + + if (pcc->l1_dcache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", + pcc->l1_dcache_size))); + } else { + fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); + } + if (pcc->l1_icache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", + pcc->l1_icache_size))); + } else { + fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); + } + + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); + + if (env->spr_cb[SPR_PURR].oea_read) { + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + } + + if (env->mmu_model & POWERPC_MMU_1TSEG) { + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", + segs, sizeof(segs)))); + } + + /* Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); + } + + /* Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); + } + + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, + sizeof(page_sizes_prop)); + if (page_sizes_prop_size) { + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", + page_sizes_prop, page_sizes_prop_size))); + } + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", + cs->cpu_index / cpus_per_socket))); + + _FDT((fdt_setprop(fdt, offset, "ibm,pft-size", + pft_size_prop, sizeof(pft_size_prop)))); + + _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs)); + + _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, + ppc_get_compat_smt_threads(cpu))); +} + +static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) +{ + CPUState *cs; + int cpus_offset; + char *nodename; + int smt = kvmppc_smt_threads(); + + cpus_offset = fdt_add_subnode(fdt, 0, "cpus"); + _FDT(cpus_offset); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0))); + + /* + * We walk the CPUs in reverse order to ensure that CPU DT nodes + * created by fdt_add_subnode() end up in the right order in FDT + * for the guest kernel the enumerate the CPUs correctly. + */ + CPU_FOREACH_REVERSE(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + int index = ppc_get_vcpu_dt_id(cpu); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int offset; + + if ((index % smt) != 0) { + continue; + } + + nodename = g_strdup_printf("%s@%x", dc->fw_name, index); + offset = fdt_add_subnode(fdt, cpus_offset, nodename); + g_free(nodename); + _FDT(offset); + spapr_populate_cpu_dt(cs, fdt, offset, spapr); + } + +} + +static void spapr_finalize_fdt(sPAPRMachineState *spapr, hwaddr fdt_addr, hwaddr rtas_addr, hwaddr rtas_size) @@ -760,11 +783,8 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, fprintf(stderr, "Couldn't set up RTAS device tree properties\n"); } - /* Advertise NUMA via ibm,associativity */ - ret = spapr_fixup_cpu_dt(fdt, spapr); - if (ret < 0) { - fprintf(stderr, "Couldn't finalize CPU device tree properties\n"); - } + /* cpus */ + spapr_populate_cpus_dt_node(fdt, spapr); bootlist = get_boot_devices_list(&cb, true); if (cb && bootlist) { @@ -831,7 +851,7 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu) #define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) #define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY)) -static void spapr_reset_htab(sPAPREnvironment *spapr) +static void spapr_reset_htab(sPAPRMachineState *spapr) { long shift; int index; @@ -893,7 +913,7 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) * A guest reset will cause spapr->htab_fd to become stale if being used. * Reopen the file descriptor to make sure the whole HTAB is properly read. */ -static int spapr_check_htab_fd(sPAPREnvironment *spapr) +static int spapr_check_htab_fd(sPAPRMachineState *spapr) { int rc = 0; @@ -913,6 +933,7 @@ static int spapr_check_htab_fd(sPAPREnvironment *spapr) static void ppc_spapr_reset(void) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); PowerPCCPU *first_ppc_cpu; uint32_t rtas_limit; @@ -946,12 +967,13 @@ static void ppc_spapr_reset(void) first_ppc_cpu->env.gpr[3] = spapr->fdt_addr; first_ppc_cpu->env.gpr[5] = 0; first_cpu->halted = 0; - first_ppc_cpu->env.nip = spapr->entry_point; + first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT; } static void spapr_cpu_reset(void *opaque) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); PowerPCCPU *cpu = opaque; CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -980,12 +1002,12 @@ static void spapr_cpu_reset(void *opaque) * We have 8 hpte per group, and each hpte is 16 bytes. * ie have 128 bytes per hpte entry. */ - env->htab_mask = (1ULL << ((spapr)->htab_shift - 7)) - 1; + env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1; env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18); } -static void spapr_create_nvram(sPAPREnvironment *spapr) +static void spapr_create_nvram(sPAPRMachineState *spapr) { DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram"); DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0); @@ -999,7 +1021,7 @@ static void spapr_create_nvram(sPAPREnvironment *spapr) spapr->nvram = (struct sPAPRNVRAM *)dev; } -static void spapr_rtc_create(sPAPREnvironment *spapr) +static void spapr_rtc_create(sPAPRMachineState *spapr) { DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC); @@ -1029,7 +1051,7 @@ static int spapr_vga_init(PCIBus *pci_bus) static int spapr_post_load(void *opaque, int version_id) { - sPAPREnvironment *spapr = (sPAPREnvironment *)opaque; + sPAPRMachineState *spapr = (sPAPRMachineState *)opaque; int err = 0; /* In earlier versions, there was no separate qdev for the PAPR @@ -1058,16 +1080,16 @@ static const VMStateDescription vmstate_spapr = { VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4), /* RTC offset */ - VMSTATE_UINT64_TEST(rtc_offset, sPAPREnvironment, version_before_3), + VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3), - VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2), + VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2), VMSTATE_END_OF_LIST() }, }; static int htab_save_setup(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; /* "Iteration" header */ qemu_put_be32(f, spapr->htab_shift); @@ -1091,7 +1113,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) return 0; } -static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, +static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr, int64_t max_ns) { int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; @@ -1141,7 +1163,7 @@ static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr, spapr->htab_save_index = index; } -static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr, +static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr, int64_t max_ns) { bool final = max_ns < 0; @@ -1223,7 +1245,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr, static int htab_save_iterate(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; int rc = 0; /* Iteration header */ @@ -1258,7 +1280,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) static int htab_save_complete(QEMUFile *f, void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; /* Iteration header */ qemu_put_be32(f, 0); @@ -1293,7 +1315,7 @@ static int htab_save_complete(QEMUFile *f, void *opaque) static int htab_load(QEMUFile *f, void *opaque, int version_id) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; uint32_t section_hdr; int fd = -1; @@ -1387,16 +1409,42 @@ static void spapr_boot_set(void *opaque, const char *boot_device, machine->boot_order = g_strdup(boot_device); } +static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, TIMEBASE_FREQ); + + /* PAPR always has exception vectors in RAM not ROM. To ensure this, + * MSR[IP] should never be set. + */ + env->msr_mask &= ~(1 << 6); + + /* Tell KVM that we're in PAPR mode */ + if (kvm_enabled()) { + kvmppc_set_papr(cpu); + } + + if (cpu->max_compat) { + if (ppc_set_compat(cpu, cpu->max_compat) < 0) { + exit(1); + } + } + + xics_cpu_setup(spapr->icp, cpu); + + qemu_register_reset(spapr_cpu_reset, cpu); +} + /* pSeries LPAR / sPAPR hardware init */ static void ppc_spapr_init(MachineState *machine) { - ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; PowerPCCPU *cpu; - CPUPPCState *env; PCIHostState *phb; int i; MemoryRegion *sysmem = get_system_memory(); @@ -1413,7 +1461,6 @@ static void ppc_spapr_init(MachineState *machine) msi_supported = true; - spapr = g_malloc0(sizeof(*spapr)); QLIST_INIT(&spapr->phbs); cpu_ppc_hypercall = emulate_spapr_hypercall; @@ -1460,7 +1507,7 @@ static void ppc_spapr_init(MachineState *machine) * more than needed for the Linux guests we support. */ spapr->htab_shift = 18; /* Minimum architected size */ while (spapr->htab_shift <= 46) { - if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) { + if ((1ULL << (spapr->htab_shift + 7)) >= machine->ram_size) { break; } spapr->htab_shift++; @@ -1468,43 +1515,21 @@ static void ppc_spapr_init(MachineState *machine) /* Set up Interrupt Controller before we create the VCPUs */ spapr->icp = xics_system_init(machine, - smp_cpus * kvmppc_smt_threads() / smp_threads, + DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), + smp_threads), XICS_IRQS); /* init CPUs */ - if (cpu_model == NULL) { - cpu_model = kvm_enabled() ? "host" : "POWER7"; + if (machine->cpu_model == NULL) { + machine->cpu_model = kvm_enabled() ? "host" : "POWER7"; } for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(cpu_model); + cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { fprintf(stderr, "Unable to find PowerPC CPU definition\n"); exit(1); } - env = &cpu->env; - - /* Set time-base frequency to 512 MHz */ - cpu_ppc_tb_init(env, TIMEBASE_FREQ); - - /* PAPR always has exception vectors in RAM not ROM. To ensure this, - * MSR[IP] should never be set. - */ - env->msr_mask &= ~(1 << 6); - - /* Tell KVM that we're in PAPR mode */ - if (kvm_enabled()) { - kvmppc_set_papr(cpu); - } - - if (cpu->max_compat) { - if (ppc_set_compat(cpu, cpu->max_compat) < 0) { - exit(1); - } - } - - xics_cpu_setup(spapr->icp, cpu); - - qemu_register_reset(spapr_cpu_reset, cpu); + spapr_cpu_init(spapr, cpu); } if (kvm_enabled()) { @@ -1513,9 +1538,8 @@ static void ppc_spapr_init(MachineState *machine) } /* allocate RAM */ - spapr->ram_limit = ram_size; memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram", - spapr->ram_limit); + machine->ram_size); memory_region_add_subregion(sysmem, 0, ram); if (rma_alloc_size && rma) { @@ -1659,8 +1683,9 @@ static void ppc_spapr_init(MachineState *machine) } g_free(filename); - spapr->entry_point = 0x100; - + /* FIXME: Should register things through the MachineState's qdev + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ vmstate_register(NULL, 0, &vmstate_spapr, spapr); register_savevm_live(NULL, "spapr/htab", -1, 1, &savevm_htab_handlers, spapr); @@ -1756,17 +1781,17 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, static char *spapr_get_kvm_type(Object *obj, Error **errp) { - sPAPRMachineState *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); - return g_strdup(sm->kvm_type); + return g_strdup(spapr->kvm_type); } static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) { - sPAPRMachineState *sm = SPAPR_MACHINE(obj); + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); - g_free(sm->kvm_type); - sm->kvm_type = g_strdup(value); + g_free(spapr->kvm_type); + spapr->kvm_type = g_strdup(value); } static void spapr_machine_initfn(Object *obj) @@ -1821,6 +1846,7 @@ static const TypeInfo spapr_machine_info = { .abstract = true, .instance_size = sizeof(sPAPRMachineState), .instance_init = spapr_machine_initfn, + .class_size = sizeof(sPAPRMachineClass), .class_init = spapr_machine_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_FW_PATH_PROVIDER }, diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index fda9e35..f626eb7 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -238,6 +238,7 @@ void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) static void rtas_event_log_queue(int log_type, void *data, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1); g_assert(data); @@ -250,6 +251,7 @@ static void rtas_event_log_queue(int log_type, void *data, bool exception) static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; /* we only queue EPOW events atm. */ @@ -278,6 +280,7 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, static bool rtas_event_log_contains(uint32_t event_mask, bool exception) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; /* we only queue EPOW events atm. */ @@ -314,6 +317,7 @@ static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr) static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, int section_count) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct tm tm; int year; @@ -336,7 +340,7 @@ static void spapr_init_maina(struct rtas_event_log_v6_maina *maina, static void spapr_powerdown_req(Notifier *n, void *opaque) { - sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier); + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct rtas_error_log *hdr; struct rtas_event_log_v6 *v6hdr; struct rtas_event_log_v6_maina *maina; @@ -384,6 +388,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct hp_log_full *new_hp; struct rtas_error_log *hdr; struct rtas_event_log_v6 *v6hdr; @@ -453,7 +458,7 @@ void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc) spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE); } -static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -508,7 +513,7 @@ out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } -static void event_scan(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -548,7 +553,7 @@ out_no_events: rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); } -void spapr_events_init(sPAPREnvironment *spapr) +void spapr_events_init(sPAPRMachineState *spapr) { QTAILQ_INIT(&spapr->pending_events); spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 4f76f1c..652ddf6 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -84,9 +84,10 @@ static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index) return true; } -static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { + MachineState *machine = MACHINE(spapr); CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; target_ulong pte_index = args[1]; @@ -118,7 +119,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr, raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << page_shift) - 1); - if (raddr < spapr->ram_limit) { + if (raddr < machine->ram_size) { /* Regular RAM - should have WIMG=0010 */ if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { return H_PARAMETER; @@ -205,7 +206,7 @@ static RemoveResult remove_hpte(CPUPPCState *env, target_ulong ptex, return REMOVE_SUCCESS; } -static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -252,7 +253,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, #define H_BULK_REMOVE_MAX_BATCH 4 -static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -299,7 +300,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -337,7 +338,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -367,7 +368,7 @@ static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* FIXME: actually implement this */ @@ -506,7 +507,7 @@ static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr) return H_SUCCESS; } -static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong flags = args[0]; @@ -551,7 +552,7 @@ static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr, return ret; } -static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_cede(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUPPCState *env = &cpu->env; @@ -567,7 +568,7 @@ static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_rtas(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong rtas_r3 = args[0]; @@ -579,7 +580,7 @@ static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr, nret, rtas_r3 + 12 + 4*nargs); } -static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -603,7 +604,7 @@ static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_PARAMETER; } -static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -629,7 +630,7 @@ static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_PARAMETER; } -static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); @@ -698,14 +699,14 @@ static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* Nothing to do on emulation, KVM will trap this in the kernel */ return H_SUCCESS; } -static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { /* Nothing to do on emulation, KVM will trap this in the kernel */ @@ -788,7 +789,7 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, return H_SUCCESS; } -static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong resource = args[1]; @@ -828,7 +829,7 @@ static void do_set_compat(void *arg) ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0) static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { @@ -921,7 +922,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, return H_SUCCESS; } - if (spapr_h_cas_compose_response(args[1], args[2])) { + if (spapr_h_cas_compose_response(spapr, args[1], args[2])) { qemu_system_reset_request(); } @@ -952,6 +953,8 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + if ((opcode <= MAX_HCALL_OPCODE) && ((opcode & 0x3) == 0)) { spapr_hcall_fn fn = papr_hypercall_table[opcode / 4]; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 8cd9dba..f61504e 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -60,6 +60,20 @@ sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn) return NULL; } +static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce) +{ + switch (tce & SPAPR_TCE_RW) { + case SPAPR_TCE_FAULT: + return IOMMU_NONE; + case SPAPR_TCE_RO: + return IOMMU_RO; + case SPAPR_TCE_WO: + return IOMMU_WO; + default: /* SPAPR_TCE_RW */ + return IOMMU_RW; + } +} + /* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) @@ -82,7 +96,7 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, ret.iova = addr & page_mask; ret.translated_addr = tce & page_mask; ret.addr_mask = ~page_mask; - ret.perm = tce & IOMMU_RW; + ret.perm = spapr_tce_iommu_access_flags(tce); } trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm, ret.addr_mask); @@ -233,14 +247,14 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, entry.iova = ioba & page_mask; entry.translated_addr = tce & page_mask; entry.addr_mask = ~page_mask; - entry.perm = tce & IOMMU_RW; + entry.perm = spapr_tce_iommu_access_flags(tce); memory_region_notify_iommu(&tcet->iommu, entry); return H_SUCCESS; } static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { int i; @@ -267,9 +281,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, ioba &= page_mask; for (i = 0; i < npages; ++i, ioba += page_size) { - target_ulong off = (tce_list & ~SPAPR_TCE_RW) + - i * sizeof(target_ulong); - tce = ldq_be_phys(cs->as, off); + tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong)); ret = put_tce_emu(tcet, ioba, tce); if (ret) { @@ -287,7 +299,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, return ret; } -static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { int i; @@ -326,7 +338,7 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, return ret; } -static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong liobn = args[0]; @@ -367,7 +379,7 @@ static target_ulong get_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, return H_SUCCESS; } -static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong liobn = args[0]; diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index d4a6150..a8f79d8 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -23,6 +23,7 @@ * THE SOFTWARE. */ #include "hw/hw.h" +#include "hw/sysbus.h" #include "hw/pci/pci.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -35,6 +36,7 @@ #include "qemu/error-report.h" #include "qapi/qmp/qerror.h" +#include "hw/pci/pci_bridge.h" #include "hw/pci/pci_bus.h" #include "hw/ppc/spapr_drc.h" #include "sysemu/device_tree.h" @@ -50,6 +52,8 @@ #define RTAS_TYPE_MSI 1 #define RTAS_TYPE_MSIX 2 +#define FDT_NAME_MAX 128 + #define _FDT(exp) \ do { \ int ret = (exp); \ @@ -58,7 +62,7 @@ } \ } while (0) -sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) +sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid) { sPAPRPHBState *sphb; @@ -72,7 +76,7 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid) return NULL; } -PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid, +PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr) { sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid); @@ -93,7 +97,7 @@ static uint32_t rtas_pci_cfgaddr(uint32_t arg) return ((arg >> 20) & 0xf00) | (arg & 0xff); } -static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid, +static void finish_read_pci_config(sPAPRMachineState *spapr, uint64_t buid, uint32_t addr, uint32_t size, target_ulong rets) { @@ -123,7 +127,7 @@ static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid, rtas_st(rets, 1, val); } -static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -143,7 +147,7 @@ static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_read_pci_config(spapr, buid, addr, size, rets); } -static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -161,7 +165,7 @@ static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_read_pci_config(spapr, 0, addr, size, rets); } -static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid, +static void finish_write_pci_config(sPAPRMachineState *spapr, uint64_t buid, uint32_t addr, uint32_t size, uint32_t val, target_ulong rets) { @@ -189,7 +193,7 @@ static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -210,7 +214,7 @@ static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, finish_write_pci_config(spapr, buid, addr, size, val, rets); } -static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -259,7 +263,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, } } -static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -377,7 +381,7 @@ out: } static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, @@ -418,13 +422,14 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, } static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { sPAPRPHBState *sphb; sPAPRPHBClass *spc; + PCIDevice *pdev; uint32_t addr, option; uint64_t buid; int ret; @@ -442,6 +447,12 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, goto param_error_exit; } + pdev = pci_find_device(PCI_HOST_BRIDGE(sphb)->bus, + (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); + if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + goto param_error_exit; + } + spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); if (!spc->eeh_set_option) { goto param_error_exit; @@ -456,7 +467,7 @@ param_error_exit: } static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -512,7 +523,7 @@ param_error_exit: } static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -556,7 +567,7 @@ param_error_exit: } static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -592,7 +603,7 @@ param_error_exit: } static void rtas_ibm_configure_pe(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -627,7 +638,7 @@ param_error_exit: /* To support it later */ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -718,6 +729,7 @@ static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) static void spapr_msi_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); uint32_t irq = data; trace_spapr_pci_msi_write(addr, data, irq); @@ -742,6 +754,60 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &phb->iommu_as; } +static char *spapr_phb_vfio_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) +{ + char *path = NULL, *buf = NULL, *host = NULL; + + /* Get the PCI VFIO host id */ + host = object_property_get_str(OBJECT(pdev), "host", NULL); + if (!host) { + goto err_out; + } + + /* Construct the path of the file that will give us the DT location */ + path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host); + g_free(host); + if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) { + goto err_out; + } + g_free(path); + + /* Construct and read from host device tree the loc-code */ + path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf); + g_free(buf); + if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) { + goto err_out; + } + return buf; + +err_out: + g_free(path); + return NULL; +} + +static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) +{ + char *buf; + const char *devtype = "qemu"; + uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + buf = spapr_phb_vfio_get_loc_code(sphb, pdev); + if (buf) { + return buf; + } + devtype = "vfio"; + } + /* + * For emulated devices and VFIO-failure case, make up + * the loc-code. + */ + buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x", + devtype, pdev->name, sphb->index, busnr, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + return buf; +} + /* Macros to operate with address in OF binding to PCI */ #define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) #define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ @@ -786,7 +852,13 @@ typedef struct ResourceProps { * phys.hi = 0xYYXXXXZZ, where: * 0xYY = npt000ss * ||| | - * ||| +-- space code: 1 if IO region, 2 if MEM region + * ||| +-- space code + * ||| | + * ||| + 00 if configuration space + * ||| + 01 if IO region, + * ||| + 10 if 32-bit MEM region + * ||| + 11 if 64-bit MEM region + * ||| * ||+------ for non-relocatable IO: 1 if aliased * || for relocatable IO: 1 if below 64KB * || for MEM: 1 if below 1MB @@ -846,6 +918,8 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i))); if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { reg->phys_hi |= cpu_to_be32(b_ss(1)); + } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + reg->phys_hi |= cpu_to_be32(b_ss(3)); } else { reg->phys_hi |= cpu_to_be32(b_ss(2)); } @@ -870,13 +944,17 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) rp->assigned_len = assigned_idx * sizeof(ResourceFields); } +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, + PCIDevice *pdev); + static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, - int phb_index, int drc_index, - const char *drc_name) + sPAPRPHBState *sphb) { ResourceProps rp; bool is_bridge = false; - int pci_status; + int pci_status, err; + char *buf = NULL; + uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev); if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == PCI_HEADER_TYPE_BRIDGE) { @@ -891,8 +969,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, _FDT(fdt_setprop_cell(fdt, offset, "revision-id", pci_default_read_config(dev, PCI_REVISION_ID, 1))); _FDT(fdt_setprop_cell(fdt, offset, "class-code", - pci_default_read_config(dev, PCI_CLASS_DEVICE, 2) - << 8)); + pci_default_read_config(dev, PCI_CLASS_PROG, 3))); if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) { _FDT(fdt_setprop_cell(fdt, offset, "interrupts", pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); @@ -938,8 +1015,21 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, * processed by OF beforehand */ _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); - _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name))); - _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + buf = spapr_phb_get_loc_code(sphb, dev); + if (!buf) { + error_report("Failed setting the ibm,loc-code"); + return -1; + } + + err = fdt_setprop_string(fdt, offset, "ibm,loc-code", buf); + g_free(buf); + if (err < 0) { + return err; + } + + if (drc_index) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + } _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", RESOURCE_CELLS_ADDRESS)); @@ -957,29 +1047,27 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, } /* create OF node for pci device and required OF DT properties */ -static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, - int drc_index, const char *drc_name, - int *dt_offset) +static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, + void *fdt, int node_offset) { - void *fdt; - int offset, ret, fdt_size; + int offset, ret; int slot = PCI_SLOT(dev->devfn); int func = PCI_FUNC(dev->devfn); - char nodename[512]; + char nodename[FDT_NAME_MAX]; - fdt = create_device_tree(&fdt_size); if (func != 0) { - sprintf(nodename, "pci@%d,%d", slot, func); + snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func); } else { - sprintf(nodename, "pci@%d", slot); + snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot); } - offset = fdt_add_subnode(fdt, 0, nodename); - ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index, - drc_name); - g_assert(!ret); + offset = fdt_add_subnode(fdt, node_offset, nodename); + ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb); - *dt_offset = offset; - return fdt; + g_assert(!ret); + if (ret) { + return 0; + } + return offset; } static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, @@ -989,22 +1077,21 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); DeviceState *dev = DEVICE(pdev); - int drc_index = drck->get_index(drc); - const char *drc_name = drck->get_name(drc); void *fdt = NULL; - int fdt_start_offset = 0; + int fdt_start_offset = 0, fdt_size; - /* boot-time devices get their device tree node created by SLOF, but for - * hotplugged devices we need QEMU to generate it so the guest can fetch - * it via RTAS - */ if (dev->hotplugged) { - fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name, - &fdt_start_offset); + fdt = create_device_tree(&fdt_size); + fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); + if (!fdt_start_offset) { + error_setg(errp, "Failed to create pci child device tree node"); + goto out; + } } drck->attach(drc, DEVICE(pdev), fdt, fdt_start_offset, !dev->hotplugged, errp); +out: if (*errp) { g_free(fdt); } @@ -1046,6 +1133,20 @@ static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb, pdev->devfn); } +static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, + PCIDevice *pdev) +{ + sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + sPAPRDRConnectorClass *drck; + + if (!drc) { + return 0; + } + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + return drck->get_index(drc); +} + static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) { @@ -1110,6 +1211,7 @@ static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, static void spapr_phb_realize(DeviceState *dev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); SysBusDevice *s = SYS_BUS_DEVICE(dev); sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); PCIHostState *phb = PCI_HOST_BRIDGE(s); @@ -1351,34 +1453,28 @@ static const VMStateDescription vmstate_spapr_pci_msi = { }, }; -static void spapr_pci_fill_msi_devs(gpointer key, gpointer value, - gpointer opaque) -{ - sPAPRPHBState *sphb = opaque; - - sphb->msi_devs[sphb->msi_devs_num].key = *(uint32_t *)key; - sphb->msi_devs[sphb->msi_devs_num].value = *(spapr_pci_msi *)value; - sphb->msi_devs_num++; -} - static void spapr_pci_pre_save(void *opaque) { sPAPRPHBState *sphb = opaque; - int msi_devs_num; + GHashTableIter iter; + gpointer key, value; + int i; if (sphb->msi_devs) { g_free(sphb->msi_devs); sphb->msi_devs = NULL; } - sphb->msi_devs_num = 0; - msi_devs_num = g_hash_table_size(sphb->msi); - if (!msi_devs_num) { + sphb->msi_devs_num = g_hash_table_size(sphb->msi); + if (!sphb->msi_devs_num) { return; } - sphb->msi_devs = g_malloc(msi_devs_num * sizeof(spapr_pci_msi_mig)); + sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig)); - g_hash_table_foreach(sphb->msi, spapr_pci_fill_msi_devs, sphb); - assert(sphb->msi_devs_num == msi_devs_num); + g_hash_table_iter_init(&iter, sphb->msi); + for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { + sphb->msi_devs[i].key = *(uint32_t *) key; + sphb->msi_devs[i].value = *(spapr_pci_msi *) value; + } } static int spapr_pci_post_load(void *opaque, int version_id) @@ -1464,7 +1560,7 @@ static const TypeInfo spapr_phb_info = { } }; -PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) +PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index) { DeviceState *dev; @@ -1475,12 +1571,90 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index) return PCI_HOST_BRIDGE(dev); } +typedef struct sPAPRFDT { + void *fdt; + int node_off; + sPAPRPHBState *sphb; +} sPAPRFDT; + +static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + PCIBus *sec_bus; + sPAPRFDT *p = opaque; + int offset; + sPAPRFDT s_fdt; + + offset = spapr_create_pci_child_dt(p->sphb, pdev, p->fdt, p->node_off); + if (!offset) { + error_report("Failed to create pci child device tree node"); + return; + } + + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + s_fdt.fdt = p->fdt; + s_fdt.node_off = offset; + s_fdt.sphb = p->sphb; + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + spapr_populate_pci_devices_dt, + &s_fdt); +} + +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + unsigned int *bus_no = opaque; + unsigned int primary = *bus_no; + unsigned int subordinate = 0xff; + PCIBus *sec_bus = NULL; + + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + (*bus_no)++; + pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1); + pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1); + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + spapr_phb_pci_enumerate_bridge, bus_no); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); +} + +static void spapr_phb_pci_enumerate(sPAPRPHBState *phb) +{ + PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; + unsigned int bus_no = 0; + + pci_for_each_device(bus, pci_bus_num(bus), + spapr_phb_pci_enumerate_bridge, + &bus_no); + +} + int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) { int bus_off, i, j, ret; - char nodename[256]; + char nodename[FDT_NAME_MAX]; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; const uint64_t mmiosize = memory_region_size(&phb->memwindow); const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET; @@ -1514,9 +1688,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7]; sPAPRTCETable *tcet; + PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; + sPAPRFDT s_fdt; /* Start populating the FDT */ - sprintf(nodename, "pci@%" PRIx64, phb->buid); + snprintf(nodename, FDT_NAME_MAX, "pci@%" PRIx64, phb->buid); bus_off = fdt_add_subnode(fdt, 0, nodename); if (bus_off < 0) { return bus_off; @@ -1563,6 +1739,18 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, tcet->liobn, tcet->bus_offset, tcet->nb_table << tcet->page_shift); + /* Walk the bridges and program the bus numbers*/ + spapr_phb_pci_enumerate(phb); + _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1)); + + /* Populate tree nodes with PCI devices attached */ + s_fdt.fdt = fdt; + s_fdt.node_off = bus_off; + s_fdt.sphb = phb; + pci_for_each_device(bus, pci_bus_num(bus), + spapr_populate_pci_devices_dt, + &s_fdt); + ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb), SPAPR_DR_CONNECTOR_TYPE_PCI); if (ret) { @@ -1631,6 +1819,7 @@ static int spapr_switch_one_vga(DeviceState *dev, void *opaque) void spapr_pci_switch_vga(bool big_endian) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPRPHBState *sphb; /* diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 99a1be5..cca45ed 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -19,6 +19,7 @@ #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" +#include "hw/pci/msix.h" #include "linux/vfio.h" #include "hw/vfio/vfio.h" @@ -71,9 +72,26 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) spapr_tce_get_iommu(tcet)); } +static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb) +{ + struct vfio_eeh_pe_op op = { + .argsz = sizeof(op), + .op = VFIO_EEH_PE_ENABLE + }; + + vfio_container_ioctl(&svphb->phb.iommu_as, + svphb->iommugroupid, VFIO_EEH_PE_OP, &op); +} + static void spapr_phb_vfio_reset(DeviceState *qdev) { - /* Do nothing */ + /* + * The PE might be in frozen state. To reenable the EEH + * functionality on it will clean the frozen state, which + * ensures that the contained PCI devices will work properly + * after reboot. + */ + spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev)); } static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, @@ -142,6 +160,49 @@ static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state) return RTAS_OUT_SUCCESS; } +static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, + PCIDevice *pdev, + void *opaque) +{ + /* Check if the device is VFIO PCI device */ + if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + return; + } + + /* + * The MSIx table will be cleaned out by reset. We need + * disable it so that it can be reenabled properly. Also, + * the cached MSIx table should be cleared as it's not + * reflecting the contents in hardware. + */ + if (msix_enabled(pdev)) { + uint16_t flags; + + flags = pci_host_config_read_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), 2); + flags &= ~PCI_MSIX_FLAGS_ENABLE; + pci_host_config_write_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), flags, 2); + } + + msix_reset(pdev); +} + +static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) +{ + pci_for_each_device(bus, pci_bus_num(bus), + spapr_phb_vfio_eeh_clear_dev_msix, NULL); +} + +static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState *sphb) +{ + PCIHostState *phb = PCI_HOST_BRIDGE(sphb); + + pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL); +} + static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) { sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); @@ -153,9 +214,11 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) op.op = VFIO_EEH_PE_RESET_DEACTIVATE; break; case RTAS_SLOT_RESET_HOT: + spapr_phb_vfio_eeh_pre_reset(sphb); op.op = VFIO_EEH_PE_RESET_HOT; break; case RTAS_SLOT_RESET_FUNDAMENTAL: + spapr_phb_vfio_eeh_pre_reset(sphb); op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL; break; default: diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index fa28d43..2986f94 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -29,6 +29,7 @@ #include "sysemu/char.h" #include "hw/qdev.h" #include "sysemu/device_tree.h" +#include "sysemu/cpus.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" @@ -47,7 +48,7 @@ do { } while (0) #endif -static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr, +static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPRMachineState *spapr, uint32_t drc_index) { sPAPRConfigureConnectorState *ccs = NULL; @@ -61,14 +62,14 @@ static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr, return ccs; } -static void spapr_ccs_add(sPAPREnvironment *spapr, +static void spapr_ccs_add(sPAPRMachineState *spapr, sPAPRConfigureConnectorState *ccs) { g_assert(!spapr_ccs_find(spapr, ccs->drc_index)); QTAILQ_INSERT_HEAD(&spapr->ccs_list, ccs, next); } -static void spapr_ccs_remove(sPAPREnvironment *spapr, +static void spapr_ccs_remove(sPAPRMachineState *spapr, sPAPRConfigureConnectorState *ccs) { QTAILQ_REMOVE(&spapr->ccs_list, ccs, next); @@ -77,7 +78,7 @@ static void spapr_ccs_remove(sPAPREnvironment *spapr, void spapr_ccs_reset_hook(void *opaque) { - sPAPREnvironment *spapr = opaque; + sPAPRMachineState *spapr = opaque; sPAPRConfigureConnectorState *ccs, *ccs_tmp; QTAILQ_FOREACH_SAFE(ccs, &spapr->ccs_list, next, ccs_tmp) { @@ -85,7 +86,7 @@ void spapr_ccs_reset_hook(void *opaque) } } -static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_display_character(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -101,7 +102,7 @@ static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr, } } -static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { @@ -113,7 +114,7 @@ static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -127,7 +128,7 @@ static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -157,7 +158,7 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); } -static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr, +static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -204,7 +205,7 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); } -static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_stop_self(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -227,7 +228,7 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr, } static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -262,7 +263,7 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, } static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -282,7 +283,7 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, } static void rtas_ibm_os_term(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -294,7 +295,7 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu, rtas_st(rets, 0, ret); } -static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -319,7 +320,7 @@ static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 1, 100); } -static void rtas_get_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -356,7 +357,7 @@ static bool sensor_type_is_dr(uint32_t sensor_type) return false; } -static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -427,7 +428,7 @@ out_unimplemented: rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); } -static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -481,7 +482,7 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr, #define CC_WA_LEN 4096 static void rtas_ibm_configure_connector(PowerPCCPU *cpu, - sPAPREnvironment *spapr, + sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -601,7 +602,7 @@ static struct rtas_call { spapr_rtas_fn fn; } rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE]; -target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, +target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { @@ -651,6 +652,8 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, { int ret; int i; + uint32_t lrdr_capacity[5]; + MachineState *machine = MACHINE(qdev_get_machine()); ret = fdt_add_mem_rsv(fdt, rtas_addr, rtas_size); if (ret < 0) { @@ -699,6 +702,19 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, } } + + lrdr_capacity[0] = cpu_to_be32(((uint64_t)machine->maxram_size) >> 32); + lrdr_capacity[1] = cpu_to_be32(machine->maxram_size & 0xffffffff); + lrdr_capacity[2] = 0; + lrdr_capacity[3] = cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE); + lrdr_capacity[4] = cpu_to_be32(max_cpus/smp_threads); + ret = qemu_fdt_setprop(fdt, "/rtas", "ibm,lrdr-capacity", lrdr_capacity, + sizeof(lrdr_capacity)); + if (ret < 0) { + fprintf(stderr, "Couldn't add ibm,lrdr-capacity rtas property\n"); + return ret; + } + return 0; } diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index 9da3746..d20b8f2 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -76,7 +76,7 @@ int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset) return 0; } -static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -106,7 +106,7 @@ static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 7, ns); } -static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 8b59b64..c51eb8e 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -160,7 +160,7 @@ static int vio_make_devnode(VIOsPAPRDevice *dev, /* * CRQ handling */ -static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -218,7 +218,7 @@ static target_ulong free_crq(VIOsPAPRDevice *dev) return H_SUCCESS; } -static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -232,7 +232,7 @@ static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, return free_crq(dev); } -static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -255,7 +255,7 @@ static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_HARDWARE; } -static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong reg = args[0]; @@ -333,7 +333,7 @@ void spapr_vio_set_bypass(VIOsPAPRDevice *dev, bool bypass) dev->tcet->bypass = bypass; } -static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -364,7 +364,7 @@ static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_quiesce(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static void rtas_quiesce(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -426,6 +426,7 @@ static void spapr_vio_busdev_reset(DeviceState *qdev) static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev; VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); char *id; @@ -491,7 +492,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) pc->realize(dev, errp); } -static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPREnvironment *spapr, +static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index 439732f..de86f7c 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -197,7 +197,6 @@ static int xilinx_load_device_tree(hwaddr addr, static void virtex_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; - const char *cpu_model = machine->cpu_model; const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; hwaddr initrd_base = 0; @@ -214,11 +213,11 @@ static void virtex_init(MachineState *machine) int i; /* init CPUs */ - if (cpu_model == NULL) { - cpu_model = "440-Xilinx"; + if (machine->cpu_model == NULL) { + machine->cpu_model = "440-Xilinx"; } - cpu = ppc440_init_xilinx(&ram_size, 1, cpu_model, 400000000); + cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_model, 400000000); env = &cpu->env; qemu_register_reset(main_cpu_reset, cpu); diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 9dca388..5322b56 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -119,21 +119,23 @@ struct sPAPRPHBVFIOState { static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq); } -PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index); +PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index); int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt); -void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr); +void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr); void spapr_pci_rtas_init(void); -sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid); -PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid, +sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid); +PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, uint32_t config_addr); #endif /* __HW_SPAPR_PCI_H__ */ diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 7b4b1bb..91a61ab 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -2,6 +2,7 @@ #define __HW_SPAPR_H__ #include "sysemu/dma.h" +#include "hw/boards.h" #include "hw/ppc/xics.h" #include "hw/ppc/spapr_drc.h" @@ -12,15 +13,42 @@ typedef struct sPAPRConfigureConnectorState sPAPRConfigureConnectorState; typedef struct sPAPREventLogEntry sPAPREventLogEntry; #define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL +#define SPAPR_ENTRY_POINT 0x100 + +typedef struct sPAPRMachineClass sPAPRMachineClass; +typedef struct sPAPRMachineState sPAPRMachineState; + +#define TYPE_SPAPR_MACHINE "spapr-machine" +#define SPAPR_MACHINE(obj) \ + OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE) +#define SPAPR_MACHINE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(sPAPRMachineClass, obj, TYPE_SPAPR_MACHINE) +#define SPAPR_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE) + +/** + * sPAPRMachineClass: + */ +struct sPAPRMachineClass { + /*< private >*/ + MachineClass parent_class; + + /*< public >*/ +}; + +/** + * sPAPRMachineState: + */ +struct sPAPRMachineState { + /*< private >*/ + MachineState parent_obj; -typedef struct sPAPREnvironment { struct VIOsPAPRBus *vio_bus; QLIST_HEAD(, sPAPRPHBState) phbs; struct sPAPRNVRAM *nvram; XICSState *icp; DeviceState *rtc; - hwaddr ram_limit; void *htab; uint32_t htab_shift; hwaddr rma_size; @@ -29,7 +57,6 @@ typedef struct sPAPREnvironment { ssize_t rtas_size; void *rtas_blob; void *fdt_skel; - target_ulong entry_point; uint64_t rtc_offset; /* Now used only during incoming migration */ struct PPCTimebase tb; bool has_graphics; @@ -46,7 +73,10 @@ typedef struct sPAPREnvironment { /* RTAS state */ QTAILQ_HEAD(, sPAPRConfigureConnectorState) ccs_list; -} sPAPREnvironment; + + /*< public >*/ + char *kvm_type; +}; #define H_SUCCESS 0 #define H_BUSY 1 /* Hardware busy -- retry later */ @@ -319,8 +349,6 @@ typedef struct sPAPREnvironment { #define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2) #define KVMPPC_HCALL_MAX KVMPPC_H_CAS -extern sPAPREnvironment *spapr; - typedef struct sPAPRDeviceTreeUpdateHeader { uint32_t version_id; } sPAPRDeviceTreeUpdateHeader; @@ -335,7 +363,7 @@ typedef struct sPAPRDeviceTreeUpdateHeader { do { } while (0) #endif -typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, +typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm, target_ulong opcode, target_ulong *args); @@ -490,12 +518,12 @@ static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len, rtas_st_buffer_direct(phys + 2, phys_len - 2, buffer, buffer_len); } -typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr, +typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn); -target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr, +target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *sm, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, @@ -546,9 +574,10 @@ struct sPAPREventLogEntry { QTAILQ_ENTRY(sPAPREventLogEntry) next; }; -void spapr_events_init(sPAPREnvironment *spapr); +void spapr_events_init(sPAPRMachineState *sm); void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); -int spapr_h_cas_compose_response(target_ulong addr, target_ulong size); +int spapr_h_cas_compose_response(sPAPRMachineState *sm, + target_ulong addr, target_ulong size); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, uint64_t bus_offset, uint32_t page_shift, @@ -578,4 +607,6 @@ void spapr_ccs_reset_hook(void *opaque); void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns); int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset); +#define SPAPR_MEMORY_BLOCK_SIZE (1 << 28) /* 256MB */ + #endif /* !defined (__HW_SPAPR_H__) */ diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h index f95016a..2299a54 100644 --- a/include/hw/ppc/spapr_vio.h +++ b/include/hw/ppc/spapr_vio.h @@ -88,6 +88,8 @@ extern int spapr_vio_signal(VIOsPAPRDevice *dev, target_ulong mode); static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + return xics_get_qirq(spapr->icp, dev->irq); } @@ -126,7 +128,7 @@ static inline int spapr_vio_dma_set(VIOsPAPRDevice *dev, uint64_t taddr, int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq); -VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg); +VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg); void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len); void spapr_vty_create(VIOsPAPRBus *bus, CharDriverState *chardev); void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd); diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index a214dd7..355a966 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -109,6 +109,7 @@ struct ICPState { uint8_t pending_priority; uint8_t mfrr; qemu_irq output; + bool cap_irq_xics_enabled; }; #define TYPE_ICS "ics" diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 39f0f19..42f42f5 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -323,6 +323,8 @@ extern struct CPUTailQ cpus; #define CPU_FOREACH(cpu) QTAILQ_FOREACH(cpu, &cpus, node) #define CPU_FOREACH_SAFE(cpu, next_cpu) \ QTAILQ_FOREACH_SAFE(cpu, &cpus, node, next_cpu) +#define CPU_FOREACH_REVERSE(cpu) \ + QTAILQ_FOREACH_REVERSE(cpu, &cpus, CPUTailQ, node) #define first_cpu QTAILQ_FIRST(&cpus) DECLARE_TLS(CPUState *, current_cpu); diff --git a/linux-user/main.c b/linux-user/main.c index c855bcc..6c5c2ef 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -1424,8 +1424,7 @@ void cpu_loop (CPUSPARCState *env) #ifdef TARGET_PPC static inline uint64_t cpu_ppc_get_tb(CPUPPCState *env) { - /* TO FIX */ - return 0; + return cpu_get_real_ticks(); } uint64_t cpu_ppc_load_tbl(CPUPPCState *env) diff --git a/pc-bios/README b/pc-bios/README index 63e7254..05cf042 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -17,7 +17,7 @@ - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/aik/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20150313. + built from git tag qemu-slof-20150429. - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin Binary files differindex ab72cba..0398ac6 100644 --- a/pc-bios/slof.bin +++ b/pc-bios/slof.bin diff --git a/roms/SLOF b/roms/SLOF -Subproject c89b0df661c0a6bfa9ff0ed4a371f631f5ee38b +Subproject 7d766a3ac9b2474f6c7da0084d43590cbbf047b diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index ddf469f..110436d 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -40,6 +40,7 @@ #include "trace.h" #include "exec/gdbstub.h" #include "exec/memattrs.h" +#include "sysemu/hostmem.h" //#define DEBUG_KVM @@ -303,16 +304,11 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info) kvm_get_fallback_smmu_info(cpu, info); } -static long getrampagesize(void) +static long gethugepagesize(const char *mem_path) { struct statfs fs; int ret; - if (!mem_path) { - /* guest RAM is backed by normal anonymous pages */ - return getpagesize(); - } - do { ret = statfs(mem_path, &fs); } while (ret != 0 && errno == EINTR); @@ -334,6 +330,55 @@ static long getrampagesize(void) return fs.f_bsize; } +static int find_max_supported_pagesize(Object *obj, void *opaque) +{ + char *mem_path; + long *hpsize_min = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + mem_path = object_property_get_str(obj, "mem-path", NULL); + if (mem_path) { + long hpsize = gethugepagesize(mem_path); + if (hpsize < *hpsize_min) { + *hpsize_min = hpsize; + } + } else { + *hpsize_min = getpagesize(); + } + } + + return 0; +} + +static long getrampagesize(void) +{ + long hpsize = LONG_MAX; + Object *memdev_root; + + if (mem_path) { + return gethugepagesize(mem_path); + } + + /* it's possible we have memory-backend objects with + * hugepage-backed RAM. these may get mapped into system + * address space via -numa parameters or memory hotplug + * hooks. we want to take these into account, but we + * also want to make sure these supported hugepage + * sizes are applicable across the entire range of memory + * we may boot from, so we take the min across all + * backends, and assume normal pages in cases where a + * backend isn't backed by hugepages. + */ + memdev_root = object_resolve_path("/objects", NULL); + if (!memdev_root) { + return getpagesize(); + } + + object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize); + + return (hpsize == LONG_MAX) ? getpagesize() : hpsize; +} + static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) { if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { |