From 277c7a4d717aedbcb253ca152ae4da67e4162470 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 24 Jul 2014 10:46:47 +0200 Subject: PPC: KVM: Fix g3beige and mac99 when HV is loaded On PPC we have 2 different styles of KVM: PR and HV. HV can only virtualize sPAPR guests while PR can virtualize everything that's reasonably close to the host hardware platform. As long as only one kernel module (PR or HV) is loaded, the "default" kvm type is the module that's loaded. So if your hardware only supports PR mode you can easily spawn a Mac VM. However, if both HV and PR are loaded we default to HV mode. And in that case the Mac machines have to explicitly ask for PR mode to get a working VM. Fix this up by explicitly having the Mac machines ask for PR style KVM. This fixes bootup of Mac VMs on systems where both HV and PR kvm modules are loaded for me. Signed-off-by: Alexander Graf --- hw/ppc/mac_newworld.c | 7 +++++++ hw/ppc/mac_oldworld.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'hw') diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 1ec4bb4..0693168 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -477,12 +477,19 @@ static void ppc_core99_init(MachineState *machine) qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); } +static int core99_kvm_type(const char *arg) +{ + /* Always force PR KVM */ + return 2; +} + static QEMUMachine core99_machine = { .name = "mac99", .desc = "Mac99 based PowerMAC", .init = ppc_core99_init, .max_cpus = MAX_CPUS, .default_boot_order = "cd", + .kvm_type = core99_kvm_type, }; static void core99_machine_init(void) diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index cd9bdbc..ec7ed38 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -346,6 +346,12 @@ static void ppc_heathrow_init(MachineState *machine) qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); } +static int heathrow_kvm_type(const char *arg) +{ + /* Always force PR KVM */ + return 2; +} + static QEMUMachine 
heathrow_machine = { .name = "g3beige", .desc = "Heathrow based PowerMAC", @@ -355,6 +361,7 @@ static QEMUMachine heathrow_machine = { .is_default = 1, #endif .default_boot_order = "cd", /* TOFIX "cad" when Mac floppy is implemented */ + .kvm_type = heathrow_kvm_type, }; static void heathrow_machine_init(void) -- cgit v1.1 From 2e14072f9e859272c7b94b8e189bd30bb4954aa1 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Mon, 30 Jun 2014 14:05:29 +0530 Subject: ppc: spapr-rtas - implement os-term rtas call PAPR compliant guest calls this in absence of kdump. This finally reaches the guest and can be handled according to the policies set by higher level tools (like taking dump) for further analysis by tools like crash. Linux kernel calls ibm,os-term when extended property of os-term is set. This makes sure that a return to the linux kernel is guaranteed. Signed-off-by: Nikunj A Dadhania [agraf: reduce RTAS_TOKEN_MAX] Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 9 +++++++++ hw/ppc/spapr_rtas.c | 15 +++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 5cb452f..6bb646c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -502,6 +502,15 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX))); + /* + * According to PAPR, rtas ibm,os-term, does not gaurantee a return + * back to the guest cpu. + * + * While an additional ibm,extended-os-term property indicates that + * rtas call return will always occur. Set this property. 
+ */ + _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0))); + _FDT((fdt_end_node(fdt))); /* interrupt controller */ diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 9ba1ba6..2ec2a8e 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -277,6 +277,19 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, rtas_st(rets, 0, ret); } +static void rtas_ibm_os_term(PowerPCCPU *cpu, + sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong ret = 0; + + qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort); + + rtas_st(rets, 0, ret); +} + static struct rtas_call { const char *name; spapr_rtas_fn fn; @@ -404,6 +417,8 @@ static void core_rtas_register_types(void) spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER, "ibm,set-system-parameter", rtas_ibm_set_system_parameter); + spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term", + rtas_ibm_os_term); } type_init(core_rtas_register_types) -- cgit v1.1 From 7d0cd464a756f3d47f308d7c47eb888b573a9fe4 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 8 Jul 2014 16:02:26 +0100 Subject: hw/ppc/spapr_hcall.c: Fix typo in function names Fix a typo in the names of a couple of functions (s/resouce/resource/). 
Signed-off-by: Peter Maydell Signed-off-by: Alexander Graf --- hw/ppc/spapr_hcall.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 467858c..8651447 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -712,10 +712,10 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } -static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu, - target_ulong mflags, - target_ulong value1, - target_ulong value2) +static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu, + target_ulong mflags, + target_ulong value1, + target_ulong value2) { CPUState *cs; @@ -743,10 +743,10 @@ static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu, return H_UNSUPPORTED_FLAG; } -static target_ulong h_set_mode_resouce_addr_trans_mode(PowerPCCPU *cpu, - target_ulong mflags, - target_ulong value1, - target_ulong value2) +static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, + target_ulong mflags, + target_ulong value1, + target_ulong value2) { CPUState *cs; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); @@ -794,11 +794,11 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr, switch (resource) { case H_SET_MODE_RESOURCE_LE: - ret = h_set_mode_resouce_le(cpu, args[0], args[2], args[3]); + ret = h_set_mode_resource_le(cpu, args[0], args[2], args[3]); break; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: - ret = h_set_mode_resouce_addr_trans_mode(cpu, args[0], - args[2], args[3]); + ret = h_set_mode_resource_addr_trans_mode(cpu, args[0], + args[2], args[3]); break; } -- cgit v1.1 From ef9514431d33e52eb611f799670ca86618c1b7d9 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Wed, 9 Jul 2014 16:08:37 +0530 Subject: spapr: add uuid/host details to device tree Useful for identifying the guest/host uniquely within the guest. Adding following properties to the guest root node. 
vm,uuid - uuid of the guest host-model - Host model number host-serial - Host machine serial number hypervisor type - Tells its "kvm" Signed-off-by: Nikunj A Dadhania Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 6bb646c..0adea31 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -319,6 +319,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; + char *buf; add_str(hypertas, "hcall-pft"); add_str(hypertas, "hcall-term"); @@ -348,6 +349,33 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)"))); _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries"))); + if (kvm_enabled()) { + _FDT((fdt_property_string(fdt, "hypervisor", "kvm"))); + } + + /* + * Add info to guest to indentify which host is it being run on + * and what is the uuid of the guest + */ + if (kvmppc_get_host_model(&buf)) { + _FDT((fdt_property_string(fdt, "host-model", buf))); + g_free(buf); + } + if (kvmppc_get_host_serial(&buf)) { + _FDT((fdt_property_string(fdt, "host-serial", buf))); + g_free(buf); + } + + buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1], + qemu_uuid[2], qemu_uuid[3], qemu_uuid[4], + qemu_uuid[5], qemu_uuid[6], qemu_uuid[7], + qemu_uuid[8], qemu_uuid[9], qemu_uuid[10], + qemu_uuid[11], qemu_uuid[12], qemu_uuid[13], + qemu_uuid[14], qemu_uuid[15]); + + _FDT((fdt_property_string(fdt, "vm,uuid", buf))); + g_free(buf); + _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); -- cgit v1.1 From 261265cc912b375649fcdf7aded0f87359dba544 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 11 Jul 2014 
03:24:39 +0200 Subject: PPC: mac99: Move NVRAM to page boundary when necessary When running KVM we have to adhere to host page boundaries for memory slots. Unfortunately the NVRAM on mac99 is a 4k RAM hole inside of an MMIO flash area. So if our host is configured with 64k page size, we can't use the mac99 target with KVM. This is a real shame, as this limitation is not really an issue - we can easily map NVRAM somewhere else and at least Linux and Mac OS X use it at their new location. So in that emergency case when it's about failing to run at all and moving NVRAM to a place it shouldn't be at, choose the latter. This patch enables -M mac99 with KVM on 64k page size hosts. Signed-off-by: Alexander Graf --- hw/ppc/mac_newworld.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 0693168..26067b4 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -176,6 +176,7 @@ static void ppc_core99_init(MachineState *machine) SysBusDevice *s; DeviceState *dev; int *token = g_new(int, 1); + hwaddr nvram_addr = 0xFFF04000; linux_boot = (kernel_filename != NULL); @@ -426,11 +427,18 @@ static void ppc_core99_init(MachineState *machine) } /* The NewWorld NVRAM is not located in the MacIO device */ +#ifdef CONFIG_KVM + if (kvm_enabled() && getpagesize() > 4096) { + /* We can't combine read-write and read-only in a single page, so + move the NVRAM out of ROM again for KVM */ + nvram_addr = 0xFFE00000; + } +#endif dev = qdev_create(NULL, TYPE_MACIO_NVRAM); qdev_prop_set_uint32(dev, "size", 0x2000); qdev_prop_set_uint32(dev, "it_shift", 1); qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xFFF04000); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, nvram_addr); nvr = MACIO_NVRAM(dev); pmac_format_nvram_partition(nvr, 0x2000); /* No PCI init: the BIOS will do it */ @@ -473,6 +481,7 @@ static void ppc_core99_init(MachineState *machine) /* Mac OS X requires a "known 
good" clock-frequency value; pass it one. */ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); } -- cgit v1.1 From a21a7a701252717f05defee8a1a33d72c28fabb7 Mon Sep 17 00:00:00 2001 From: Gonglei Date: Sat, 26 Jul 2014 12:45:33 +0800 Subject: spapr: fix possible memory leak get_boot_devices_list() will malloc memory, spapr_finalize_fdt doesn't free it. Signed-off-by: Chenliang Signed-off-by: Gonglei Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0adea31..522ee27 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -783,6 +783,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + g_free(bootlist); g_free(fdt); } -- cgit v1.1 From 26a8c353bf0ffb485f4a68bea97efcef7d2bbaa3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 3 Jul 2014 13:10:02 +1000 Subject: spapr: Move DT memory node rendering to a helper This moves recurring bits of code related to memory@xxx nodes creation to a helper. This makes use of the new helper for node@0. 
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 522ee27..28c8578 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -634,6 +634,31 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size) return 0; } +static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, + hwaddr size) +{ + uint32_t associativity[] = { + cpu_to_be32(0x4), /* length */ + cpu_to_be32(0x0), cpu_to_be32(0x0), + cpu_to_be32(nodeid), cpu_to_be32(nodeid) + }; + char mem_name[32]; + uint64_t mem_reg_property[2]; + int off; + + mem_reg_property[0] = cpu_to_be64(start); + mem_reg_property[1] = cpu_to_be64(size); + + sprintf(mem_name, "memory@" TARGET_FMT_lx, start); + off = fdt_add_subnode(fdt, 0, mem_name); + _FDT(off); + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, + sizeof(associativity)))); +} + static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) { uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0), @@ -652,29 +677,12 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) } /* RMA */ - mem_reg_property[0] = 0; - mem_reg_property[1] = cpu_to_be64(spapr->rma_size); - off = fdt_add_subnode(fdt, 0, "memory@0"); - _FDT(off); - _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); - _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, - sizeof(mem_reg_property)))); - _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, - sizeof(associativity)))); + spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size); /* RAM: Node 0 */ if (node0_size > spapr->rma_size) { - mem_reg_property[0] = cpu_to_be64(spapr->rma_size); - mem_reg_property[1] = 
cpu_to_be64(node0_size - spapr->rma_size); - - sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size); - off = fdt_add_subnode(fdt, 0, mem_name); - _FDT(off); - _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); - _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, - sizeof(mem_reg_property)))); - _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, - sizeof(associativity)))); + spapr_populate_memory_node(fdt, 0, spapr->rma_size, + node0_size - spapr->rma_size); } /* RAM: Node 1 and beyond */ -- cgit v1.1 From 81014ac2b88b5fd275c33b463efe306668e920ed Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 3 Jul 2014 13:10:03 +1000 Subject: spapr: Use DT memory node rendering helper for other nodes This finishes refactoring by using the spapr_populate_memory_node helper for all nodes and removing leftovers from spapr_populate_memory(). This is not a part of the previous patch because the patches look nicer apart. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 28c8578..9b9b6c4 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -661,13 +661,8 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) { - uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0), - cpu_to_be32(0x0), cpu_to_be32(0x0), - cpu_to_be32(0x0)}; - char mem_name[32]; hwaddr node0_size, mem_start, node_size; - uint64_t mem_reg_property[2]; - int i, off; + int i; /* memory node(s) */ if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) { @@ -688,7 +683,6 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) /* RAM: Node 1 and beyond */ mem_start = node0_size; for (i = 1; i < nb_numa_nodes; i++) { - mem_reg_property[0] = cpu_to_be64(mem_start); if (mem_start >= 
ram_size) { node_size = 0; } else { @@ -697,16 +691,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) node_size = ram_size - mem_start; } } - mem_reg_property[1] = cpu_to_be64(node_size); - associativity[3] = associativity[4] = cpu_to_be32(i); - sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start); - off = fdt_add_subnode(fdt, 0, mem_name); - _FDT(off); - _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); - _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, - sizeof(mem_reg_property)))); - _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, - sizeof(associativity)))); + spapr_populate_memory_node(fdt, i, mem_start, node_size); mem_start += node_size; } -- cgit v1.1 From 7db8a127e373e468d1f61e46e01e50d1aa33e827 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 3 Jul 2014 13:10:04 +1000 Subject: spapr: Refactor spapr_populate_memory() to allow memoryless nodes Current QEMU does not support memoryless NUMA nodes, however actual hardware may have them so it makes sense to have a way to emulate them in QEMU. This prepares SPAPR for that. This moves 2 calls of spapr_populate_memory_node() into the existing loop over numa nodes so first several nodes may have no memory and this still will work. If there is no numa configuration, the code assumes there is just a single node at 0 and it has all the guest memory. 
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 9b9b6c4..718a201 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -661,36 +661,36 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) { - hwaddr node0_size, mem_start, node_size; - int i; - - /* memory node(s) */ - if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) { - node0_size = numa_info[0].node_mem; - } else { - node0_size = ram_size; - } + hwaddr mem_start, node_size; + int i, nb_nodes = nb_numa_nodes; + NodeInfo *nodes = numa_info; + NodeInfo ramnode; - /* RMA */ - spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size); - - /* RAM: Node 0 */ - if (node0_size > spapr->rma_size) { - spapr_populate_memory_node(fdt, 0, spapr->rma_size, - node0_size - spapr->rma_size); + /* No NUMA nodes, assume there is just one node with whole RAM */ + if (!nb_numa_nodes) { + nb_nodes = 1; + ramnode.node_mem = ram_size; + nodes = &ramnode; } - /* RAM: Node 1 and beyond */ - mem_start = node0_size; - for (i = 1; i < nb_numa_nodes; i++) { + for (i = 0, mem_start = 0; i < nb_nodes; ++i) { + if (!nodes[i].node_mem) { + continue; + } if (mem_start >= ram_size) { node_size = 0; } else { - node_size = numa_info[i].node_mem; + node_size = nodes[i].node_mem; if (node_size > ram_size - mem_start) { node_size = ram_size - mem_start; } } + if (!mem_start) { + /* ppc_spapr_init() checks for rma_size <= node0_size already */ + spapr_populate_memory_node(fdt, i, 0, spapr->rma_size); + mem_start += spapr->rma_size; + node_size -= spapr->rma_size; + } spapr_populate_memory_node(fdt, i, mem_start, node_size); mem_start += node_size; } -- cgit v1.1 From 6010818c30ce9c796b4e22fd261fc6fea1cecbfc Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy 
Date: Thu, 3 Jul 2014 13:10:05 +1000 Subject: spapr: Split memory nodes to power-of-two blocks Linux kernel expects nodes to have power-of-two size and does WARN_ON if this is not the case: [ 0.041456] WARNING: at drivers/base/memory.c:115 which is: === /* Validate blk_sz is a power of 2 and not less than section size */ if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) { WARN_ON(1); block_sz = MIN_MEMORY_BLOCK_SIZE; } === This splits memory nodes into set of smaller blocks with a size which is a power of two. This makes sure the start address of every node is aligned to the node size. Signed-off-by: Alexey Kardashevskiy [agraf: squash windows compile fix in] Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 718a201..f2fa11e 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -691,8 +691,18 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt) mem_start += spapr->rma_size; node_size -= spapr->rma_size; } - spapr_populate_memory_node(fdt, i, mem_start, node_size); - mem_start += node_size; + for ( ; node_size; ) { + hwaddr sizetmp = pow2floor(node_size); + + /* mem_start != 0 here */ + if (ctzl(mem_start) < ctzl(sizetmp)) { + sizetmp = 1ULL << ctzl(mem_start); + } + + spapr_populate_memory_node(fdt, i, mem_start, sizetmp); + node_size -= sizetmp; + mem_start += sizetmp; + } } return 0; -- cgit v1.1 From b082d65a30078d176f8d1fbb3b99e1449fa2fcff Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 3 Jul 2014 13:10:06 +1000 Subject: spapr: Add a helper for node0_size calculation In multiple places there is a node0_size variable calculation which assumes that NUMA node #0 and memory node #0 are the same things which they are not. Since we are going to change it and do not want to change it in multiple places, let's make a helper. This adds a spapr_node0_size() helper and makes use of it. 
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index f2fa11e..1623805 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -283,6 +283,19 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, return (p - prop) * sizeof(uint32_t); } +static hwaddr spapr_node0_size(void) +{ + if (nb_numa_nodes) { + int i; + for (i = 0; i < nb_numa_nodes; ++i) { + if (numa_info[i].node_mem) { + return MIN(pow2floor(numa_info[i].node_mem), ram_size); + } + } + } + return ram_size; +} + #define _FDT(exp) \ do { \ int ret = (exp); \ @@ -833,9 +846,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) /* Update the RMA size if necessary */ if (spapr->vrma_adjust) { - hwaddr node0_size = (nb_numa_nodes > 1) ? - numa_info[0].node_mem : ram_size; - spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift); + spapr->rma_size = kvmppc_rma_size(spapr_node0_size(), + spapr->htab_shift); } } @@ -1268,7 +1280,7 @@ static void ppc_spapr_init(MachineState *machine) MemoryRegion *rma_region; void *rma = NULL; hwaddr rma_alloc_size; - hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size; + hwaddr node0_size = spapr_node0_size(); uint32_t initrd_base = 0; long kernel_size = 0, initrd_size = 0; long load_limit, rtas_limit, fw_size; -- cgit v1.1 From c3b4f589d86ae4a6b9f6c1e0587998bc525833da Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 3 Jul 2014 13:10:07 +1000 Subject: spapr: Fix ibm,associativity for memory nodes We want the associativity lists of memory and CPU nodes to match but memory nodes have incorrect domain#3 which is zero for CPU so they won't match. This clears domain#3 in the list to match the CPU associativity lists. 
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1623805..12dbf1b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -653,7 +653,7 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, uint32_t associativity[] = { cpu_to_be32(0x4), /* length */ cpu_to_be32(0x0), cpu_to_be32(0x0), - cpu_to_be32(nodeid), cpu_to_be32(nodeid) + cpu_to_be32(0x0), cpu_to_be32(nodeid) }; char mem_name[32]; uint64_t mem_reg_property[2]; -- cgit v1.1 From ea87616d6c44d998affef3d3b9fdfc49d14b8150 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Jul 2014 13:02:03 +1000 Subject: loader: Add load_image_size() to replace load_image() A subsequent patch to ppc/spapr needs to load the RTAS blob into qemu memory rather than target memory (so it can later be copied into the right spot at machine reset time). I would use load_image() but it is marked deprecated because it doesn't take a buffer size as argument, so let's add load_image_size() that does. 
Signed-off-by: Benjamin Herrenschmidt [aik: fixed errors from checkpatch.pl] Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/core/loader.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'hw') diff --git a/hw/core/loader.c b/hw/core/loader.c index 193f0f8..597b117 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -89,6 +89,27 @@ int load_image(const char *filename, uint8_t *addr) return size; } +/* return the size or -1 if error */ +ssize_t load_image_size(const char *filename, void *addr, size_t size) +{ + int fd; + ssize_t actsize; + + fd = open(filename, O_RDONLY | O_BINARY); + if (fd < 0) { + return -1; + } + + actsize = read(fd, addr, size); + if (actsize < 0) { + close(fd); + return -1; + } + close(fd); + + return actsize; +} + /* read()-like version */ ssize_t read_targphys(const char *name, int fd, hwaddr dst_addr, size_t nbytes) -- cgit v1.1 From b7d1f77adaab790d20232df261d4e2ff6a77f556 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 21 Jul 2014 13:02:04 +1000 Subject: spapr: Locate RTAS and device-tree based on real RMA We currently calculate the final RTAS and FDT location based on the early estimate of the RMA size, cropped to 256M on KVM since we only know the real RMA size at reset time which happens much later in the boot process. This means the FDT and RTAS end up right below 256M while they could be much higher, using precious RMA space and limiting what the OS bootloader can put there which has proved to be a problem with some OSes (such as when using very large initrd's) Fortunately, we do the actual copy of the device-tree into guest memory much later, during reset, late enough to be able to do it using the final RMA value, we just need to move the calculation to the right place. However, RTAS is still loaded too early, so we change the code to load the tiny blob into qemu memory early on, and then copy it into guest memory at reset time. 
It's small enough that the memory usage doesn't matter. Signed-off-by: Benjamin Herrenschmidt [aik: fixed errors from checkpatch.pl, defined RTAS_MAX_ADDR] Signed-off-by: Alexey Kardashevskiy [agraf: fix compilation on 32bit hosts] Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 12dbf1b..2f16d9d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -71,6 +71,7 @@ */ #define FDT_MAX_SIZE 0x40000 #define RTAS_MAX_SIZE 0x10000 +#define RTAS_MAX_ADDR 0x80000000 /* RTAS must stay below that */ #define FW_MAX_SIZE 0x400000 #define FW_FILE_NAME "slof.bin" #define FW_OVERHEAD 0x2800000 @@ -854,16 +855,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr) static void ppc_spapr_reset(void) { PowerPCCPU *first_ppc_cpu; + uint32_t rtas_limit; /* Reset the hash table & recalc the RMA */ spapr_reset_htab(spapr); qemu_devices_reset(); + /* + * We place the device tree and RTAS just below either the top of the RMA, + * or just below 2GB, whichever is lowere, so that it can be + * processed with 32-bit real mode code if necessary + */ + rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR); + spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE; + spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE; + /* Load the fdt */ spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr, spapr->rtas_size); + /* Copy RTAS over */ + cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob, + spapr->rtas_size); + /* Set up the entry state */ first_ppc_cpu = POWERPC_CPU(first_cpu); first_ppc_cpu->env.gpr[3] = spapr->fdt_addr; @@ -1283,7 +1298,7 @@ static void ppc_spapr_init(MachineState *machine) hwaddr node0_size = spapr_node0_size(); uint32_t initrd_base = 0; long kernel_size = 0, initrd_size = 0; - long load_limit, rtas_limit, fw_size; + long load_limit, fw_size; bool kernel_le = false; char *filename; @@ -1328,13 +1343,8 @@ static void 
ppc_spapr_init(MachineState *machine) exit(1); } - /* We place the device tree and RTAS just below either the top of the RMA, - * or just below 2GB, whichever is lowere, so that it can be - * processed with 32-bit real mode code if necessary */ - rtas_limit = MIN(spapr->rma_size, 0x80000000); - spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE; - spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE; - load_limit = spapr->fdt_addr - FW_OVERHEAD; + /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ + load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; /* We aim for a hash table of size 1/128 the size of RAM. The * normal rule of thumb is 1/64 the size of RAM, but that's much @@ -1402,14 +1412,14 @@ static void ppc_spapr_init(MachineState *machine) } filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin"); - spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr, - rtas_limit - spapr->rtas_addr); - if (spapr->rtas_size < 0) { + spapr->rtas_size = get_image_size(filename); + spapr->rtas_blob = g_malloc(spapr->rtas_size); + if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) { hw_error("qemu: could not load LPAR rtas '%s'\n", filename); exit(1); } if (spapr->rtas_size > RTAS_MAX_SIZE) { - hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n", + hw_error("RTAS too big ! 0x%zx bytes (max is 0x%x)\n", spapr->rtas_size, RTAS_MAX_SIZE); exit(1); } -- cgit v1.1 From 9674a356267ee9cf8230775f88c90c299a4affc9 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Fri, 27 Jun 2014 12:17:38 +0530 Subject: ppc/spapr: Fix MAX_CPUS to 255 MAX_CPUS 256 is inconsistent with qemu supporting up to 255 cpus. This MAX_CPUS number was percolated back to "virsh capabilities" with wrong max_cpus. 
Signed-off-by: Nikunj A Dadhania Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 2f16d9d..555a007 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -81,7 +81,7 @@ #define TIMEBASE_FREQ 512000000ULL -#define MAX_CPUS 256 +#define MAX_CPUS 255 #define PHANDLE_XICP 0x00001111 -- cgit v1.1 From d696760b43ca46c070f74fe12d90f38904232467 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 16:45:46 +0200 Subject: PPC: mac99: Fix core99 timer frequency There is a special timer in the mac99 machine that we recently started to emulate. Unfortunately we emulated it in the wrong frequency. This patch adapts the frequency Mac OS X uses to evaluate results from this timer, making calculations it bases off of it work. Signed-off-by: Alexander Graf --- hw/misc/macio/macio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index 47f45f5..35eaa00 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -243,13 +243,18 @@ static void timer_write(void *opaque, hwaddr addr, uint64_t value, static uint64_t timer_read(void *opaque, hwaddr addr, unsigned size) { uint32_t value = 0; + uint64_t systime = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + uint64_t kltime; + + kltime = muldiv64(systime, 4194300, get_ticks_per_sec() * 4); + kltime = muldiv64(kltime, 18432000, 1048575); switch (addr) { case 0x38: - value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + value = kltime; break; case 0x3c: - value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) >> 32; + value = kltime >> 32; break; } -- cgit v1.1 From a8b0503701ed8de9353834b0955260f4d9f08640 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 16:50:39 +0200 Subject: PPC: mac_nvram: Remove unused functions The macio_nvram_read and macio_nvram_write functions are never called, just remove them. 
Signed-off-by: Alexander Graf --- hw/nvram/mac_nvram.c | 23 ----------------------- hw/ppc/mac.h | 2 -- 2 files changed, 25 deletions(-) (limited to 'hw') diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c index 170b10b..bcff074 100644 --- a/hw/nvram/mac_nvram.c +++ b/hw/nvram/mac_nvram.c @@ -39,29 +39,6 @@ #define DEF_SYSTEM_SIZE 0xc10 -/* Direct access to NVRAM */ -uint8_t macio_nvram_read(MacIONVRAMState *s, uint32_t addr) -{ - uint32_t ret; - - if (addr < s->size) { - ret = s->data[addr]; - } else { - ret = -1; - } - NVR_DPRINTF("read addr %04" PRIx32 " val %" PRIx8 "\n", addr, ret); - - return ret; -} - -void macio_nvram_write(MacIONVRAMState *s, uint32_t addr, uint8_t val) -{ - NVR_DPRINTF("write addr %04" PRIx32 " val %" PRIx8 "\n", addr, val); - if (addr < s->size) { - s->data[addr] = val; - } -} - /* macio style NVRAM device */ static void macio_nvram_writeb(void *opaque, hwaddr addr, uint64_t value, unsigned size) diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h index c1faf9c..23536f4 100644 --- a/hw/ppc/mac.h +++ b/hw/ppc/mac.h @@ -178,6 +178,4 @@ typedef struct MacIONVRAMState { } MacIONVRAMState; void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len); -uint8_t macio_nvram_read(MacIONVRAMState *s, uint32_t addr); -void macio_nvram_write(MacIONVRAMState *s, uint32_t addr, uint8_t val); #endif /* !defined(__PPC_MAC_H__) */ -- cgit v1.1 From b19eae18c1cdf053fd85a39902cf77d8b561ef76 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 16:55:53 +0200 Subject: PPC: mac_nvram: Allow 2 and 4 byte accesses The NVRAM in our Core99 machine really supports 2byte and 4byte accesses just as well as 1byte accesses. In fact, Mac OS X uses those. Add support for higher register size granularities. 
Signed-off-by: Alexander Graf --- hw/nvram/mac_nvram.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'hw') diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c index bcff074..7656951 100644 --- a/hw/nvram/mac_nvram.c +++ b/hw/nvram/mac_nvram.c @@ -66,6 +66,10 @@ static uint64_t macio_nvram_readb(void *opaque, hwaddr addr, static const MemoryRegionOps macio_nvram_ops = { .read = macio_nvram_readb, .write = macio_nvram_writeb, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .impl.min_access_size = 1, + .impl.max_access_size = 1, .endianness = DEVICE_BIG_ENDIAN, }; -- cgit v1.1 From 2d9907a3332888e43bc73fe9b98a32f8de662526 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 17:09:55 +0200 Subject: PPC: mac_nvram: Split NVRAM into OF and OSX parts Mac OS X (at least with -M mac99) searches for a valid NVRAM partition of a special Apple type. If it can't find that partition in the first half of NVRAM, it will look at the second half. There are a few implications from this. The first is that we need to split NVRAM into 2 halves - one for Open Firmware use, the other one for Mac OS X. Without this split Mac OS X will just loop endlessly over the second half trying to find a partition. The other implication is that we should provide a specially crafted Mac OS X compatible NVRAM partition on the second half that Mac OS X can happily use as it sees fit. 
Signed-off-by: Alexander Graf --- hw/nvram/mac_nvram.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c index 7656951..d35f8a3 100644 --- a/hw/nvram/mac_nvram.c +++ b/hw/nvram/mac_nvram.c @@ -26,6 +26,7 @@ #include "hw/nvram/openbios_firmware_abi.h" #include "sysemu/sysemu.h" #include "hw/ppc/mac.h" +#include /* debug NVR */ //#define DEBUG_NVR @@ -137,15 +138,16 @@ static void macio_nvram_register_types(void) } /* Set up a system OpenBIOS NVRAM partition */ -void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len) +static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, + int len) { unsigned int i; - uint32_t start = 0, end; + uint32_t start = off, end; struct OpenBIOS_nvpart_v1 *part_header; // OpenBIOS nvram variables // Variable partition - part_header = (struct OpenBIOS_nvpart_v1 *)nvr->data; + part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start]; part_header->signature = OPENBIOS_PART_SYSTEM; pstrcpy(part_header->name, sizeof(part_header->name), "system"); @@ -173,4 +175,39 @@ void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len) OpenBIOS_finish_partition(part_header, end - start); } +#define OSX_NVRAM_SIGNATURE (0x5A) + +/* Set up a Mac OS X NVRAM partition */ +static void pmac_format_nvram_partition_osx(MacIONVRAMState *nvr, int off, + int len) +{ + uint32_t start = off; + struct OpenBIOS_nvpart_v1 *part_header; + unsigned char *data = &nvr->data[start]; + + /* empty partition */ + part_header = (struct OpenBIOS_nvpart_v1 *)data; + part_header->signature = OSX_NVRAM_SIGNATURE; + pstrcpy(part_header->name, sizeof(part_header->name), "wwwwwwwwwwww"); + + OpenBIOS_finish_partition(part_header, len); + + /* Generation */ + stl_be_p(&data[20], 2); + + /* Adler32 checksum */ + stl_be_p(&data[16], adler32(0, &data[20], len - 20)); +} + +/* Set up NVRAM with OF and OSX partitions */ +void 
pmac_format_nvram_partition(MacIONVRAMState *nvr, int len) +{ + /* + * Mac OS X expects side "B" of the flash at the second half of NVRAM, + * so we use half of the chip for OF and the other half for a free OSX + * partition. + */ + pmac_format_nvram_partition_of(nvr, 0, len / 2); + pmac_format_nvram_partition_osx(nvr, len / 2, len / 2); +} type_init(macio_nvram_register_types) -- cgit v1.1 From caae6c961107c4c55731a86572f9a1f53837636b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 22:29:02 +0200 Subject: PPC: Mac: Move tbfreq into local variable We already expose the real CPU's tb frequency to the guest via fw_cfg. Soon we will need to also expose it to the MacIO, so let's move it to a variable that we can leverage every time we need the frequency. Signed-off-by: Alexander Graf --- hw/ppc/mac_newworld.c | 13 ++++++++++--- hw/ppc/mac_oldworld.c | 12 +++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'hw') diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 26067b4..d525247 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -177,6 +177,7 @@ static void ppc_core99_init(MachineState *machine) DeviceState *dev; int *token = g_new(int, 1); hwaddr nvram_addr = 0xFFF04000; + uint64_t tbfreq; linux_boot = (kernel_filename != NULL); @@ -373,6 +374,14 @@ static void ppc_core99_init(MachineState *machine) pci_bus = pci_pmac_init(pic, get_system_memory(), get_system_io()); machine_arch = ARCH_MAC99; } + + /* Timebase Frequency */ + if (kvm_enabled()) { + tbfreq = kvmppc_get_tbfreq(); + } else { + tbfreq = TBFREQ; + } + /* init basic PC hardware */ escc_mem = escc_init(0, pic[0x25], pic[0x24], serial_hds[0], serial_hds[1], ESCC_CLOCK, 4); @@ -469,15 +478,13 @@ static void ppc_core99_init(MachineState *machine) #ifdef CONFIG_KVM uint8_t *hypercall; - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq()); hypercall = g_malloc(16); kvmppc_get_hypercall(env, hypercall, 16); fw_cfg_add_bytes(fw_cfg, 
FW_CFG_PPC_KVM_HC, hypercall, 16); fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); #endif - } else { - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ); } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq); /* Mac OS X requires a "known good" clock-frequency value; pass it one. */ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index ec7ed38..863dd2f 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -103,6 +103,7 @@ static void ppc_heathrow_init(MachineState *machine) uint16_t ppc_boot_device; DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; void *fw_cfg; + uint64_t tbfreq; linux_boot = (kernel_filename != NULL); @@ -250,6 +251,13 @@ static void ppc_heathrow_init(MachineState *machine) } } + /* Timebase Frequency */ + if (kvm_enabled()) { + tbfreq = kvmppc_get_tbfreq(); + } else { + tbfreq = TBFREQ; + } + /* init basic PC hardware */ if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { hw_error("Only 6xx bus is supported on heathrow machine\n"); @@ -330,15 +338,13 @@ static void ppc_heathrow_init(MachineState *machine) #ifdef CONFIG_KVM uint8_t *hypercall; - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq()); hypercall = g_malloc(16); kvmppc_get_hypercall(env, hypercall, 16); fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); #endif - } else { - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ); } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq); /* Mac OS X requires a "known good" clock-frequency value; pass it one. 
*/ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); -- cgit v1.1 From b981289c493c7ddabc1cdf7de99daa24642c7739 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 22:31:53 +0200 Subject: PPC: Cuda: Use cuda timer to expose tbfreq to guest Mac OS X calibrates a number of frequencies on bootup based on reading tb values on bootup and comparing them to via cuda timer values. The only variable we can really steer well (thanks to KVM) is the cuda frequency. So let's use that one to fake Mac OS X into believing the bus frequency is tbfreq * 4. That way Mac OS X will automatically calculate the correct timebase frequency. With this patch and the patch set I posted earlier I can successfully run Mac OS X 10.2, 10.3 and 10.4 guests with -M mac99 on TCG and KVM. Suggested-by: Benjamin Herrenschmidt Signed-off-by: Alexander Graf --- hw/misc/macio/cuda.c | 23 ++++++++++++++++++++--- hw/misc/macio/macio.c | 10 ++++++++++ hw/ppc/mac.h | 2 ++ hw/ppc/mac_newworld.c | 1 + hw/ppc/mac_oldworld.c | 1 + 5 files changed, 34 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c index ff6051d..b4273aa 100644 --- a/hw/misc/macio/cuda.c +++ b/hw/misc/macio/cuda.c @@ -123,13 +123,22 @@ static void cuda_update_irq(CUDAState *s) } } +static uint64_t get_tb(uint64_t freq) +{ + return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + freq, get_ticks_per_sec()); +} + static unsigned int get_counter(CUDATimer *s) { int64_t d; unsigned int counter; + uint64_t tb_diff; + + /* Reverse of the tb calculation algorithm that Mac OS X uses on bootup. 
*/ + tb_diff = get_tb(s->frequency) - s->load_time; + d = (tb_diff * 0xBF401675E5DULL) / (s->frequency << 24); - d = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - s->load_time, - CUDA_TIMER_FREQ, get_ticks_per_sec()); if (s->index == 0) { /* the timer goes down from latch to -1 (period of latch + 2) */ if (d <= (s->counter_value + 1)) { @@ -147,7 +156,7 @@ static unsigned int get_counter(CUDATimer *s) static void set_counter(CUDAState *s, CUDATimer *ti, unsigned int val) { CUDA_DPRINTF("T%d.counter=%d\n", 1 + (ti->timer == NULL), val); - ti->load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + ti->load_time = get_tb(s->frequency); ti->counter_value = val; cuda_timer_update(s, ti, ti->load_time); } @@ -688,6 +697,8 @@ static void cuda_realizefn(DeviceState *dev, Error **errp) struct tm tm; s->timers[0].timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_timer1, s); + s->timers[0].frequency = s->frequency; + s->timers[1].frequency = s->frequency; qemu_get_timedate(&tm, 0); s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET; @@ -713,6 +724,11 @@ static void cuda_initfn(Object *obj) DEVICE(obj), "adb.0"); } +static Property cuda_properties[] = { + DEFINE_PROP_UINT64("frequency", CUDAState, frequency, 0), + DEFINE_PROP_END_OF_LIST() +}; + static void cuda_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -720,6 +736,7 @@ static void cuda_class_init(ObjectClass *oc, void *data) dc->realize = cuda_realizefn; dc->reset = cuda_reset; dc->vmsd = &vmstate_cuda; + dc->props = cuda_properties; } static const TypeInfo cuda_type_info = { diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index 35eaa00..e0f1e88 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -42,6 +42,7 @@ typedef struct MacIOState void *dbdma; MemoryRegion *pic_mem; MemoryRegion *escc_mem; + uint64_t frequency; } MacIOState; #define OLDWORLD_MACIO(obj) \ @@ -351,12 +352,19 @@ static void macio_newworld_class_init(ObjectClass *oc, void *data) pdc->device_id 
= PCI_DEVICE_ID_APPLE_UNI_N_KEYL; } +static Property macio_properties[] = { + DEFINE_PROP_UINT64("frequency", MacIOState, frequency, 0), + DEFINE_PROP_END_OF_LIST() +}; + static void macio_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); k->vendor_id = PCI_VENDOR_ID_APPLE; k->class_id = PCI_CLASS_OTHERS << 8; + dc->props = macio_properties; } static const TypeInfo macio_oldworld_type_info = { @@ -403,6 +411,8 @@ void macio_init(PCIDevice *d, macio_state->escc_mem = escc_mem; /* Note: this code is strongly inspirated from the corresponding code in PearPC */ + qdev_prop_set_uint64(DEVICE(&macio_state->cuda), "frequency", + macio_state->frequency); qdev_init_nofail(DEVICE(d)); } diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h index 23536f4..aff2b9a 100644 --- a/hw/ppc/mac.h +++ b/hw/ppc/mac.h @@ -57,6 +57,7 @@ typedef struct CUDATimer { uint16_t counter_value; int64_t load_time; int64_t next_irq_time; + uint64_t frequency; QEMUTimer *timer; } CUDATimer; @@ -97,6 +98,7 @@ typedef struct CUDAState { CUDATimer timers[2]; uint32_t tick_offset; + uint64_t frequency; uint8_t last_b; uint8_t last_acr; diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index d525247..8453bfa 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -395,6 +395,7 @@ static void ppc_core99_init(MachineState *machine) qdev_connect_gpio_out(dev, 2, pic[0x02]); /* IDE DMA */ qdev_connect_gpio_out(dev, 3, pic[0x0e]); /* IDE */ qdev_connect_gpio_out(dev, 4, pic[0x03]); /* IDE DMA */ + qdev_prop_set_uint64(dev, "frequency", tbfreq); macio_init(macio, pic_mem, escc_bar); /* We only emulate 2 out of 3 IDE controllers for now */ diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 863dd2f..630a9f9 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -286,6 +286,7 @@ static void ppc_heathrow_init(MachineState *machine) qdev_connect_gpio_out(dev, 2, pic[0x02]); /* IDE-0 DMA */ 
qdev_connect_gpio_out(dev, 3, pic[0x0E]); /* IDE-1 */ qdev_connect_gpio_out(dev, 4, pic[0x03]); /* IDE-1 DMA */ + qdev_prop_set_uint64(dev, "frequency", tbfreq); macio_init(macio, pic_mem, escc_bar); macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), -- cgit v1.1 From 32420522482ffc20f8e9423af4f41f4e05ce3a56 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 13 Aug 2014 17:20:53 +1000 Subject: spapr_pci: Fix config space corruption When disabling MSI/MSIX via "ibm,change-msi" RTAS call, no check was made if MSI or MSIX is actually supported and the MSI message was reset unconditionally. If this happened on a device which does not support MSI (but does support MSIX, otherwise "ibm,change-msi" would not be called), this device would have PCIDevice::msi_cap field (MSI capability offset) set to zero and writing a vector would actually clear PCI status. This clears MSI message only if MSI or MSIX is present on a device. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alexander Graf --- hw/ppc/spapr_pci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 9ed39a9..e894f07 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -262,7 +262,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, unsigned int irq, max_irqs = 0, num = 0; sPAPRPHBState *phb = NULL; PCIDevice *pdev = NULL; - bool msix = false; spapr_pci_msi *msi; int *config_addr_key; @@ -300,7 +299,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, } xics_free(spapr->icp, msi->first_irq, msi->num); - spapr_msi_setmsg(pdev, 0, msix, 0, num); + if (msi_present(pdev)) { + spapr_msi_setmsg(pdev, 0, false, 0, num); + } + if (msix_present(pdev)) { + spapr_msi_setmsg(pdev, 0, true, 0, num); + } g_hash_table_remove(phb->msi, &config_addr); trace_spapr_pci_msi("Released MSIs", config_addr); -- cgit v1.1 From 
439ce1401bac1687c711cb6acf4ee8f3f457c05e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 22 Aug 2014 11:50:57 +1000 Subject: spapr-vlan: Don't touch last entry in buffer list The last 8 bytes of the buffer list is defined to contain the number of dropped frames. At the moment we use it to store rx entries, which trips up ethtool -S: rx_no_buffer: 9223380832981355136 Fix this by skipping the last buffer list entry. Signed-off-by: Anton Blanchard Reviewed-by: David Gibson Signed-off-by: Alexander Graf --- hw/net/spapr_llan.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 2d47df6..23c47d3 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -72,7 +72,14 @@ typedef uint64_t vlan_bd_t; #define VLAN_RXQ_BD_OFF 0 #define VLAN_FILTER_BD_OFF 8 #define VLAN_RX_BDS_OFF 16 -#define VLAN_MAX_BUFS ((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8) +/* + * The final 8 bytes of the buffer list is a counter of frames dropped + * because there was not a buffer in the buffer list capable of holding + * the frame. We must avoid it, or the operating system will report garbage + * for this statistic. 
+ */ +#define VLAN_RX_BDS_LEN (SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF - 8) +#define VLAN_MAX_BUFS (VLAN_RX_BDS_LEN / 8) #define TYPE_VIO_SPAPR_VLAN_DEVICE "spapr-vlan" #define VIO_SPAPR_VLAN_DEVICE(obj) \ @@ -119,7 +126,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, do { buf_ptr += 8; - if (buf_ptr >= SPAPR_TCE_PAGE_SIZE) { + if (buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) { buf_ptr = VLAN_RX_BDS_OFF; } @@ -397,7 +404,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, do { dev->add_buf_ptr += 8; - if (dev->add_buf_ptr >= SPAPR_TCE_PAGE_SIZE) { + if (dev->add_buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) { dev->add_buf_ptr = VLAN_RX_BDS_OFF; } -- cgit v1.1 From 8c46f7ec85a4dd9663489b2fa2b425cd7b3653e1 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 27 Aug 2014 18:17:12 +0200 Subject: spapr_pci: map the MSI window in each PHB On sPAPR, virtio devices are connected to the PCI bus and use MSI-X. Commit cc943c36faa192cd4b32af8fe5edb31894017d35 has modified MSI-X so that writes are made using the bus master address space and follow the IOMMU path. Unfortunately, the IOMMU address space does not have an MSI window: the notification is silently dropped in unassigned_mem_write instead of reaching the guest... The most visible effect is that all virtio devices are non-functional on sPAPR since then. :( This patch does the following: 1) map the MSI window into the IOMMU address space for each PHB - since each PHB instantiates its own IOMMU address space, we can safely map the window at a fixed address (SPAPR_PCI_MSI_WINDOW) - no real need to keep the MSI window setup in a separate function, the spapr_pci_msi_init() code moves to spapr_phb_realize().
2) kill the global MSI window as it is not needed in the end Signed-off-by: Greg Kurz Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 1 - hw/ppc/spapr_pci.c | 53 ++++++++++++++++++++++++----------------------------- 2 files changed, 24 insertions(+), 30 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 555a007..65b28ac 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1441,7 +1441,6 @@ static void ppc_spapr_init(MachineState *machine) spapr_create_nvram(spapr); /* Set up PCI */ - spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW); spapr_pci_rtas_init(); phb = spapr_create_phb(spapr, 0); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index e894f07..ad0da7f 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -345,7 +345,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr, } /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */ - spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX, + spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX, irq, req_num); /* Add MSI device to cache */ @@ -469,34 +469,6 @@ static const MemoryRegionOps spapr_msi_ops = { .endianness = DEVICE_LITTLE_ENDIAN }; -void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr) -{ - uint64_t window_size = 4096; - - /* - * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors, - * we need to allocate some memory to catch those writes coming - * from msi_notify()/msix_notify(). - * As MSIMessage:addr is going to be the same and MSIMessage:data - * is going to be a VIRQ number, 4 bytes of the MSI MR will only - * be used. - * - * For KVM we want to ensure that this memory is a full page so that - * our memory slot is of page size granularity. 
- */ -#ifdef CONFIG_KVM - if (kvm_enabled()) { - window_size = getpagesize(); - } -#endif - - spapr->msi_win_addr = addr; - memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr, - "msi", window_size); - memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr, - &spapr->msiwindow); -} - /* * PHB PCI device */ @@ -516,6 +488,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) char *namebuf; int i; PCIBus *bus; + uint64_t msi_window_size = 4096; if (sphb->index != -1) { hwaddr windows_base; @@ -608,6 +581,28 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) address_space_init(&sphb->iommu_as, &sphb->iommu_root, sphb->dtbusname); + /* + * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors, + * we need to allocate some memory to catch those writes coming + * from msi_notify()/msix_notify(). + * As MSIMessage:addr is going to be the same and MSIMessage:data + * is going to be a VIRQ number, 4 bytes of the MSI MR will only + * be used. + * + * For KVM we want to ensure that this memory is a full page so that + * our memory slot is of page size granularity. 
+ */ +#ifdef CONFIG_KVM + if (kvm_enabled()) { + msi_window_size = getpagesize(); + } +#endif + + memory_region_init_io(&sphb->msiwindow, NULL, &spapr_msi_ops, spapr, + "msi", msi_window_size); + memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW, + &sphb->msiwindow); + pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb); pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq); -- cgit v1.1 From 85423d90c7bdbbae3d97ed3a12b5db79d00a3fb0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sat, 30 Aug 2014 15:55:21 +1000 Subject: hypervisor property clashes with hypervisor node dtc fails on a recent QEMU snapshot: ERROR (name_properties): "name" property in /hypervisor#1 is incorrect ("hypervisor" instead of base node name) Looking at the device tree we have a hypervisor property: # lsprop hypervisor hypervisor "kvm" But we also have a hypervisor node, with a name that doesn't match: # lsprop hypervisor#1/ name "hypervisor" compatible "linux,kvm" linux,phandle 7e5eb5d8 (2120136152) Commit c08ce91d309c (spapr: add uuid/host details to device tree) looks to have collided with an earlier patch. Remove the hypervisor property. Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf --- hw/ppc/spapr.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 65b28ac..2ab4460 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -363,10 +363,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)"))); _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries"))); - if (kvm_enabled()) { - _FDT((fdt_property_string(fdt, "hypervisor", "kvm"))); - } - /* * Add info to guest to indentify which host is it being run on * and what is the uuid of the guest -- cgit v1.1