aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
author Richard Henderson <richard.henderson@linaro.org> 2023-06-26 08:38:19 +0200
committer Richard Henderson <richard.henderson@linaro.org> 2023-06-26 08:38:19 +0200
commit f9925abbda1c324d901d8e7fe63bad09a35ae740 (patch)
tree 18c2af50822bf8974eff8be3bea177190e01ebdf /hw
parent 79dbd910c9ea6ca38f8e1b2616b1e5e885b85bd3 (diff)
parent 5eb63b88d0ac259c2f49e62b6dcc6527a5caf255 (diff)
download qemu-f9925abbda1c324d901d8e7fe63bad09a35ae740.zip
qemu-f9925abbda1c324d901d8e7fe63bad09a35ae740.tar.gz
qemu-f9925abbda1c324d901d8e7fe63bad09a35ae740.tar.bz2
Merge tag 'pull-ppc-20230626' of https://github.com/legoater/qemu into staging
ppc queue: * New maintainers * Nested implementation cleanups * Various cleanups of the CPU implementation * SMT support for pseries * Improvements of the XIVE2 TIMA modeling * Extra avocado tests for pseries # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmSZKF8ACgkQUaNDx8/7 # 7KGSiBAAlHC4S9J5ujzTIojaWY72d2ZinkC+WpBus9Wr91DqaUSUd/JbzDxQCvXh # dBWEbcyQ+abb8M3OQ3fMq9TfD2/LhxxXb+uwHIJ+ylITBnsRVCQv/4/gi3EkpRid # h4q3wYH8OYNfCQd/cWYXNgCSNj1nS9sRrEKFXaB0JeQWHzHxriJS/SoIhilqvUru # LFEytWNb3bxRkEkt8oAetOa9+DNLowUQ9IdzswqGcib09po3b1k4+ThfcvzU9nAc # ek31/h1W6cJbOJcgRO2dhWUZYp7cfmcnOa02E84tGFvvY/kYbjzPZZnoniSXD4uf # YWFCoB3VxUoZ/YKCT/pDKHVdXmLLrfckNbo9vQNEcwmjr8m0Q3d1ewD5O9oNRpgN # H0QMENfsdojztosOm3KPQ20aqNf1R7rQegYTiWf3B2fKZ6PIqnn3tBPxaEDkH7NC # GTAKnBhF48lcHSF/4XOfGdmqhGgPRWX/Tv0wia7RY/A4NEfiIImIu+nYSGNBbu3y # 7xlmtcumTlsRityOZnYI3bN5ubv++XPwU5NIJPACqvAbhif2rf1vQ9rMkkK785GL # ciJ/5f6zXsLU7DfWP+qbTBizchQgigXnRZEEc7Seo6Bwtru22oxug0qQZ5QCgyXl # Fg5Xuoq/6T4JC75pvxh1BjVlZc3Okzbfmsj+aZNrXO581HVJ2JI= # =XLtJ # -----END PGP SIGNATURE----- # gpg: Signature made Mon 26 Jun 2023 07:55:43 AM CEST # gpg: using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1 # gpg: Good signature from "Cédric Le Goater <clg@kaod.org>" [undefined] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. 
# Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1

* tag 'pull-ppc-20230626' of https://github.com/legoater/qemu: (30 commits)
  tests/avocado: ppc test VOF bios Linux boot
  pnv/xive2: Check TIMA special ops against a dedicated array for P10
  pnv/xive2: Add a get_config() method on the presenter class
  tests/avocado: Add ppc64 pseries multiprocessor boot tests
  tests/avocado: boot ppc64 pseries to Linux VFS mount
  spapr: TCG allow up to 8-thread SMT on POWER8 and newer CPUs
  hw/ppc/spapr: Test whether TCG is enabled with tcg_enabled()
  target/ppc: Add msgsnd/p and DPDES SMT support
  target/ppc: Add support for SMT CTRL register
  target/ppc: Add initial flags and helpers for SMT support
  target/ppc: Fix sc instruction handling of LEV field
  target/ppc: Better CTRL SPR implementation
  target/ppc: Add ISA v3.1 LEV indication in SRR1 for system call interrupts
  target/ppc: Implement HEIR SPR
  target/ppc: Add SRR1 prefix indication to interrupt handlers
  target/ppc: Change partition-scope translate interface
  target/ppc: Fix instruction loading endianness in alignment interrupt
  ppc/spapr: Move spapr nested HV to a new file
  ppc/spapr: load and store l2 state with helper functions
  ppc/spapr: Add a nested state struct
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r-- hw/intc/pnv_xive.c 11
-rw-r--r-- hw/intc/pnv_xive2.c 44
-rw-r--r-- hw/intc/spapr_xive.c 16
-rw-r--r-- hw/intc/xive.c 57
-rw-r--r-- hw/pci-host/pnv_phb4.c 14
-rw-r--r-- hw/ppc/meson.build 1
-rw-r--r-- hw/ppc/pnv.c 3
-rw-r--r-- hw/ppc/ppc.c 6
-rw-r--r-- hw/ppc/ppc440_bamboo.c 17
-rw-r--r-- hw/ppc/prep.c 20
-rw-r--r-- hw/ppc/spapr.c 18
-rw-r--r-- hw/ppc/spapr_caps.c 14
-rw-r--r-- hw/ppc/spapr_cpu_core.c 7
-rw-r--r-- hw/ppc/spapr_hcall.c 335
-rw-r--r-- hw/ppc/spapr_nested.c 395
15 files changed, 547 insertions, 411 deletions
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index 622f9d2..e536b3e 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -479,6 +479,16 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format,
return count;
}
+static uint32_t pnv_xive_presenter_get_config(XivePresenter *xptr)
+{
+ uint32_t cfg = 0;
+
+ /* TIMA GEN1 is all P9 knows */
+ cfg |= XIVE_PRESENTER_GEN1_TIMA_OS;
+
+ return cfg;
+}
+
static uint8_t pnv_xive_get_block_id(XiveRouter *xrtr)
{
return pnv_xive_block_id(PNV_XIVE(xrtr));
@@ -1991,6 +2001,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data)
xnc->notify = pnv_xive_notify;
xpc->match_nvt = pnv_xive_match_nvt;
+ xpc->get_config = pnv_xive_presenter_get_config;
};
static const TypeInfo pnv_xive_info = {
diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index ec1edeb..ed438a2 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -501,6 +501,17 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format,
return count;
}
+static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr)
+{
+ PnvXive2 *xive = PNV_XIVE2(xptr);
+ uint32_t cfg = 0;
+
+ if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS) {
+ cfg |= XIVE_PRESENTER_GEN1_TIMA_OS;
+ }
+ return cfg;
+}
+
static uint8_t pnv_xive2_get_block_id(Xive2Router *xrtr)
{
return pnv_xive2_block_id(PNV_XIVE2(xrtr));
@@ -1645,17 +1656,6 @@ static const MemoryRegionOps pnv_xive2_ic_tm_indirect_ops = {
/*
* TIMA ops
*/
-
-/*
- * Special TIMA offsets to handle accesses in a POWER10 way.
- *
- * Only the CAM line updates done by the hypervisor should be handled
- * specifically.
- */
-#define HV_PAGE_OFFSET (XIVE_TM_HV_PAGE << TM_SHIFT)
-#define HV_PUSH_OS_CTX_OFFSET (HV_PAGE_OFFSET | (TM_QW1_OS + TM_WORD2))
-#define HV_PULL_OS_CTX_OFFSET (HV_PAGE_OFFSET | TM_SPC_PULL_OS_CTX)
-
static void pnv_xive2_tm_write(void *opaque, hwaddr offset,
uint64_t value, unsigned size)
{
@@ -1663,18 +1663,7 @@ static void pnv_xive2_tm_write(void *opaque, hwaddr offset,
PnvXive2 *xive = pnv_xive2_tm_get_xive(cpu);
XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc);
XivePresenter *xptr = XIVE_PRESENTER(xive);
- bool gen1_tima_os =
- xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS;
-
- offset &= TM_ADDRESS_MASK;
-
- /* TODO: should we switch the TM ops table instead ? */
- if (!gen1_tima_os && offset == HV_PUSH_OS_CTX_OFFSET) {
- xive2_tm_push_os_ctx(xptr, tctx, offset, value, size);
- return;
- }
- /* Other TM ops are the same as XIVE1 */
xive_tctx_tm_write(xptr, tctx, offset, value, size);
}
@@ -1684,17 +1673,7 @@ static uint64_t pnv_xive2_tm_read(void *opaque, hwaddr offset, unsigned size)
PnvXive2 *xive = pnv_xive2_tm_get_xive(cpu);
XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc);
XivePresenter *xptr = XIVE_PRESENTER(xive);
- bool gen1_tima_os =
- xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS;
-
- offset &= TM_ADDRESS_MASK;
-
- /* TODO: should we switch the TM ops table instead ? */
- if (!gen1_tima_os && offset == HV_PULL_OS_CTX_OFFSET) {
- return xive2_tm_pull_os_ctx(xptr, tctx, offset, size);
- }
- /* Other TM ops are the same as XIVE1 */
return xive_tctx_tm_read(xptr, tctx, offset, size);
}
@@ -1987,6 +1966,7 @@ static void pnv_xive2_class_init(ObjectClass *klass, void *data)
xnc->notify = pnv_xive2_notify;
xpc->match_nvt = pnv_xive2_match_nvt;
+ xpc->get_config = pnv_xive2_presenter_get_config;
};
static const TypeInfo pnv_xive2_info = {
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index dc641cc..8bcab28 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -475,6 +475,21 @@ static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format,
return count;
}
+static uint32_t spapr_xive_presenter_get_config(XivePresenter *xptr)
+{
+ uint32_t cfg = 0;
+
+ /*
+ * Let's claim GEN1 TIMA format. If running with KVM on P10, the
+ * correct answer is deep in the hardware and not accessible to
+ * us. But it shouldn't matter as it only affects the presenter
+ * as seen by a guest OS.
+ */
+ cfg |= XIVE_PRESENTER_GEN1_TIMA_OS;
+
+ return cfg;
+}
+
static uint8_t spapr_xive_get_block_id(XiveRouter *xrtr)
{
return SPAPR_XIVE_BLOCK_ID;
@@ -832,6 +847,7 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data)
sicc->post_load = spapr_xive_post_load;
xpc->match_nvt = spapr_xive_match_nvt;
+ xpc->get_config = spapr_xive_presenter_get_config;
xpc->in_kernel = spapr_xive_in_kernel_xptr;
}
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 5204c14..84c079b 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -20,6 +20,7 @@
#include "monitor/monitor.h"
#include "hw/irq.h"
#include "hw/ppc/xive.h"
+#include "hw/ppc/xive2.h"
#include "hw/ppc/xive_regs.h"
#include "trace.h"
@@ -461,6 +462,13 @@ static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
}
}
+static uint32_t xive_presenter_get_config(XivePresenter *xptr)
+{
+ XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
+
+ return xpc->get_config(xptr);
+}
+
/*
* Define a mapping of "special" operations depending on the TIMA page
* offset and the size of the operation.
@@ -497,14 +505,47 @@ static const XiveTmOp xive_tm_operations[] = {
{ XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, xive_tm_pull_pool_ctx },
};
-static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write)
+static const XiveTmOp xive2_tm_operations[] = {
+ /*
+ * MMIOs below 2K : raw values and special operations without side
+ * effects
+ */
+ { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive2_tm_push_os_ctx, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, xive_tm_vt_poll },
+
+ /* MMIOs above 2K : special operations with side effects */
+ { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, xive_tm_ack_os_reg },
+ { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL, xive2_tm_pull_os_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL, xive2_tm_pull_os_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL, xive_tm_ack_hv_reg },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL, xive_tm_pull_pool_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, xive_tm_pull_pool_ctx },
+};
+
+static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset,
+ unsigned size, bool write)
{
uint8_t page_offset = (offset >> TM_SHIFT) & 0x3;
uint32_t op_offset = offset & TM_ADDRESS_MASK;
- int i;
+ const XiveTmOp *tm_ops;
+ int i, tm_ops_count;
+ uint32_t cfg;
+
+ cfg = xive_presenter_get_config(xptr);
+ if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) {
+ tm_ops = xive_tm_operations;
+ tm_ops_count = ARRAY_SIZE(xive_tm_operations);
+ } else {
+ tm_ops = xive2_tm_operations;
+ tm_ops_count = ARRAY_SIZE(xive2_tm_operations);
+ }
- for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) {
- const XiveTmOp *xto = &xive_tm_operations[i];
+ for (i = 0; i < tm_ops_count; i++) {
+ const XiveTmOp *xto = &tm_ops[i];
/* Accesses done from a more privileged TIMA page is allowed */
if (xto->page_offset >= page_offset &&
@@ -535,7 +576,7 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
* First, check for special operations in the 2K region
*/
if (offset & TM_SPECIAL_OP) {
- xto = xive_tm_find_op(offset, size, true);
+ xto = xive_tm_find_op(tctx->xptr, offset, size, true);
if (!xto) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA "
"@%"HWADDR_PRIx"\n", offset);
@@ -548,7 +589,7 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
/*
* Then, for special operations in the region below 2K.
*/
- xto = xive_tm_find_op(offset, size, true);
+ xto = xive_tm_find_op(tctx->xptr, offset, size, true);
if (xto) {
xto->write_handler(xptr, tctx, offset, value, size);
return;
@@ -574,7 +615,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
* First, check for special operations in the 2K region
*/
if (offset & TM_SPECIAL_OP) {
- xto = xive_tm_find_op(offset, size, false);
+ xto = xive_tm_find_op(tctx->xptr, offset, size, false);
if (!xto) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
"@%"HWADDR_PRIx"\n", offset);
@@ -587,7 +628,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
/*
* Then, for special operations in the region below 2K.
*/
- xto = xive_tm_find_op(offset, size, false);
+ xto = xive_tm_find_op(tctx->xptr, offset, size, false);
if (xto) {
ret = xto->read_handler(xptr, tctx, offset, size);
goto out;
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 542f9e2..6232cbe 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -133,13 +133,13 @@ static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
PCIDevice *pdev;
if (size != 4) {
- phb_error(phb, "rc_config_write invalid size %d\n", size);
+ phb_error(phb, "rc_config_write invalid size %d", size);
return;
}
pdev = pci_find_device(pci->bus, 0, 0);
if (!pdev) {
- phb_error(phb, "rc_config_write device not found\n");
+ phb_error(phb, "rc_config_write device not found");
return;
}
@@ -155,13 +155,13 @@ static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
uint64_t val;
if (size != 4) {
- phb_error(phb, "rc_config_read invalid size %d\n", size);
+ phb_error(phb, "rc_config_read invalid size %d", size);
return ~0ull;
}
pdev = pci_find_device(pci->bus, 0, 0);
if (!pdev) {
- phb_error(phb, "rc_config_read device not found\n");
+ phb_error(phb, "rc_config_read device not found");
return ~0ull;
}
@@ -1039,19 +1039,19 @@ static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr,
if (phb->nest_regs[PEC_NEST_STK_BAR_EN] &
(PEC_NEST_STK_BAR_EN_MMIO0 |
PEC_NEST_STK_BAR_EN_MMIO1)) {
- phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+ phb_pec_error(pec, "Changing enabled BAR unsupported");
}
phb->nest_regs[reg] = val & 0xffffffffff000000ull;
break;
case PEC_NEST_STK_PHB_REGS_BAR:
if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) {
- phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+ phb_pec_error(pec, "Changing enabled BAR unsupported");
}
phb->nest_regs[reg] = val & 0xffffffffffc00000ull;
break;
case PEC_NEST_STK_INT_BAR:
if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) {
- phb_pec_error(pec, "Changing enabled BAR unsupported\n");
+ phb_pec_error(pec, "Changing enabled BAR unsupported");
}
phb->nest_regs[reg] = val & 0xfffffff000000000ull;
break;
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index c927337..a313d4b 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -15,6 +15,7 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files(
'spapr_vio.c',
'spapr_events.c',
'spapr_hcall.c',
+ 'spapr_nested.c',
'spapr_iommu.c',
'spapr_rtas.c',
'spapr_pci.c',
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 590fc64..fc08317 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -799,7 +799,8 @@ static void pnv_init(MachineState *machine)
DeviceState *dev;
if (kvm_enabled()) {
- error_report("The powernv machine does not work with KVM acceleration");
+ error_report("machine %s does not support the KVM accelerator",
+ mc->name);
exit(EXIT_FAILURE);
}
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 1b1220c..82e4408 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1436,6 +1436,12 @@ int ppc_cpu_pir(PowerPCCPU *cpu)
return env->spr_cb[SPR_PIR].default_value;
}
+int ppc_cpu_tir(PowerPCCPU *cpu)
+{
+ CPUPPCState *env = &cpu->env;
+ return env->spr_cb[SPR_TIR].default_value;
+}
+
PowerPCCPU *ppc_get_vcpu_by_pir(int pir)
{
CPUState *cs;
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index f969fa3..f061b8c 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -19,7 +19,6 @@
#include "hw/pci/pci.h"
#include "hw/boards.h"
#include "sysemu/kvm.h"
-#include "kvm_ppc.h"
#include "sysemu/device_tree.h"
#include "hw/loader.h"
#include "elf.h"
@@ -97,16 +96,6 @@ static int bamboo_load_device_tree(MachineState *machine,
fprintf(stderr, "couldn't set /chosen/bootargs\n");
}
- /*
- * Copy data from the host device tree into the guest. Since the guest can
- * directly access the timebase without host involvement, we must expose
- * the correct frequencies.
- */
- if (kvm_enabled()) {
- tb_freq = kvmppc_get_tbfreq();
- clock_freq = kvmppc_get_clockfreq();
- }
-
qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency",
clock_freq);
qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency",
@@ -175,6 +164,12 @@ static void bamboo_init(MachineState *machine)
int success;
int i;
+ if (kvm_enabled()) {
+ error_report("machine %s does not support the KVM accelerator",
+ MACHINE_GET_CLASS(machine)->name);
+ exit(EXIT_FAILURE);
+ }
+
cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
env = &cpu->env;
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 33bf232..d9231c7 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -45,7 +45,6 @@
#include "trace.h"
#include "elf.h"
#include "qemu/units.h"
-#include "kvm_ppc.h"
/* SMP is not enabled, for now */
#define MAX_CPUS 1
@@ -245,6 +244,12 @@ static void ibm_40p_init(MachineState *machine)
long kernel_size = 0, initrd_size = 0;
char boot_device;
+ if (kvm_enabled()) {
+ error_report("machine %s does not support the KVM accelerator",
+ MACHINE_GET_CLASS(machine)->name);
+ exit(EXIT_FAILURE);
+ }
+
/* init CPU */
cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
env = &cpu->env;
@@ -392,18 +397,7 @@ static void ibm_40p_init(MachineState *machine)
fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height);
fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth);
- fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled());
- if (kvm_enabled()) {
- uint8_t *hypercall;
-
- fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq());
- hypercall = g_malloc(16);
- kvmppc_get_hypercall(env, hypercall, 16);
- fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
- fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
- } else {
- fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND);
- }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND);
fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device);
qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index dcb7f1c..54dbfd7 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -61,6 +61,7 @@
#include "hw/ppc/fdt.h"
#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_nested.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/vof.h"
#include "hw/qdev-properties.h"
@@ -2524,10 +2525,19 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp)
int ret;
unsigned int smp_threads = ms->smp.threads;
- if (!kvm_enabled() && (smp_threads > 1)) {
- error_setg(errp, "TCG cannot support more than 1 thread/core "
- "on a pseries machine");
- return;
+ if (tcg_enabled()) {
+ if (smp_threads > 1 &&
+ !ppc_type_check_compat(ms->cpu_type, CPU_POWERPC_LOGICAL_2_07, 0,
+ spapr->max_compat_pvr)) {
+ error_setg(errp, "TCG only supports SMT on POWER8 or newer CPUs");
+ return;
+ }
+
+ if (smp_threads > 8) {
+ error_setg(errp, "TCG cannot support more than 8 threads/core "
+ "on a pseries machine");
+ return;
+ }
}
if (!is_power_of_2(smp_threads)) {
error_setg(errp, "Cannot support %d threads/core on a pseries "
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 3fd45a6d..5a0755d 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -473,6 +473,20 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
error_append_hint(errp,
"Try appending -machine cap-nested-hv=off\n");
}
+ } else if (tcg_enabled()) {
+ MachineState *ms = MACHINE(spapr);
+ unsigned int smp_threads = ms->smp.threads;
+
+ /*
+ * Nested-HV switches the vCPU env state to L2, so SMT-shared SPR updates,
+ * for example, do not necessarily update the correct SPR value on sibling
+ * threads that are in a different guest/host context.
+ */
+ if (smp_threads > 1) {
+ error_setg(errp, "TCG does not support nested-HV with SMT");
+ error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
+ "or use threads=1 with -smp\n");
+ }
}
}
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 9b88dd5..a4e3c2f 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -255,7 +255,7 @@ static void spapr_cpu_core_unrealize(DeviceState *dev)
}
static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
- SpaprCpuCore *sc, Error **errp)
+ SpaprCpuCore *sc, int thread_index, Error **errp)
{
CPUPPCState *env = &cpu->env;
CPUState *cs = CPU(cpu);
@@ -267,6 +267,9 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
kvmppc_set_papr(cpu);
+ env->spr_cb[SPR_PIR].default_value = cs->cpu_index;
+ env->spr_cb[SPR_TIR].default_value = thread_index;
+
/* Set time-base frequency to 512 MHz. vhyp must be set first. */
cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
@@ -337,7 +340,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
for (i = 0; i < cc->nr_threads; i++) {
sc->threads[i] = spapr_create_vcpu(sc, i, errp);
if (!sc->threads[i] ||
- !spapr_realize_vcpu(sc->threads[i], spapr, sc, errp)) {
+ !spapr_realize_vcpu(sc->threads[i], spapr, sc, i, errp)) {
spapr_cpu_core_unrealize(dev);
return;
}
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index b904755..002ea0b 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -13,6 +13,7 @@
#include "hw/ppc/ppc.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/spapr_nested.h"
#include "mmu-hash64.h"
#include "cpu-models.h"
#include "trace.h"
@@ -1498,349 +1499,17 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
}
#ifdef CONFIG_TCG
-#define PRTS_MASK 0x1f
-
-static target_ulong h_set_ptbl(PowerPCCPU *cpu,
- SpaprMachineState *spapr,
- target_ulong opcode,
- target_ulong *args)
-{
- target_ulong ptcr = args[0];
-
- if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
- return H_FUNCTION;
- }
-
- if ((ptcr & PRTS_MASK) + 12 - 4 > 12) {
- return H_PARAMETER;
- }
-
- spapr->nested_ptcr = ptcr; /* Save new partition table */
-
- return H_SUCCESS;
-}
-
-static target_ulong h_tlb_invalidate(PowerPCCPU *cpu,
- SpaprMachineState *spapr,
- target_ulong opcode,
- target_ulong *args)
-{
- /*
- * The spapr virtual hypervisor nested HV implementation retains no L2
- * translation state except for TLB. And the TLB is always invalidated
- * across L1<->L2 transitions, so nothing is required here.
- */
-
- return H_SUCCESS;
-}
-
-static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu,
- SpaprMachineState *spapr,
- target_ulong opcode,
- target_ulong *args)
-{
- /*
- * This HCALL is not required, L1 KVM will take a slow path and walk the
- * page tables manually to do the data copy.
- */
- return H_FUNCTION;
-}
-
-/*
- * When this handler returns, the environment is switched to the L2 guest
- * and TCG begins running that. spapr_exit_nested() performs the switch from
- * L2 back to L1 and returns from the H_ENTER_NESTED hcall.
- */
-static target_ulong h_enter_nested(PowerPCCPU *cpu,
- SpaprMachineState *spapr,
- target_ulong opcode,
- target_ulong *args)
-{
- PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
- CPUState *cs = CPU(cpu);
- CPUPPCState *env = &cpu->env;
- SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
- target_ulong hv_ptr = args[0];
- target_ulong regs_ptr = args[1];
- target_ulong hdec, now = cpu_ppc_load_tbl(env);
- target_ulong lpcr, lpcr_mask;
- struct kvmppc_hv_guest_state *hvstate;
- struct kvmppc_hv_guest_state hv_state;
- struct kvmppc_pt_regs *regs;
- hwaddr len;
-
- if (spapr->nested_ptcr == 0) {
- return H_NOT_AVAILABLE;
- }
-
- len = sizeof(*hvstate);
- hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false,
- MEMTXATTRS_UNSPECIFIED);
- if (len != sizeof(*hvstate)) {
- address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false);
- return H_PARAMETER;
- }
-
- memcpy(&hv_state, hvstate, len);
-
- address_space_unmap(CPU(cpu)->as, hvstate, len, len, false);
-
- /*
- * We accept versions 1 and 2. Version 2 fields are unused because TCG
- * does not implement DAWR*.
- */
- if (hv_state.version > HV_GUEST_STATE_VERSION) {
- return H_PARAMETER;
- }
-
- spapr_cpu->nested_host_state = g_try_new(CPUPPCState, 1);
- if (!spapr_cpu->nested_host_state) {
- return H_NO_MEM;
- }
-
- memcpy(spapr_cpu->nested_host_state, env, sizeof(CPUPPCState));
-
- len = sizeof(*regs);
- regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false,
- MEMTXATTRS_UNSPECIFIED);
- if (!regs || len != sizeof(*regs)) {
- address_space_unmap(CPU(cpu)->as, regs, len, 0, false);
- g_free(spapr_cpu->nested_host_state);
- return H_P2;
- }
-
- len = sizeof(env->gpr);
- assert(len == sizeof(regs->gpr));
- memcpy(env->gpr, regs->gpr, len);
-
- env->lr = regs->link;
- env->ctr = regs->ctr;
- cpu_write_xer(env, regs->xer);
- ppc_set_cr(env, regs->ccr);
-
- env->msr = regs->msr;
- env->nip = regs->nip;
-
- address_space_unmap(CPU(cpu)->as, regs, len, len, false);
-
- env->cfar = hv_state.cfar;
-
- assert(env->spr[SPR_LPIDR] == 0);
- env->spr[SPR_LPIDR] = hv_state.lpid;
-
- lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
- lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask);
- lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
- lpcr &= ~LPCR_LPES0;
- env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask;
-
- env->spr[SPR_PCR] = hv_state.pcr;
- /* hv_state.amor is not used */
- env->spr[SPR_DPDES] = hv_state.dpdes;
- env->spr[SPR_HFSCR] = hv_state.hfscr;
- hdec = hv_state.hdec_expiry - now;
- spapr_cpu->nested_tb_offset = hv_state.tb_offset;
- /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/
- env->spr[SPR_SRR0] = hv_state.srr0;
- env->spr[SPR_SRR1] = hv_state.srr1;
- env->spr[SPR_SPRG0] = hv_state.sprg[0];
- env->spr[SPR_SPRG1] = hv_state.sprg[1];
- env->spr[SPR_SPRG2] = hv_state.sprg[2];
- env->spr[SPR_SPRG3] = hv_state.sprg[3];
- env->spr[SPR_BOOKS_PID] = hv_state.pidr;
- env->spr[SPR_PPR] = hv_state.ppr;
-
- cpu_ppc_hdecr_init(env);
- cpu_ppc_store_hdecr(env, hdec);
-
- /*
- * The hv_state.vcpu_token is not needed. It is used by the KVM
- * implementation to remember which L2 vCPU last ran on which physical
- * CPU so as to invalidate process scope translations if it is moved
- * between physical CPUs. For now TLBs are always flushed on L1<->L2
- * transitions so this is not a problem.
- *
- * Could validate that the same vcpu_token does not attempt to run on
- * different L1 vCPUs at the same time, but that would be a L1 KVM bug
- * and it's not obviously worth a new data structure to do it.
- */
-
- env->tb_env->tb_offset += spapr_cpu->nested_tb_offset;
- spapr_cpu->in_nested = true;
-
- hreg_compute_hflags(env);
- ppc_maybe_interrupt(env);
- tlb_flush(cs);
- env->reserve_addr = -1; /* Reset the reservation */
-
- /*
- * The spapr hcall helper sets env->gpr[3] to the return value, but at
- * this point the L1 is not returning from the hcall but rather we
- * start running the L2, so r3 must not be clobbered, so return env->gpr[3]
- * to leave it unchanged.
- */
- return env->gpr[3];
-}
-
-void spapr_exit_nested(PowerPCCPU *cpu, int excp)
-{
- CPUState *cs = CPU(cpu);
- CPUPPCState *env = &cpu->env;
- SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
- target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */
- target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4];
- target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5];
- struct kvmppc_hv_guest_state *hvstate;
- struct kvmppc_pt_regs *regs;
- hwaddr len;
-
- assert(spapr_cpu->in_nested);
-
- cpu_ppc_hdecr_exit(env);
-
- len = sizeof(*hvstate);
- hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true,
- MEMTXATTRS_UNSPECIFIED);
- if (len != sizeof(*hvstate)) {
- address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true);
- r3_return = H_PARAMETER;
- goto out_restore_l1;
- }
-
- hvstate->cfar = env->cfar;
- hvstate->lpcr = env->spr[SPR_LPCR];
- hvstate->pcr = env->spr[SPR_PCR];
- hvstate->dpdes = env->spr[SPR_DPDES];
- hvstate->hfscr = env->spr[SPR_HFSCR];
-
- if (excp == POWERPC_EXCP_HDSI) {
- hvstate->hdar = env->spr[SPR_HDAR];
- hvstate->hdsisr = env->spr[SPR_HDSISR];
- hvstate->asdr = env->spr[SPR_ASDR];
- } else if (excp == POWERPC_EXCP_HISI) {
- hvstate->asdr = env->spr[SPR_ASDR];
- }
-
- /* HEIR should be implemented for HV mode and saved here. */
- hvstate->srr0 = env->spr[SPR_SRR0];
- hvstate->srr1 = env->spr[SPR_SRR1];
- hvstate->sprg[0] = env->spr[SPR_SPRG0];
- hvstate->sprg[1] = env->spr[SPR_SPRG1];
- hvstate->sprg[2] = env->spr[SPR_SPRG2];
- hvstate->sprg[3] = env->spr[SPR_SPRG3];
- hvstate->pidr = env->spr[SPR_BOOKS_PID];
- hvstate->ppr = env->spr[SPR_PPR];
-
- /* Is it okay to specify write length larger than actual data written? */
- address_space_unmap(CPU(cpu)->as, hvstate, len, len, true);
-
- len = sizeof(*regs);
- regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true,
- MEMTXATTRS_UNSPECIFIED);
- if (!regs || len != sizeof(*regs)) {
- address_space_unmap(CPU(cpu)->as, regs, len, 0, true);
- r3_return = H_P2;
- goto out_restore_l1;
- }
-
- len = sizeof(env->gpr);
- assert(len == sizeof(regs->gpr));
- memcpy(regs->gpr, env->gpr, len);
-
- regs->link = env->lr;
- regs->ctr = env->ctr;
- regs->xer = cpu_read_xer(env);
- regs->ccr = ppc_get_cr(env);
-
- if (excp == POWERPC_EXCP_MCHECK ||
- excp == POWERPC_EXCP_RESET ||
- excp == POWERPC_EXCP_SYSCALL) {
- regs->nip = env->spr[SPR_SRR0];
- regs->msr = env->spr[SPR_SRR1] & env->msr_mask;
- } else {
- regs->nip = env->spr[SPR_HSRR0];
- regs->msr = env->spr[SPR_HSRR1] & env->msr_mask;
- }
-
- /* Is it okay to specify write length larger than actual data written? */
- address_space_unmap(CPU(cpu)->as, regs, len, len, true);
-
-out_restore_l1:
- memcpy(env->gpr, spapr_cpu->nested_host_state->gpr, sizeof(env->gpr));
- env->lr = spapr_cpu->nested_host_state->lr;
- env->ctr = spapr_cpu->nested_host_state->ctr;
- memcpy(env->crf, spapr_cpu->nested_host_state->crf, sizeof(env->crf));
- env->cfar = spapr_cpu->nested_host_state->cfar;
- env->xer = spapr_cpu->nested_host_state->xer;
- env->so = spapr_cpu->nested_host_state->so;
- env->ov = spapr_cpu->nested_host_state->ov;
- env->ov32 = spapr_cpu->nested_host_state->ov32;
- env->ca32 = spapr_cpu->nested_host_state->ca32;
- env->msr = spapr_cpu->nested_host_state->msr;
- env->nip = spapr_cpu->nested_host_state->nip;
-
- assert(env->spr[SPR_LPIDR] != 0);
- env->spr[SPR_LPCR] = spapr_cpu->nested_host_state->spr[SPR_LPCR];
- env->spr[SPR_LPIDR] = spapr_cpu->nested_host_state->spr[SPR_LPIDR];
- env->spr[SPR_PCR] = spapr_cpu->nested_host_state->spr[SPR_PCR];
- env->spr[SPR_DPDES] = 0;
- env->spr[SPR_HFSCR] = spapr_cpu->nested_host_state->spr[SPR_HFSCR];
- env->spr[SPR_SRR0] = spapr_cpu->nested_host_state->spr[SPR_SRR0];
- env->spr[SPR_SRR1] = spapr_cpu->nested_host_state->spr[SPR_SRR1];
- env->spr[SPR_SPRG0] = spapr_cpu->nested_host_state->spr[SPR_SPRG0];
- env->spr[SPR_SPRG1] = spapr_cpu->nested_host_state->spr[SPR_SPRG1];
- env->spr[SPR_SPRG2] = spapr_cpu->nested_host_state->spr[SPR_SPRG2];
- env->spr[SPR_SPRG3] = spapr_cpu->nested_host_state->spr[SPR_SPRG3];
- env->spr[SPR_BOOKS_PID] = spapr_cpu->nested_host_state->spr[SPR_BOOKS_PID];
- env->spr[SPR_PPR] = spapr_cpu->nested_host_state->spr[SPR_PPR];
-
- /*
- * Return the interrupt vector address from H_ENTER_NESTED to the L1
- * (or error code).
- */
- env->gpr[3] = r3_return;
-
- env->tb_env->tb_offset -= spapr_cpu->nested_tb_offset;
- spapr_cpu->in_nested = false;
-
- hreg_compute_hflags(env);
- ppc_maybe_interrupt(env);
- tlb_flush(cs);
- env->reserve_addr = -1; /* Reset the reservation */
-
- g_free(spapr_cpu->nested_host_state);
- spapr_cpu->nested_host_state = NULL;
-}
-
-static void hypercall_register_nested(void)
-{
- spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
- spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested);
- spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate);
- spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest);
-}
-
static void hypercall_register_softmmu(void)
{
/* DO NOTHING */
}
#else
-void spapr_exit_nested(PowerPCCPU *cpu, int excp)
-{
- g_assert_not_reached();
-}
-
static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
g_assert_not_reached();
}
-static void hypercall_register_nested(void)
-{
- /* DO NOTHING */
-}
-
static void hypercall_register_softmmu(void)
{
/* hcall-pft */
@@ -1910,7 +1579,7 @@ static void hypercall_register_types(void)
spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt);
- hypercall_register_nested();
+ spapr_register_nested();
}
type_init(hypercall_register_types)
diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
new file mode 100644
index 0000000..121aa96
--- /dev/null
+++ b/hw/ppc/spapr_nested.c
@@ -0,0 +1,395 @@
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "exec/exec-all.h"
+#include "helper_regs.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/spapr_nested.h"
+
+#ifdef CONFIG_TCG
+#define PRTS_MASK 0x1f
+
+/*
+ * KVMPPC_H_SET_PARTITION_TABLE handler: remember the partition table
+ * value passed by the caller in args[0].
+ *
+ * Returns H_FUNCTION when spapr_get_cap() reports a zero
+ * SPAPR_CAP_NESTED_KVM_HV, H_PARAMETER when the PRTS field (the bits
+ * selected by PRTS_MASK) is out of range, and H_SUCCESS once the value has
+ * been stored in spapr->nested_ptcr.
+ */
+static target_ulong h_set_ptbl(PowerPCCPU *cpu,
+                               SpaprMachineState *spapr,
+                               target_ulong opcode,
+                               target_ulong *args)
+{
+    const target_ulong new_ptcr = args[0];
+    const target_ulong prts = new_ptcr & PRTS_MASK;
+
+    if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
+        return H_FUNCTION;
+    }
+
+    /* Equivalent to "prts + 12 - 4 > 12": PRTS may be at most 4. */
+    if (prts > 4) {
+        return H_PARAMETER;
+    }
+
+    /* Save new partition table */
+    spapr->nested_ptcr = new_ptcr;
+
+    return H_SUCCESS;
+}
+
+/*
+ * KVMPPC_H_TLB_INVALIDATE handler: intentionally a no-op that always
+ * reports H_SUCCESS; none of the arguments are examined.
+ */
+static target_ulong h_tlb_invalidate(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ /*
+ * The spapr virtual hypervisor nested HV implementation retains no L2
+ * translation state except for TLB. And the TLB is always invalidated
+ * across L1<->L2 transitions, so nothing is required here.
+ */
+
+ return H_SUCCESS;
+}
+
+/*
+ * KVMPPC_H_COPY_TOFROM_GUEST handler: not implemented, always answers
+ * H_FUNCTION without looking at its arguments.
+ */
+static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+{
+ /*
+ * This HCALL is not required, L1 KVM will take a slow path and walk the
+ * page tables manually to do the data copy.
+ */
+ return H_FUNCTION;
+}
+
+/*
+ * Snapshot the register state of @cpu that is swapped on L1<->L2
+ * transitions into @save: GPRs, LR/CTR/CFAR/MSR/NIP, CR and XER (read
+ * through their accessors), the listed SPRs and the timebase offset.
+ */
+static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu)
+{
+    CPUPPCState *env = &cpu->env;
+    const target_ulong *spr = env->spr;
+
+    /* Special purpose registers */
+    save->lpcr = spr[SPR_LPCR];
+    save->lpidr = spr[SPR_LPIDR];
+    save->pidr = spr[SPR_BOOKS_PID];
+    save->pcr = spr[SPR_PCR];
+    save->dpdes = spr[SPR_DPDES];
+    save->hfscr = spr[SPR_HFSCR];
+    save->ppr = spr[SPR_PPR];
+    save->srr0 = spr[SPR_SRR0];
+    save->srr1 = spr[SPR_SRR1];
+    save->sprg0 = spr[SPR_SPRG0];
+    save->sprg1 = spr[SPR_SPRG1];
+    save->sprg2 = spr[SPR_SPRG2];
+    save->sprg3 = spr[SPR_SPRG3];
+
+    /* General purpose and branch/control registers */
+    memcpy(save->gpr, env->gpr, sizeof(save->gpr));
+    save->nip = env->nip;
+    save->msr = env->msr;
+    save->lr = env->lr;
+    save->ctr = env->ctr;
+    save->cfar = env->cfar;
+
+    /* CR and XER are assembled by their accessors */
+    save->xer = cpu_read_xer(env);
+    save->cr = ppc_get_cr(env);
+
+    save->tb_offset = env->tb_env->tb_offset;
+}
+
+/*
+ * Install the register image held in @load on @cpu (the counterpart of
+ * nested_save_state()), then recompute hflags, re-evaluate pending
+ * interrupts, flush the TLB and drop any reservation.
+ */
+static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load)
+{
+    CPUPPCState *env = &cpu->env;
+    target_ulong *spr = env->spr;
+
+    /* Special purpose registers */
+    spr[SPR_LPCR] = load->lpcr;
+    spr[SPR_LPIDR] = load->lpidr;
+    spr[SPR_BOOKS_PID] = load->pidr;
+    spr[SPR_PCR] = load->pcr;
+    spr[SPR_DPDES] = load->dpdes;
+    spr[SPR_HFSCR] = load->hfscr;
+    spr[SPR_PPR] = load->ppr;
+    spr[SPR_SRR0] = load->srr0;
+    spr[SPR_SRR1] = load->srr1;
+    spr[SPR_SPRG0] = load->sprg0;
+    spr[SPR_SPRG1] = load->sprg1;
+    spr[SPR_SPRG2] = load->sprg2;
+    spr[SPR_SPRG3] = load->sprg3;
+
+    /* General purpose and branch/control registers */
+    memcpy(env->gpr, load->gpr, sizeof(env->gpr));
+    env->nip = load->nip;
+    env->msr = load->msr;
+    env->lr = load->lr;
+    env->ctr = load->ctr;
+    env->cfar = load->cfar;
+
+    /* CR and XER are scattered by their accessors */
+    cpu_write_xer(env, load->xer);
+    ppc_set_cr(env, load->cr);
+
+    env->tb_env->tb_offset = load->tb_offset;
+
+    /* The MSR changed: refresh hflags and look for deliverable interrupts. */
+    hreg_compute_hflags(env);
+    ppc_maybe_interrupt(env);
+
+    /*
+     * TLB entries are not tagged per L1/L2 by the nested HV code, hence
+     * every transition has to flush.
+     */
+    tlb_flush(CPU(cpu));
+
+    /* Reset the reservation */
+    env->reserve_addr = -1;
+}
+
+/*
+ * KVMPPC_H_ENTER_NESTED handler.
+ *
+ * args[0] is the guest address of a struct kvmppc_hv_guest_state and
+ * args[1] the guest address of a struct kvmppc_pt_regs; together they
+ * describe the L2 state to run.
+ *
+ * When this handler returns, the environment is switched to the L2 guest
+ * and TCG begins running that. spapr_exit_nested() performs the switch from
+ * L2 back to L1 and returns from the H_ENTER_NESTED hcall.
+ *
+ * Failure returns (no switch happens, nested_host_state is left NULL when
+ * it was allocated here):
+ *   H_NOT_AVAILABLE  no partition table has been set (nested_ptcr == 0)
+ *   H_PARAMETER      hv state cannot be mapped, has an unsupported version,
+ *                    or names lpid 0
+ *   H_NO_MEM         the L1 state save area cannot be allocated
+ *   H_P2             the pt_regs area cannot be mapped
+ */
+static target_ulong h_enter_nested(PowerPCCPU *cpu,
+                                   SpaprMachineState *spapr,
+                                   target_ulong opcode,
+                                   target_ulong *args)
+{
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    CPUPPCState *env = &cpu->env;
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    struct nested_ppc_state l2_state;
+    target_ulong hv_ptr = args[0];
+    target_ulong regs_ptr = args[1];
+    target_ulong hdec, now = cpu_ppc_load_tbl(env);
+    target_ulong lpcr, lpcr_mask;
+    struct kvmppc_hv_guest_state *hvstate;
+    struct kvmppc_hv_guest_state hv_state;
+    struct kvmppc_pt_regs *regs;
+    hwaddr len;
+
+    if (spapr->nested_ptcr == 0) {
+        return H_NOT_AVAILABLE;
+    }
+
+    len = sizeof(*hvstate);
+    hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false,
+                                MEMTXATTRS_UNSPECIFIED);
+    if (!hvstate || len != sizeof(*hvstate)) {
+        /* Only a successful (possibly short) mapping has to be released. */
+        if (hvstate) {
+            address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false);
+        }
+        return H_PARAMETER;
+    }
+
+    /* Work on a private copy so the guest cannot change it under us. */
+    memcpy(&hv_state, hvstate, len);
+
+    address_space_unmap(CPU(cpu)->as, hvstate, len, len, false);
+
+    /*
+     * We accept versions 1 and 2. Version 2 fields are unused because TCG
+     * does not implement DAWR*.
+     */
+    if (hv_state.version > HV_GUEST_STATE_VERSION) {
+        return H_PARAMETER;
+    }
+
+    if (hv_state.lpid == 0) {
+        return H_PARAMETER;
+    }
+
+    spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
+    if (!spapr_cpu->nested_host_state) {
+        return H_NO_MEM;
+    }
+
+    assert(env->spr[SPR_LPIDR] == 0);
+    assert(env->spr[SPR_DPDES] == 0);
+    nested_save_state(spapr_cpu->nested_host_state, cpu);
+
+    len = sizeof(*regs);
+    regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false,
+                             MEMTXATTRS_UNSPECIFIED);
+    if (!regs || len != sizeof(*regs)) {
+        if (regs) {
+            address_space_unmap(CPU(cpu)->as, regs, len, 0, false);
+        }
+        /* Do not leave a dangling pointer behind for later users. */
+        g_free(spapr_cpu->nested_host_state);
+        spapr_cpu->nested_host_state = NULL;
+        return H_P2;
+    }
+
+    /*
+     * Keep "len" equal to the mapped length: it is handed back to
+     * address_space_unmap() below and must describe the whole mapping,
+     * not just the GPR array.
+     */
+    assert(sizeof(l2_state.gpr) == sizeof(regs->gpr));
+    memcpy(l2_state.gpr, regs->gpr, sizeof(l2_state.gpr));
+
+    l2_state.lr = regs->link;
+    l2_state.ctr = regs->ctr;
+    l2_state.xer = regs->xer;
+    l2_state.cr = regs->ccr;
+    l2_state.msr = regs->msr;
+    l2_state.nip = regs->nip;
+
+    address_space_unmap(CPU(cpu)->as, regs, len, len, false);
+
+    l2_state.cfar = hv_state.cfar;
+    l2_state.lpidr = hv_state.lpid;
+
+    /*
+     * Only the bits in lpcr_mask are taken from the L1-supplied LPCR; the
+     * rest comes from the current LPCR with a fixed set of bits forced on
+     * and LPES0 forced off, limited to what the CPU class implements.
+     */
+    lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
+    lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask);
+    lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
+    lpcr &= ~LPCR_LPES0;
+    l2_state.lpcr = lpcr & pcc->lpcr_mask;
+
+    l2_state.pcr = hv_state.pcr;
+    /* hv_state.amor is not used */
+    l2_state.dpdes = hv_state.dpdes;
+    l2_state.hfscr = hv_state.hfscr;
+    /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs */
+    l2_state.srr0 = hv_state.srr0;
+    l2_state.srr1 = hv_state.srr1;
+    l2_state.sprg0 = hv_state.sprg[0];
+    l2_state.sprg1 = hv_state.sprg[1];
+    l2_state.sprg2 = hv_state.sprg[2];
+    l2_state.sprg3 = hv_state.sprg[3];
+    l2_state.pidr = hv_state.pidr;
+    l2_state.ppr = hv_state.ppr;
+    /* The L2 timebase offset is relative to the L1's. */
+    l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset;
+
+    /*
+     * Switch to the nested guest environment and start the "hdec" timer.
+     */
+    nested_load_state(cpu, &l2_state);
+
+    /* "now" was sampled on entry, before the timebase offset changed. */
+    hdec = hv_state.hdec_expiry - now;
+    cpu_ppc_hdecr_init(env);
+    cpu_ppc_store_hdecr(env, hdec);
+
+    /*
+     * The hv_state.vcpu_token is not needed. It is used by the KVM
+     * implementation to remember which L2 vCPU last ran on which physical
+     * CPU so as to invalidate process scope translations if it is moved
+     * between physical CPUs. For now TLBs are always flushed on L1<->L2
+     * transitions so this is not a problem.
+     *
+     * Could validate that the same vcpu_token does not attempt to run on
+     * different L1 vCPUs at the same time, but that would be a L1 KVM bug
+     * and it's not obviously worth a new data structure to do it.
+     */
+
+    spapr_cpu->in_nested = true;
+
+    /*
+     * The spapr hcall helper sets env->gpr[3] to the return value, but at
+     * this point the L1 is not returning from the hcall but rather we
+     * start running the L2, so r3 must not be clobbered, so return env->gpr[3]
+     * to leave it unchanged.
+     */
+    return env->gpr[3];
+}
+
+/*
+ * Leave the L2 guest on @cpu because of exception @excp and resume the L1
+ * as if its H_ENTER_NESTED hcall were returning.
+ *
+ * The L1 register state saved by h_enter_nested() is restored and freed,
+ * r3 is set to env->excp_vectors[excp], and the L2 state is written back
+ * to the hv guest state and pt_regs buffers whose guest addresses the L1
+ * passed in r4 and r5. If one of those buffers cannot be mapped, r3 is
+ * set to H_PARAMETER (hv state) or H_P2 (pt_regs) instead and the
+ * remaining write-back is skipped.
+ *
+ * Must only be called while the vCPU is in nested mode.
+ */
+void spapr_exit_nested(PowerPCCPU *cpu, int excp)
+{
+    CPUPPCState *env = &cpu->env;
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    struct nested_ppc_state l2_state;
+    target_ulong hv_ptr, regs_ptr;
+    target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr;
+    struct kvmppc_hv_guest_state *hvstate;
+    struct kvmppc_pt_regs *regs;
+    hwaddr len;
+
+    /* Check the precondition before touching nested_host_state. */
+    assert(spapr_cpu->in_nested);
+
+    /* r4/r5 of the L1 at hcall time are the two buffer addresses. */
+    hv_ptr = spapr_cpu->nested_host_state->gpr[4];
+    regs_ptr = spapr_cpu->nested_host_state->gpr[5];
+
+    nested_save_state(&l2_state, cpu);
+    hsrr0 = env->spr[SPR_HSRR0];
+    hsrr1 = env->spr[SPR_HSRR1];
+    hdar = env->spr[SPR_HDAR];
+    hdsisr = env->spr[SPR_HDSISR];
+    asdr = env->spr[SPR_ASDR];
+
+    /*
+     * Switch back to the host environment (including for any error).
+     */
+    assert(env->spr[SPR_LPIDR] != 0);
+    nested_load_state(cpu, spapr_cpu->nested_host_state);
+    env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */
+
+    cpu_ppc_hdecr_exit(env);
+
+    spapr_cpu->in_nested = false;
+
+    g_free(spapr_cpu->nested_host_state);
+    spapr_cpu->nested_host_state = NULL;
+
+    len = sizeof(*hvstate);
+    hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true,
+                                MEMTXATTRS_UNSPECIFIED);
+    if (!hvstate || len != sizeof(*hvstate)) {
+        /* Only a successful (possibly short) mapping has to be released. */
+        if (hvstate) {
+            address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true);
+        }
+        env->gpr[3] = H_PARAMETER;
+        return;
+    }
+
+    hvstate->cfar = l2_state.cfar;
+    hvstate->lpcr = l2_state.lpcr;
+    hvstate->pcr = l2_state.pcr;
+    hvstate->dpdes = l2_state.dpdes;
+    hvstate->hfscr = l2_state.hfscr;
+
+    /* Fault details are only reported for the exceptions that set them. */
+    if (excp == POWERPC_EXCP_HDSI) {
+        hvstate->hdar = hdar;
+        hvstate->hdsisr = hdsisr;
+        hvstate->asdr = asdr;
+    } else if (excp == POWERPC_EXCP_HISI) {
+        hvstate->asdr = asdr;
+    }
+
+    /* HEIR should be implemented for HV mode and saved here. */
+    hvstate->srr0 = l2_state.srr0;
+    hvstate->srr1 = l2_state.srr1;
+    hvstate->sprg[0] = l2_state.sprg0;
+    hvstate->sprg[1] = l2_state.sprg1;
+    hvstate->sprg[2] = l2_state.sprg2;
+    hvstate->sprg[3] = l2_state.sprg3;
+    hvstate->pidr = l2_state.pidr;
+    hvstate->ppr = l2_state.ppr;
+
+    /* Is it okay to specify write length larger than actual data written? */
+    address_space_unmap(CPU(cpu)->as, hvstate, len, len, true);
+
+    len = sizeof(*regs);
+    regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true,
+                             MEMTXATTRS_UNSPECIFIED);
+    if (!regs || len != sizeof(*regs)) {
+        if (regs) {
+            address_space_unmap(CPU(cpu)->as, regs, len, 0, true);
+        }
+        env->gpr[3] = H_P2;
+        return;
+    }
+
+    /*
+     * Keep "len" equal to the mapped length: fields beyond the GPR array
+     * are written below, so the unmap must report the whole structure as
+     * accessed rather than only sizeof(regs->gpr) bytes.
+     */
+    assert(sizeof(l2_state.gpr) == sizeof(regs->gpr));
+    memcpy(regs->gpr, l2_state.gpr, sizeof(l2_state.gpr));
+
+    regs->link = l2_state.lr;
+    regs->ctr = l2_state.ctr;
+    regs->xer = l2_state.xer;
+    regs->ccr = l2_state.cr;
+
+    /*
+     * These three exceptions report the interrupted context through
+     * SRR0/SRR1; every other exit uses HSRR0/HSRR1.
+     */
+    if (excp == POWERPC_EXCP_MCHECK ||
+        excp == POWERPC_EXCP_RESET ||
+        excp == POWERPC_EXCP_SYSCALL) {
+        regs->nip = l2_state.srr0;
+        regs->msr = l2_state.srr1 & env->msr_mask;
+    } else {
+        regs->nip = hsrr0;
+        regs->msr = hsrr1 & env->msr_mask;
+    }
+
+    /* Is it okay to specify write length larger than actual data written? */
+    address_space_unmap(CPU(cpu)->as, regs, len, len, true);
+}
+
+/*
+ * Register the four nested-HV hypercall handlers defined in this file
+ * through spapr_register_hypercall().
+ */
+void spapr_register_nested(void)
+{
+ spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
+ spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested);
+ spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate);
+ spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest);
+}
+#else
+/*
+ * Variant built when CONFIG_TCG is not defined: no nested hcalls are
+ * registered in that configuration, so this must never be called.
+ */
+void spapr_exit_nested(PowerPCCPU *cpu, int excp)
+{
+ g_assert_not_reached();
+}
+
+/*
+ * Variant built when CONFIG_TCG is not defined: there are no nested-HV
+ * hypercall handlers to register.
+ */
+void spapr_register_nested(void)
+{
+ /* DO NOTHING */
+}
+#endif