diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-05-30 15:08:00 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-05-30 15:08:00 +0100 |
commit | 60905286cb5150de854e08279bca7dfc4b549e91 (patch) | |
tree | 1d168061ed2308a88c0652e52d3227b65a08469b | |
parent | 48a8b399619cf3bb745a2e052f9fec142f14d75d (diff) | |
parent | ce4b1b56852ea741170ae85d3b8c0771c1ca7c9e (diff) | |
download | qemu-60905286cb5150de854e08279bca7dfc4b549e91.zip qemu-60905286cb5150de854e08279bca7dfc4b549e91.tar.gz qemu-60905286cb5150de854e08279bca7dfc4b549e91.tar.bz2 |
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-4.1-20190529' into staging
ppc patch queue 2019-05-29
Next pull request against qemu-4.1. Highlights:
* KVM accelerated support for the XIVE interrupt controller in PAPR
guests
* A number of TCG vector fixes
* Fixes for the PReP / 40p machine
* Improvements to make check-tcg test coverage
Other than that it's just a bunch of assorted fixes, cleanups and
minor improvements.
This supersedes both the pull request dated 2019-05-21 and the one
dated 2019-05-22. I've dropped one hunk which I think may have caused
the check-tcg failure that Peter saw (by enabling the ppc64abi32
build, which I think has been broken for ages). I'm not entirely
certain, since I haven't reproduced exactly the same failure.
# gpg: Signature made Wed 29 May 2019 07:49:04 BST
# gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full]
# gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full]
# gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full]
# gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown]
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392
* remotes/dgibson/tags/ppc-for-4.1-20190529: (44 commits)
ppc/pnv: add dummy XSCOM registers for PRD initialization
ppc/pnv: introduce new skiboot platform properties
spapr: Don't migrate the hpt_maxpagesize cap to older machine types
spapr: change default interrupt mode to 'dual'
spapr/xive: fix multiple resets when using the 'dual' interrupt mode
docs: provide documentation on the POWER9 XIVE interrupt controller
spapr/irq: add KVM support to the 'dual' machine
ppc/xics: fix irq priority in ics_set_irq_type()
spapr/irq: initialize the IRQ device only once
spapr/irq: introduce a spapr_irq_init_device() helper
spapr: check for the activation of the KVM IRQ device
spapr: introduce routines to delete the KVM IRQ device
sysbus: add a sysbus_mmio_unmap() helper
spapr/xive: activate KVM support
spapr/xive: add migration support for KVM
spapr/xive: introduce a VM state change handler
spapr/xive: add state synchronization with KVM
spapr/xive: add hcall support when under KVM
spapr/xive: add KVM support
spapr: Print out extra hints when CAS negotiation of interrupt mode fails
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
43 files changed, 1980 insertions, 212 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 67dcffd..a96829e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1720,6 +1720,7 @@ L: qemu-ppc@nongnu.org S: Supported F: hw/*/*xive* F: include/hw/*/*xive* +F: docs/*/*xive* Subsystems ---------- @@ -198,7 +198,7 @@ supported_kvm_target() { i386:i386 | i386:x86_64 | i386:x32 | \ x86_64:i386 | x86_64:x86_64 | x86_64:x32 | \ mips:mips | mipsel:mips | \ - ppc:ppc | ppc64:ppc | ppc:ppc64 | ppc64:ppc64 | \ + ppc:ppc | ppc64:ppc | ppc:ppc64 | ppc64:ppc64 | ppc64:ppc64le | \ s390x:s390x) return 0 ;; @@ -502,8 +502,11 @@ cross_cc_arm="arm-linux-gnueabihf-gcc" cross_cc_cflags_armeb="-mbig-endian" cross_cc_i386="i386-pc-linux-gnu-gcc" cross_cc_cflags_i386="" -cross_cc_powerpc="powerpc-linux-gnu-gcc" -cross_cc_powerpc="powerpc-linux-gnu-gcc" +cross_cc_ppc="powerpc-linux-gnu-gcc" +cross_cc_cflags_ppc="-m32" +cross_cc_ppc64="powerpc-linux-gnu-gcc" +cross_cc_cflags_ppc64="-m64" +cross_cc_ppc64le="powerpc64le-linux-gnu-gcc" enabled_cross_compilers="" @@ -700,7 +703,11 @@ elif check_define __sparc__ ; then fi elif check_define _ARCH_PPC ; then if check_define _ARCH_PPC64 ; then - cpu="ppc64" + if check_define _LITTLE_ENDIAN ; then + cpu="ppc64le" + else + cpu="ppc64" + fi else cpu="ppc" fi @@ -731,10 +738,14 @@ ARCH= # Note that this case should only have supported host CPUs, not guests. case "$cpu" in ppc|ppc64|s390|s390x|sparc64|x32|riscv32|riscv64) - cpu="$cpu" supported_cpu="yes" eval "cross_cc_${cpu}=\$host_cc" ;; + ppc64le) + ARCH="ppc64" + supported_cpu="yes" + cross_cc_ppc64le=$host_cc + ;; i386|i486|i586|i686|i86pc|BePC) cpu="i386" supported_cpu="yes" @@ -1538,44 +1549,44 @@ case "$cpu" in ppc) CPU_CFLAGS="-m32" LDFLAGS="-m32 $LDFLAGS" - cross_cc_powerpc=$cc - cross_cc_cflags_powerpc=$CPU_CFLAGS + cross_cc_ppc=$cc + cross_cc_cflags_ppc="$CPU_CFLAGS" ;; ppc64) CPU_CFLAGS="-m64" LDFLAGS="-m64 $LDFLAGS" cross_cc_ppc64=$cc - cross_cc_cflags_ppc64=$CPU_CFLAGS + cross_cc_cflags_ppc64="$CPU_CFLAGS" ;; sparc) CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" LDFLAGS="-m32 -mv8plus $LDFLAGS" cross_cc_sparc=$cc - cross_cc_cflags_sparc=$CPU_CFLAGS + cross_cc_cflags_sparc="$CPU_CFLAGS" ;; sparc64) CPU_CFLAGS="-m64 -mcpu=ultrasparc" LDFLAGS="-m64 $LDFLAGS" cross_cc_sparc64=$cc - cross_cc_cflags_sparc64=$CPU_CFLAGS + cross_cc_cflags_sparc64="$CPU_CFLAGS" ;; s390) CPU_CFLAGS="-m31" LDFLAGS="-m31 $LDFLAGS" cross_cc_s390=$cc - cross_cc_cflags_s390=$CPU_CFLAGS + cross_cc_cflags_s390="$CPU_CFLAGS" ;; s390x) CPU_CFLAGS="-m64" LDFLAGS="-m64 $LDFLAGS" cross_cc_s390x=$cc - cross_cc_cflags_s390x=$CPU_CFLAGS + cross_cc_cflags_s390x="$CPU_CFLAGS" ;; i386) CPU_CFLAGS="-m32" LDFLAGS="-m32 $LDFLAGS" cross_cc_i386=$cc - cross_cc_cflags_i386=$CPU_CFLAGS + cross_cc_cflags_i386="$CPU_CFLAGS" ;; x86_64) # ??? Only extremely old AMD cpus do not have cmpxchg16b. @@ -1584,13 +1595,13 @@ case "$cpu" in CPU_CFLAGS="-m64 -mcx16" LDFLAGS="-m64 $LDFLAGS" cross_cc_x86_64=$cc - cross_cc_cflags_x86_64=$CPU_CFLAGS + cross_cc_cflags_x86_64="$CPU_CFLAGS" ;; x32) CPU_CFLAGS="-mx32" LDFLAGS="-mx32 $LDFLAGS" cross_cc_i386=$cc - cross_cc_cflags_i386=$CPU_CFLAGS + cross_cc_cflags_i386="$CPU_CFLAGS" ;; # No special flags required for other host CPUs esac @@ -6198,7 +6209,7 @@ if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \ fi done fi -if test "$cpu" = "ppc64" && test "$targetos" != "Darwin" ; then +if test "$ARCH" = "ppc64" && test "$targetos" != "Darwin" ; then roms="$roms spapr-rtas" fi @@ -7392,7 +7403,7 @@ if test "$linux" = "yes" ; then i386|x86_64|x32) linux_arch=x86 ;; - ppc|ppc64) + ppc|ppc64|ppc64le) linux_arch=powerpc ;; s390x) @@ -7553,7 +7564,8 @@ case "$target_name" in ;; ppc) gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml" - target_compiler=$cross_cc_powerpc + target_compiler=$cross_cc_ppc + target_compiler_cflags="$cross_cc_cflags_ppc" ;; ppc64) TARGET_BASE_ARCH=ppc @@ -7561,6 +7573,7 @@ case "$target_name" in mttcg=yes gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml" target_compiler=$cross_cc_ppc64 + target_compiler_cflags="$cross_cc_cflags_ppc64" ;; ppc64le) TARGET_ARCH=ppc64 diff --git a/docs/index.rst b/docs/index.rst index 3690955..baa5791 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,4 +12,5 @@ Welcome to QEMU's documentation! interop/index devel/index + specs/index diff --git a/docs/specs/index.rst b/docs/specs/index.rst new file mode 100644 index 0000000..2e92751 --- /dev/null +++ b/docs/specs/index.rst @@ -0,0 +1,13 @@ +. This is the top level page for the 'specs' manual + + +QEMU full-system emulation guest hardware specifications +======================================================== + + +Contents: + +.. toctree:: + :maxdepth: 2 + + xive diff --git a/docs/specs/ppc-spapr-xive.rst b/docs/specs/ppc-spapr-xive.rst new file mode 100644 index 0000000..539ce7c --- /dev/null +++ b/docs/specs/ppc-spapr-xive.rst @@ -0,0 +1,174 @@ +XIVE for sPAPR (pseries machines) +================================= + +The POWER9 processor comes with a new interrupt controller +architecture, called XIVE as "eXternal Interrupt Virtualization +Engine". It supports a larger number of interrupt sources and offers +virtualization features which enables the HW to deliver interrupts +directly to virtual processors without hypervisor assistance. + +A QEMU ``pseries`` machine (which is PAPR compliant) using POWER9 +processors can run under two interrupt modes: + +- *Legacy Compatibility Mode* + + the hypervisor provides identical interfaces and similar + functionality to PAPR+ Version 2.7. This is the default mode + + It is also referred as *XICS* in QEMU. + +- *XIVE native exploitation mode* + + the hypervisor provides new interfaces to manage the XIVE control + structures, and provides direct control for interrupt management + through MMIO pages. + +Which interrupt modes can be used by the machine is negotiated with +the guest O/S during the Client Architecture Support negotiation +sequence. The two modes are mutually exclusive. + +Both interrupt mode share the same IRQ number space. See below for the +layout. + +CAS Negotiation +--------------- + +QEMU advertises the supported interrupt modes in the device tree +property "ibm,arch-vec-5-platform-support" in byte 23 and the OS +Selection for XIVE is indicated in the "ibm,architecture-vec-5" +property byte 23. + +The interrupt modes supported by the machine depend on the CPU type +(POWER9 is required for XIVE) but also on the machine property +``ic-mode`` which can be set on the command line. It can take the +following values: ``xics``, ``xive``, ``dual`` and currently ``xics`` +is the default but it may change in the future. + +The choosen interrupt mode is activated after a reconfiguration done +in a machine reset. + +XIVE Device tree properties +--------------------------- + +The properties for the PAPR interrupt controller node when the *XIVE +native exploitation mode* is selected shoud contain: + +- ``device_type`` + + value should be "power-ivpe". + +- ``compatible`` + + value should be "ibm,power-ivpe". + +- ``reg`` + + contains the base address and size of the thread interrupt + managnement areas (TIMA), for the User level and for the Guest OS + level. Only the Guest OS level is taken into account today. + +- ``ibm,xive-eq-sizes`` + + the size of the event queues. One cell per size supported, contains + log2 of size, in ascending order. + +- ``ibm,xive-lisn-ranges`` + + the IRQ interrupt number ranges assigned to the guest for the IPIs. + +The root node also exports : + +- ``ibm,plat-res-int-priorities`` + + contains a list of priorities that the hypervisor has reserved for + its own use. + +IRQ number space +---------------- + +IRQ Number space of the ``pseries`` machine is 8K wide and is the same +for both interrupt mode. The different ranges are defined as follow : + +- ``0x0000 .. 0x0FFF`` 4K CPU IPIs (only used under XIVE) +- ``0x1000 .. 0x1000`` 1 EPOW +- ``0x1001 .. 0x1001`` 1 HOTPLUG +- ``0x1100 .. 0x11FF`` 256 VIO devices +- ``0x1200 .. 0x127F`` 32 PHBs devices +- ``0x1280 .. 0x12FF`` unused +- ``0x1300 .. 0x1FFF`` PHB MSIs + +Monitoring XIVE +--------------- + +The state of the XIVE interrupt controller can be queried through the +monitor commands ``info pic``. The output comes in two parts. + +First, the state of the thread interrupt context registers is dumped +for each CPU : + +:: + + (qemu) info pic + CPU[0000]: QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR W2 + CPU[0000]: USER 00 00 00 00 00 00 00 00 00000000 + CPU[0000]: OS 00 ff 00 00 ff 00 ff ff 80000400 + CPU[0000]: POOL 00 00 00 00 00 00 00 00 00000000 + CPU[0000]: PHYS 00 00 00 00 00 00 00 ff 00000000 + ... + +In the case of a ``pseries`` machine, QEMU acts as the hypervisor and only +the O/S and USER register rings make sense. ``W2`` contains the vCPU CAM +line which is set to the VP identifier. + +Then comes the routing information which aggregates the EAS and the +END configuration: + +:: + + ... + LISN PQ EISN CPU/PRIO EQ + 00000000 MSI -- 00000010 0/6 380/16384 @1fe3e0000 ^1 [ 80000010 ... ] + 00000001 MSI -- 00000010 1/6 305/16384 @1fc230000 ^1 [ 80000010 ... ] + 00000002 MSI -- 00000010 2/6 220/16384 @1fc2f0000 ^1 [ 80000010 ... ] + 00000003 MSI -- 00000010 3/6 201/16384 @1fc390000 ^1 [ 80000010 ... ] + 00000004 MSI -Q M 00000000 + 00000005 MSI -Q M 00000000 + 00000006 MSI -Q M 00000000 + 00000007 MSI -Q M 00000000 + 00001000 MSI -- 00000012 0/6 380/16384 @1fe3e0000 ^1 [ 80000010 ... ] + 00001001 MSI -- 00000013 0/6 380/16384 @1fe3e0000 ^1 [ 80000010 ... ] + 00001100 MSI -- 00000100 1/6 305/16384 @1fc230000 ^1 [ 80000010 ... ] + 00001101 MSI -Q M 00000000 + 00001200 LSI -Q M 00000000 + 00001201 LSI -Q M 00000000 + 00001202 LSI -Q M 00000000 + 00001203 LSI -Q M 00000000 + 00001300 MSI -- 00000102 1/6 305/16384 @1fc230000 ^1 [ 80000010 ... ] + 00001301 MSI -- 00000103 2/6 220/16384 @1fc2f0000 ^1 [ 80000010 ... ] + 00001302 MSI -- 00000104 3/6 201/16384 @1fc390000 ^1 [ 80000010 ... ] + +The source information and configuration: + +- The ``LISN`` column outputs the interrupt number of the source in + range ``[ 0x0 ... 0x1FFF ]`` and its type : ``MSI`` or ``LSI`` +- The ``PQ`` column reflects the state of the PQ bits of the source : + + - ``--`` source is ready to take events + - ``P-`` an event was sent and an EOI is PENDING + - ``PQ`` an event was QUEUED + - ``-Q`` source is OFF + + a ``M`` indicates that source is *MASKED* at the EAS level, + +The targeting configuration : + +- The ``EISN`` column is the event data that will be queued in the event + queue of the O/S. +- The ``CPU/PRIO`` column is the tuple defining the CPU number and + priority queue serving the source. +- The ``EQ`` column outputs : + + - the current index of the event queue/ the max number of entries + - the O/S event queue address + - the toggle bit + - the last entries that were pushed in the event queue. diff --git a/docs/specs/ppc-xive.rst b/docs/specs/ppc-xive.rst new file mode 100644 index 0000000..b997dc0 --- /dev/null +++ b/docs/specs/ppc-xive.rst @@ -0,0 +1,199 @@ +================================ +POWER9 XIVE interrupt controller +================================ + +The POWER9 processor comes with a new interrupt controller +architecture, called XIVE as "eXternal Interrupt Virtualization +Engine". + +Compared to the previous architecture, the main characteristics of +XIVE are to support a larger number of interrupt sources and to +deliver interrupts directly to virtual processors without hypervisor +assistance. This removes the context switches required for the +delivery process. + + +XIVE architecture +================= + +The XIVE IC is composed of three sub-engines, each taking care of a +processing layer of external interrupts: + +- Interrupt Virtualization Source Engine (IVSE), or Source Controller + (SC). These are found in PCI PHBs, in the PSI host bridge + controller, but also inside the main controller for the core IPIs + and other sub-chips (NX, CAP, NPU) of the chip/processor. They are + configured to feed the IVRE with events. +- Interrupt Virtualization Routing Engine (IVRE) or Virtualization + Controller (VC). It handles event coalescing and perform interrupt + routing by matching an event source number with an Event + Notification Descriptor (END). +- Interrupt Virtualization Presentation Engine (IVPE) or Presentation + Controller (PC). It maintains the interrupt context state of each + thread and handles the delivery of the external interrupt to the + thread. + +:: + + XIVE Interrupt Controller + +------------------------------------+ IPIs + | +---------+ +---------+ +--------+ | +-------+ + | |IVRE | |Common Q | |IVPE |----> | CORES | + | | esb | | | | |----> | | + | | eas | | Bridge | | tctx |----> | | + | |SC end | | | | nvt | | | | + +------+ | +---------+ +----+----+ +--------+ | +-+-+-+-+ + | RAM | +------------------|-----------------+ | | | + | | | | | | + | | | | | | + | | +--------------------v------------------------v-v-v--+ other + | <--+ Power Bus +--> chips + | esb | +---------+-----------------------+------------------+ + | eas | | | + | end | +--|------+ | + | nvt | +----+----+ | +----+----+ + +------+ |IVSE | | |IVSE | + | | | | | + | PQ-bits | | | PQ-bits | + | local |-+ | in VC | + +---------+ +---------+ + PCIe NX,NPU,CAPI + + + PQ-bits: 2 bits source state machine (P:pending Q:queued) + esb: Event State Buffer (Array of PQ bits in an IVSE) + eas: Event Assignment Structure + end: Event Notification Descriptor + nvt: Notification Virtual Target + tctx: Thread interrupt Context registers + + + +XIVE internal tables +-------------------- + +Each of the sub-engines uses a set of tables to redirect interrupts +from event sources to CPU threads. + +:: + + +-------+ + User or O/S | EQ | + or +------>|entries| + Hypervisor | | .. | + Memory | +-------+ + | ^ + | | + +-------------------------------------------------+ + | | + Hypervisor +------+ +---+--+ +---+--+ +------+ + Memory | ESB | | EAT | | ENDT | | NVTT | + (skiboot) +----+-+ +----+-+ +----+-+ +------+ + ^ | ^ | ^ | ^ + | | | | | | | + +-------------------------------------------------+ + | | | | | | | + | | | | | | | + +----|--|--------|--|--------|--|-+ +-|-----+ +------+ + | | | | | | | | | | tctx| |Thread| + IPI or ---+ + v + v + v |---| + .. |-----> | + HW events | | | | | | + | IVRE | | IVPE | +------+ + +---------------------------------+ +-------+ + + +The IVSE have a 2-bits state machine, P for pending and Q for queued, +for each source that allows events to be triggered. They are stored in +an Event State Buffer (ESB) array and can be controlled by MMIOs. + +If the event is let through, the IVRE looks up in the Event Assignment +Structure (EAS) table for an Event Notification Descriptor (END) +configured for the source. Each Event Notification Descriptor defines +a notification path to a CPU and an in-memory Event Queue, in which +will be enqueued an EQ data for the O/S to pull. + +The IVPE determines if a Notification Virtual Target (NVT) can handle +the event by scanning the thread contexts of the VCPUs dispatched on +the processor HW threads. It maintains the interrupt context state of +each thread in a NVT table. + +XIVE thread interrupt context +----------------------------- + +The XIVE presenter can generate four different exceptions to its +HW threads: + +- hypervisor exception +- O/S exception +- Event-Based Branch (user level) +- msgsnd (doorbell) + +Each exception has a state independent from the others called a Thread +Interrupt Management context. This context is a set of registers which +lets the thread handle priority management and interrupt +acknowledgment among other things. The most important ones being : + +- Interrupt Priority Register (PIPR) +- Interrupt Pending Buffer (IPB) +- Current Processor Priority (CPPR) +- Notification Source Register (NSR) + +TIMA +~~~~ + +The Thread Interrupt Management registers are accessible through a +specific MMIO region, called the Thread Interrupt Management Area +(TIMA), four aligned pages, each exposing a different view of the +registers. First page (page address ending in ``0b00``) gives access +to the entire context and is reserved for the ring 0 view for the +physical thread context. The second (page address ending in ``0b01``) +is for the hypervisor, ring 1 view. The third (page address ending in +``0b10``) is for the operating system, ring 2 view. The fourth (page +address ending in ``0b11``) is for user level, ring 3 view. + +Interrupt flow from an O/S perspective +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After an event data has been enqueued in the O/S Event Queue, the IVPE +raises the bit corresponding to the priority of the pending interrupt +in the register IBP (Interrupt Pending Buffer) to indicate that an +event is pending in one of the 8 priority queues. The Pending +Interrupt Priority Register (PIPR) is also updated using the IPB. This +register represent the priority of the most favored pending +notification. + +The PIPR is then compared to the the Current Processor Priority +Register (CPPR). If it is more favored (numerically less than), the +CPU interrupt line is raised and the EO bit of the Notification Source +Register (NSR) is updated to notify the presence of an exception for +the O/S. The O/S acknowledges the interrupt with a special load in the +Thread Interrupt Management Area. + +The O/S handles the interrupt and when done, performs an EOI using a +MMIO operation on the ESB management page of the associate source. + +Overview of the QEMU models for XIVE +==================================== + +The XiveSource models the IVSE in general, internal and external. It +handles the source ESBs and the MMIO interface to control them. + +The XiveNotifier is a small helper interface interconnecting the +XiveSource to the XiveRouter. + +The XiveRouter is an abstract model acting as a combined IVRE and +IVPE. It routes event notifications using the EAS and END tables to +the IVPE sub-engine which does a CAM scan to find a CPU to deliver the +exception. Storage should be provided by the inheriting classes. + +XiveEnDSource is a special source object. It exposes the END ESB MMIOs +of the Event Queues which are used for coalescing event notifications +and for escalation. Not used on the field, only to sync the EQ cache +in OPAL. + +Finally, the XiveTCTX contains the interrupt state context of a thread, +four sets of registers, one for each exception that can be delivered +to a CPU. These contexts are scanned by the IVPE to find a matching VP +when a notification is triggered. It also models the Thread Interrupt +Management Area (TIMA), which exposes the thread context registers to +the CPU for interrupt management. diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index 307cf90..689a867 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -153,6 +153,16 @@ static void sysbus_mmio_map_common(SysBusDevice *dev, int n, hwaddr addr, } } +void sysbus_mmio_unmap(SysBusDevice *dev, int n) +{ + assert(n >= 0 && n < dev->num_mmio); + + if (dev->mmio[n].addr != (hwaddr)-1) { + memory_region_del_subregion(get_system_memory(), dev->mmio[n].memory); + dev->mmio[n].addr = (hwaddr)-1; + } +} + void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr) { sysbus_mmio_map_common(dev, n, addr, false, 0); diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs index df712c3..03019b9 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -39,6 +39,7 @@ obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o obj-$(CONFIG_XICS_KVM) += xics_kvm.o obj-$(CONFIG_XIVE) += xive.o obj-$(CONFIG_XIVE_SPAPR) += spapr_xive.o +obj-$(CONFIG_XIVE_KVM) += spapr_xive_kvm.o obj-$(CONFIG_POWERNV) += xics_pnv.o pnv_xive.o obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o obj-$(CONFIG_S390_FLIC) += s390_flic.o diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 097f88d..62e0ef8 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -41,13 +41,6 @@ #define SPAPR_XIVE_NVT_BASE 0x400 /* - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value - * to the controller block id value. It can nevertheless be changed - * for testing purpose. - */ -#define SPAPR_XIVE_BLOCK_ID 0x0 - -/* * sPAPR NVT and END indexing helpers */ static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx) @@ -86,6 +79,22 @@ static int spapr_xive_target_to_nvt(uint32_t target, * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8 * priorities per CPU */ +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, + uint32_t *out_server, uint8_t *out_prio) +{ + + assert(end_blk == SPAPR_XIVE_BLOCK_ID); + + if (out_server) { + *out_server = end_idx >> 3; + } + + if (out_prio) { + *out_prio = end_idx & 0x7; + } + return 0; +} + static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio, uint8_t *out_end_blk, uint32_t *out_end_idx) { @@ -120,6 +129,7 @@ static int spapr_xive_target_to_end(uint32_t target, uint8_t prio, static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end, Monitor *mon) { + uint64_t qaddr_base = xive_end_qaddr(end); uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1); uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0); @@ -127,9 +137,9 @@ static void spapr_xive_end_pic_print_info(SpaprXive *xive, XiveEND *end, uint32_t nvt = xive_get_field32(END_W6_NVT_INDEX, end->w6); uint8_t priority = xive_get_field32(END_W7_F0_PRIORITY, end->w7); - monitor_printf(mon, "%3d/%d % 6d/%5d ^%d", + monitor_printf(mon, "%3d/%d % 6d/%5d @%"PRIx64" ^%d", spapr_xive_nvt_to_target(0, nvt), - priority, qindex, qentries, qgen); + priority, qindex, qentries, qaddr_base, qgen); xive_end_queue_pic_print_info(end, 6, mon); monitor_printf(mon, "]"); @@ -140,7 +150,17 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) XiveSource *xsrc = &xive->source; int i; - monitor_printf(mon, " LSIN PQ EISN CPU/PRIO EQ\n"); + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + + kvmppc_xive_synchronize_state(xive, &local_err); + if (local_err) { + error_report_err(local_err); + return; + } + } + + monitor_printf(mon, " LISN PQ EISN CPU/PRIO EQ\n"); for (i = 0; i < xive->nr_irqs; i++) { uint8_t pq = xive_source_esb_get(xsrc, i); @@ -173,7 +193,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) } } -static void spapr_xive_map_mmio(SpaprXive *xive) +void spapr_xive_map_mmio(SpaprXive *xive) { sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base); sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base); @@ -250,6 +270,9 @@ static void spapr_xive_instance_init(Object *obj) object_initialize_child(obj, "end_source", &xive->end_source, sizeof(xive->end_source), TYPE_XIVE_END_SOURCE, &error_abort, NULL); + + /* Not connected to the KVM XIVE device */ + xive->fd = -1; } static void spapr_xive_realize(DeviceState *dev, Error **errp) @@ -304,22 +327,36 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) xive->eat = g_new0(XiveEAS, xive->nr_irqs); xive->endt = g_new0(XiveEND, xive->nr_ends); - /* TIMA initialization */ - memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive, - "xive.tima", 4ull << TM_SHIFT); + xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, + xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); + + qemu_register_reset(spapr_xive_reset, dev); /* Define all XIVE MMIO regions on SysBus */ sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio); sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio); sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio); +} - /* Map all regions */ - spapr_xive_map_mmio(xive); +void spapr_xive_init(SpaprXive *xive, Error **errp) +{ + XiveSource *xsrc = &xive->source; - xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, - xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); + /* + * The emulated XIVE device can only be initialized once. If the + * ESB memory region has been already mapped, it means we have been + * through there. + */ + if (memory_region_is_mapped(&xsrc->esb_mmio)) { + return; + } - qemu_register_reset(spapr_xive_reset, dev); + /* TIMA initialization */ + memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &xive_tm_ops, xive, + "xive.tima", 4ull << TM_SHIFT); + + /* Map all regions */ + spapr_xive_map_mmio(xive); } static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk, @@ -427,10 +464,34 @@ static const VMStateDescription vmstate_spapr_xive_eas = { }, }; +static int vmstate_spapr_xive_pre_save(void *opaque) +{ + if (kvm_irqchip_in_kernel()) { + return kvmppc_xive_pre_save(SPAPR_XIVE(opaque)); + } + + return 0; +} + +/* + * Called by the sPAPR IRQ backend 'post_load' method at the machine + * level. + */ +int spapr_xive_post_load(SpaprXive *xive, int version_id) +{ + if (kvm_irqchip_in_kernel()) { + return kvmppc_xive_post_load(xive, version_id); + } + + return 0; +} + static const VMStateDescription vmstate_spapr_xive = { .name = TYPE_SPAPR_XIVE, .version_id = 1, .minimum_version_id = 1, + .pre_save = vmstate_spapr_xive_pre_save, + .post_load = NULL, /* handled at the machine level */ .fields = (VMStateField[]) { VMSTATE_UINT32_EQUAL(nr_irqs, SpaprXive, NULL), VMSTATE_STRUCT_VARRAY_POINTER_UINT32(eat, SpaprXive, nr_irqs, @@ -494,6 +555,17 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi) if (lsi) { xive_source_irq_set_lsi(xsrc, lisn); } + + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + + kvmppc_xive_source_reset_one(xsrc, lisn, &local_err); + if (local_err) { + error_report_err(local_err); + return false; + } + } + return true; } @@ -755,6 +827,16 @@ static target_ulong h_int_set_source_config(PowerPCCPU *cpu, new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn); } + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + + kvmppc_xive_set_source_config(xive, lisn, &new_eas, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + } + out: xive->eat[lisn] = new_eas; return H_SUCCESS; @@ -993,6 +1075,12 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, case 16: case 21: case 24: + if (!QEMU_IS_ALIGNED(qpage, 1ul << qsize)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: EQ @0x%" HWADDR_PRIx + " is not naturally aligned with %" HWADDR_PRIx "\n", + qpage, (hwaddr)1 << qsize); + return H_P4; + } end.w2 = cpu_to_be32((qpage >> 32) & 0x0fffffff); end.w3 = cpu_to_be32(qpage & 0xffffffff); end.w0 |= cpu_to_be32(END_W0_ENQUEUE); @@ -1060,6 +1148,16 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, */ out: + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + + kvmppc_xive_set_queue_config(xive, end_blk, end_idx, &end, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + } + /* Update END */ memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND)); return H_SUCCESS; @@ -1144,14 +1242,23 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, } if (xive_end_is_enqueue(end)) { - args[1] = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 - | be32_to_cpu(end->w3); + args[1] = xive_end_qaddr(end); args[2] = xive_get_field32(END_W0_QSIZE, end->w0) + 12; } else { args[1] = 0; args[2] = 0; } + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + + kvmppc_xive_get_queue_config(xive, end_blk, end_idx, end, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + } + /* TODO: do we need any locking on the END ? */ if (flags & SPAPR_XIVE_END_DEBUG) { /* Load the event queue generation number into the return flags */ @@ -1304,15 +1411,20 @@ static target_ulong h_int_esb(PowerPCCPU *cpu, return H_P3; } - mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset; + if (kvm_irqchip_in_kernel()) { + args[0] = kvmppc_xive_esb_rw(xsrc, lisn, offset, data, + flags & SPAPR_XIVE_ESB_STORE); + } else { + mmio_addr = xive->vc_base + xive_source_esb_mgmt(xsrc, lisn) + offset; - if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8, - (flags & SPAPR_XIVE_ESB_STORE))) { - qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%" - HWADDR_PRIx "\n", mmio_addr); - return H_HARDWARE; + if (dma_memory_rw(&address_space_memory, mmio_addr, &data, 8, + (flags & SPAPR_XIVE_ESB_STORE))) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to access ESB @0x%" + HWADDR_PRIx "\n", mmio_addr); + return H_HARDWARE; + } + args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data; } - args[0] = (flags & SPAPR_XIVE_ESB_STORE) ? -1 : data; return H_SUCCESS; } @@ -1369,7 +1481,20 @@ static target_ulong h_int_sync(PowerPCCPU *cpu, * This is not needed when running the emulation under QEMU */ - /* This is not real hardware. Nothing to be done */ + /* + * This is not real hardware. Nothing to be done unless when + * under KVM + */ + + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + + kvmppc_xive_sync_source(xive, lisn, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + } return H_SUCCESS; } @@ -1404,6 +1529,16 @@ static target_ulong h_int_reset(PowerPCCPU *cpu, } device_reset(DEVICE(xive)); + + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + + kvmppc_xive_reset(xive, &local_err); + if (local_err) { + error_report_err(local_err); + return H_HARDWARE; + } + } return H_SUCCESS; } diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c new file mode 100644 index 0000000..b48f135 --- /dev/null +++ b/hw/intc/spapr_xive_kvm.c @@ -0,0 +1,823 @@ +/* + * QEMU PowerPC sPAPR XIVE interrupt controller model + * + * Copyright (c) 2017-2019, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "target/ppc/cpu.h" +#include "sysemu/cpus.h" +#include "sysemu/kvm.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_xive.h" +#include "hw/ppc/xive.h" +#include "kvm_ppc.h" + +#include <sys/ioctl.h> + +/* + * Helpers for CPU hotplug + * + * TODO: make a common KVMEnabledCPU layer for XICS and XIVE + */ +typedef struct KVMEnabledCPU { + unsigned long vcpu_id; + QLIST_ENTRY(KVMEnabledCPU) node; +} KVMEnabledCPU; + +static QLIST_HEAD(, KVMEnabledCPU) + kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus); + +static bool kvm_cpu_is_enabled(CPUState *cs) +{ + KVMEnabledCPU *enabled_cpu; + unsigned long vcpu_id = kvm_arch_vcpu_id(cs); + + QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) { + if (enabled_cpu->vcpu_id == vcpu_id) { + return true; + } + } + return false; +} + +static void kvm_cpu_enable(CPUState *cs) +{ + KVMEnabledCPU *enabled_cpu; + unsigned long vcpu_id = kvm_arch_vcpu_id(cs); + + enabled_cpu = g_malloc(sizeof(*enabled_cpu)); + enabled_cpu->vcpu_id = vcpu_id; + QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node); +} + +static void kvm_cpu_disable_all(void) +{ + KVMEnabledCPU *enabled_cpu, *next; + + QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) { + QLIST_REMOVE(enabled_cpu, node); + g_free(enabled_cpu); + } +} + +/* + * XIVE Thread Interrupt Management context (KVM) + */ + +static void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) +{ + uint64_t state[2]; + int ret; + + /* word0 and word1 of the OS ring. */ + state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]); + + ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); + if (ret != 0) { + error_setg_errno(errp, errno, + "XIVE: could not restore KVM state of CPU %ld", + kvm_arch_vcpu_id(tctx->cs)); + } +} + +void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) +{ + SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive; + uint64_t state[2] = { 0 }; + int ret; + + /* The KVM XIVE device is not in use */ + if (xive->fd == -1) { + return; + } + + ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); + if (ret != 0) { + error_setg_errno(errp, errno, + "XIVE: could not capture KVM state of CPU %ld", + kvm_arch_vcpu_id(tctx->cs)); + return; + } + + /* word0 and word1 of the OS ring. */ + *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0]; +} + +typedef struct { + XiveTCTX *tctx; + Error *err; +} XiveCpuGetState; + +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu, + run_on_cpu_data arg) +{ + XiveCpuGetState *s = arg.host_ptr; + + kvmppc_xive_cpu_get_state(s->tctx, &s->err); +} + +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) +{ + XiveCpuGetState s = { + .tctx = tctx, + .err = NULL, + }; + + /* + * Kick the vCPU to make sure they are available for the KVM ioctl. + */ + run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state, + RUN_ON_CPU_HOST_PTR(&s)); + + if (s.err) { + error_propagate(errp, s.err); + return; + } +} + +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) +{ + SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive; + unsigned long vcpu_id; + int ret; + + /* The KVM XIVE device is not in use */ + if (xive->fd == -1) { + return; + } + + /* Check if CPU was hot unplugged and replugged. */ + if (kvm_cpu_is_enabled(tctx->cs)) { + return; + } + + vcpu_id = kvm_arch_vcpu_id(tctx->cs); + + ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd, + vcpu_id, 0); + if (ret < 0) { + error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s", + vcpu_id, strerror(errno)); + return; + } + + kvm_cpu_enable(tctx->cs); +} + +/* + * XIVE Interrupt Source (KVM) + */ + +void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, + Error **errp) +{ + uint32_t end_idx; + uint32_t end_blk; + uint8_t priority; + uint32_t server; + bool masked; + uint32_t eisn; + uint64_t kvm_src; + Error *local_err = NULL; + + assert(xive_eas_is_valid(eas)); + + end_idx = xive_get_field64(EAS_END_INDEX, eas->w); + end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); + eisn = xive_get_field64(EAS_END_DATA, eas->w); + masked = xive_eas_is_masked(eas); + + spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); + + kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT & + KVM_XIVE_SOURCE_PRIORITY_MASK; + kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT & + KVM_XIVE_SOURCE_SERVER_MASK; + kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) & + KVM_XIVE_SOURCE_MASKED_MASK; + kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) & + KVM_XIVE_SOURCE_EISN_MASK; + + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn, + &kvm_src, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp) +{ + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn, + NULL, true, errp); +} + +/* + * At reset, the interrupt sources are simply created and MASKED. We + * only need to inform the KVM XIVE device about their type: LSI or + * MSI. + */ +void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) +{ + SpaprXive *xive = SPAPR_XIVE(xsrc->xive); + uint64_t state = 0; + + /* The KVM XIVE device is not in use */ + if (xive->fd == -1) { + return; + } + + if (xive_source_irq_is_lsi(xsrc, srcno)) { + state |= KVM_XIVE_LEVEL_SENSITIVE; + if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { + state |= KVM_XIVE_LEVEL_ASSERTED; + } + } + + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state, + true, errp); +} + +static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) +{ + int i; + + for (i = 0; i < xsrc->nr_irqs; i++) { + Error *local_err = NULL; + + kvmppc_xive_source_reset_one(xsrc, i, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} + +/* + * This is used to perform the magic loads on the ESB pages, described + * in xive.h. + * + * Memory barriers should not be needed for loads (no store for now). + */ +static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, + uint64_t data, bool write) +{ + uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) + + offset; + + if (write) { + *addr = cpu_to_be64(data); + return -1; + } else { + /* Prevent the compiler from optimizing away the load */ + volatile uint64_t value = be64_to_cpu(*addr); + return value; + } +} + +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset) +{ + return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3; +} + +static void xive_esb_trigger(XiveSource *xsrc, int srcno) +{ + uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno); + + *addr = 0x0; +} + +uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, + uint64_t data, bool write) +{ + if (write) { + return xive_esb_rw(xsrc, srcno, offset, data, 1); + } + + /* + * Special Load EOI handling for LSI sources. Q bit is never set + * and the interrupt should be re-triggered if the level is still + * asserted. + */ + if (xive_source_irq_is_lsi(xsrc, srcno) && + offset == XIVE_ESB_LOAD_EOI) { + xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00); + if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { + xive_esb_trigger(xsrc, srcno); + } + return 0; + } else { + return xive_esb_rw(xsrc, srcno, offset, 0, 0); + } +} + +static void kvmppc_xive_source_get_state(XiveSource *xsrc) +{ + int i; + + for (i = 0; i < xsrc->nr_irqs; i++) { + /* Perform a load without side effect to retrieve the PQ bits */ + uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET); + + /* and save PQ locally */ + xive_source_esb_set(xsrc, i, pq); + } +} + +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val) +{ + XiveSource *xsrc = opaque; + SpaprXive *xive = SPAPR_XIVE(xsrc->xive); + struct kvm_irq_level args; + int rc; + + /* The KVM XIVE device should be in use */ + assert(xive->fd != -1); + + args.irq = srcno; + if (!xive_source_irq_is_lsi(xsrc, srcno)) { + if (!val) { + return; + } + args.level = KVM_INTERRUPT_SET; + } else { + if (val) { + xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; + args.level = KVM_INTERRUPT_SET_LEVEL; + } else { + xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; + args.level = KVM_INTERRUPT_UNSET; + } + } + rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args); + if (rc < 0) { + error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno)); + } +} + +/* + * sPAPR XIVE interrupt controller (KVM) + */ +void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp) +{ + struct kvm_ppc_xive_eq kvm_eq = { 0 }; + uint64_t kvm_eq_idx; + uint8_t priority; + uint32_t server; + Error *local_err = NULL; + + assert(xive_end_is_valid(end)); + + /* Encode the tuple (server, prio) as a KVM EQ index */ + spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); + + kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT & + KVM_XIVE_EQ_PRIORITY_MASK; + kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & + KVM_XIVE_EQ_SERVER_MASK; + + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, + &kvm_eq, false, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* + * The EQ index and toggle bit are updated by HW. These are the + * only fields from KVM we want to update QEMU with. The other END + * fields should already be in the QEMU END table. + */ + end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) | + xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex); +} + +void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp) +{ + struct kvm_ppc_xive_eq kvm_eq = { 0 }; + uint64_t kvm_eq_idx; + uint8_t priority; + uint32_t server; + Error *local_err = NULL; + + /* + * Build the KVM state from the local END structure. + */ + + kvm_eq.flags = 0; + if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) { + kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY; + } + + /* + * If the hcall is disabling the EQ, set the size and page address + * to zero. When migrating, only valid ENDs are taken into + * account. + */ + if (xive_end_is_valid(end)) { + kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12; + kvm_eq.qaddr = xive_end_qaddr(end); + /* + * The EQ toggle bit and index should only be relevant when + * restoring the EQ state + */ + kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1); + kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); + } else { + kvm_eq.qshift = 0; + kvm_eq.qaddr = 0; + } + + /* Encode the tuple (server, prio) as a KVM EQ index */ + spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); + + kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT & + KVM_XIVE_EQ_PRIORITY_MASK; + kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & + KVM_XIVE_EQ_SERVER_MASK; + + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, + &kvm_eq, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +void kvmppc_xive_reset(SpaprXive *xive, Error **errp) +{ + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET, + NULL, true, errp); +} + +static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) +{ + Error *local_err = NULL; + int i; + + for (i = 0; i < xive->nr_ends; i++) { + if (!xive_end_is_valid(&xive->endt[i])) { + continue; + } + + kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, + &xive->endt[i], &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} + +/* + * The primary goal of the XIVE VM change handler is to mark the EQ + * pages dirty when all XIVE event notifications have stopped. + * + * Whenever the VM is stopped, the VM change handler sets the source + * PQs to PENDING to stop the flow of events and to possibly catch a + * triggered interrupt occuring while the VM is stopped. The previous + * state is saved in anticipation of a migration. The XIVE controller + * is then synced through KVM to flush any in-flight event + * notification and stabilize the EQs. + * + * At this stage, we can mark the EQ page dirty and let a migration + * sequence transfer the EQ pages to the destination, which is done + * just after the stop state. + * + * The previous configuration of the sources is restored when the VM + * runs again. If an interrupt was queued while the VM was stopped, + * simply generate a trigger. + */ +static void kvmppc_xive_change_state_handler(void *opaque, int running, + RunState state) +{ + SpaprXive *xive = opaque; + XiveSource *xsrc = &xive->source; + Error *local_err = NULL; + int i; + + /* + * Restore the sources to their initial state. This is called when + * the VM resumes after a stop or a migration. + */ + if (running) { + for (i = 0; i < xsrc->nr_irqs; i++) { + uint8_t pq = xive_source_esb_get(xsrc, i); + uint8_t old_pq; + + old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8)); + + /* + * An interrupt was queued while the VM was stopped, + * generate a trigger. + */ + if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) { + xive_esb_trigger(xsrc, i); + } + } + + return; + } + + /* + * Mask the sources, to stop the flow of event notifications, and + * save the PQs locally in the XiveSource object. The XiveSource + * state will be collected later on by its vmstate handler if a + * migration is in progress. + */ + for (i = 0; i < xsrc->nr_irqs; i++) { + uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET); + + /* + * PQ is set to PENDING to possibly catch a triggered + * interrupt occuring while the VM is stopped (hotplug event + * for instance) . + */ + if (pq != XIVE_ESB_OFF) { + pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10); + } + xive_source_esb_set(xsrc, i, pq); + } + + /* + * Sync the XIVE controller in KVM, to flush in-flight event + * notification that should be enqueued in the EQs and mark the + * XIVE EQ pages dirty to collect all updates. + */ + kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, + KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err); + if (local_err) { + error_report_err(local_err); + return; + } +} + +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp) +{ + /* The KVM XIVE device is not in use */ + if (xive->fd == -1) { + return; + } + + /* + * When the VM is stopped, the sources are masked and the previous + * state is saved in anticipation of a migration. We should not + * synchronize the source state in that case else we will override + * the saved state. + */ + if (runstate_is_running()) { + kvmppc_xive_source_get_state(&xive->source); + } + + /* EAT: there is no extra state to query from KVM */ + + /* ENDT */ + kvmppc_xive_get_queues(xive, errp); +} + +/* + * The SpaprXive 'pre_save' method is called by the vmstate handler of + * the SpaprXive model, after the XIVE controller is synced in the VM + * change handler. + */ +int kvmppc_xive_pre_save(SpaprXive *xive) +{ + Error *local_err = NULL; + + /* The KVM XIVE device is not in use */ + if (xive->fd == -1) { + return 0; + } + + /* EAT: there is no extra state to query from KVM */ + + /* ENDT */ + kvmppc_xive_get_queues(xive, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + + return 0; +} + +/* + * The SpaprXive 'post_load' method is not called by a vmstate + * handler. It is called at the sPAPR machine level at the end of the + * migration sequence by the sPAPR IRQ backend 'post_load' method, + * when all XIVE states have been transferred and loaded. + */ +int kvmppc_xive_post_load(SpaprXive *xive, int version_id) +{ + Error *local_err = NULL; + CPUState *cs; + int i; + + /* The KVM XIVE device should be in use */ + assert(xive->fd != -1); + + /* Restore the ENDT first. The targetting depends on it. */ + for (i = 0; i < xive->nr_ends; i++) { + if (!xive_end_is_valid(&xive->endt[i])) { + continue; + } + + kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, + &xive->endt[i], &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + } + + /* Restore the EAT */ + for (i = 0; i < xive->nr_irqs; i++) { + if (!xive_eas_is_valid(&xive->eat[i])) { + continue; + } + + kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + } + + /* Restore the thread interrupt contexts */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + } + + /* The source states will be restored when the machine starts running */ + return 0; +} + +static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len, + Error **errp) +{ + void *addr; + uint32_t page_shift = 16; /* TODO: fix page_shift */ + + addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd, + pgoff << page_shift); + if (addr == MAP_FAILED) { + error_setg_errno(errp, errno, "XIVE: unable to set memory mapping"); + return NULL; + } + + return addr; +} + +/* + * All the XIVE memory regions are now backed by mappings from the KVM + * XIVE device. + */ +void kvmppc_xive_connect(SpaprXive *xive, Error **errp) +{ + XiveSource *xsrc = &xive->source; + Error *local_err = NULL; + size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs; + size_t tima_len = 4ull << TM_SHIFT; + CPUState *cs; + + /* + * The KVM XIVE device already in use. This is the case when + * rebooting under the XIVE-only interrupt mode. + */ + if (xive->fd != -1) { + return; + } + + if (!kvmppc_has_cap_xive()) { + error_setg(errp, "IRQ_XIVE capability must be present for KVM"); + return; + } + + /* First, create the KVM XIVE device */ + xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false); + if (xive->fd < 0) { + error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device"); + return; + } + + /* + * 1. Source ESB pages - KVM mapping + */ + xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc), + "xive.esb", esb_len, xsrc->esb_mmap); + + /* + * 2. END ESB pages (No KVM support yet) + */ + + /* + * 3. TIMA pages - KVM mapping + */ + xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive), + "xive.tima", tima_len, xive->tm_mmap); + + xive->change = qemu_add_vm_change_state_handler( + kvmppc_xive_change_state_handler, xive); + + /* Connect the presenters to the initial VCPUs of the machine */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + + /* Update the KVM sources */ + kvmppc_xive_source_reset(xsrc, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + kvm_kernel_irqchip = true; + kvm_msi_via_irqfd_allowed = true; + kvm_gsi_direct_mapping = true; + + /* Map all regions */ + spapr_xive_map_mmio(xive); +} + +void kvmppc_xive_disconnect(SpaprXive *xive, Error **errp) +{ + XiveSource *xsrc; + size_t esb_len; + + /* The KVM XIVE device is not in use */ + if (!xive || xive->fd == -1) { + return; + } + + if (!kvmppc_has_cap_xive()) { + error_setg(errp, "IRQ_XIVE capability must be present for KVM"); + return; + } + + /* Clear the KVM mapping */ + xsrc = &xive->source; + esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs; + + sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 0); + munmap(xsrc->esb_mmap, esb_len); + + sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 1); + + sysbus_mmio_unmap(SYS_BUS_DEVICE(xive), 2); + munmap(xive->tm_mmap, 4ull << TM_SHIFT); + + /* + * When the KVM device fd is closed, the KVM device is destroyed + * and removed from the list of devices of the VM. The VCPU + * presenters are also detached from the device. + */ + close(xive->fd); + xive->fd = -1; + + kvm_kernel_irqchip = false; + kvm_msi_via_irqfd_allowed = false; + kvm_gsi_direct_mapping = false; + + /* Clear the local list of presenter (hotplug) */ + kvm_cpu_disable_all(); + + /* VM Change state handler is not needed anymore */ + qemu_del_vm_change_state_handler(xive->change); +} diff --git a/hw/intc/xics.c b/hw/intc/xics.c index af7dc70..79f5a8a 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -610,6 +610,12 @@ static const TypeInfo ics_simple_info = { .class_size = sizeof(ICSStateClass), }; +static void ics_reset_irq(ICSIRQState *irq) +{ + irq->priority = 0xff; + irq->saved_priority = 0xff; +} + static void ics_base_reset(DeviceState *dev) { ICSState *ics = ICS_BASE(dev); @@ -623,8 +629,7 @@ static void ics_base_reset(DeviceState *dev) memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs); for (i = 0; i < ics->nr_irqs; i++) { - ics->irqs[i].priority = 0xff; - ics->irqs[i].saved_priority = 0xff; + ics_reset_irq(ics->irqs + i); ics->irqs[i].flags = flags[i]; } } @@ -760,6 +765,7 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI; if (kvm_irqchip_in_kernel()) { + ics_reset_irq(ics->irqs + srcno); ics_set_kvm_state_one(ics, srcno); } } diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 78a252e..5ba5b77 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -33,6 +33,7 @@ #include "trace.h" #include "sysemu/kvm.h" #include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" #include "hw/ppc/xics.h" #include "hw/ppc/xics_spapr.h" #include "kvm_ppc.h" @@ -51,6 +52,16 @@ typedef struct KVMEnabledICP { static QLIST_HEAD(, KVMEnabledICP) kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps); +static void kvm_disable_icps(void) +{ + KVMEnabledICP *enabled_icp, *next; + + QLIST_FOREACH_SAFE(enabled_icp, &kvm_enabled_icps, node, next) { + QLIST_REMOVE(enabled_icp, node); + g_free(enabled_icp); + } +} + /* * ICP-KVM */ @@ -59,6 +70,11 @@ void icp_get_kvm_state(ICPState *icp) uint64_t state; int ret; + /* The KVM XICS device is not in use */ + if (kernel_xics_fd == -1) { + return; + } + /* ICP for this CPU thread is not in use, exiting */ if (!icp->cs) { return; @@ -95,6 +111,11 @@ int icp_set_kvm_state(ICPState *icp) uint64_t state; int ret; + /* The KVM XICS device is not in use */ + if (kernel_xics_fd == -1) { + return 0; + } + /* ICP for this CPU thread is not in use, exiting */ if (!icp->cs) { return 0; @@ -123,8 +144,9 @@ void icp_kvm_realize(DeviceState *dev, Error **errp) unsigned long vcpu_id; int ret; + /* The KVM XICS device is not in use */ if (kernel_xics_fd == -1) { - abort(); + return; } cs = icp->cs; @@ -160,6 +182,11 @@ void ics_get_kvm_state(ICSState *ics) uint64_t state; int i; + /* The KVM XICS device is not in use */ + if (kernel_xics_fd == -1) { + return; + } + for (i = 0; i < ics->nr_irqs; i++) { ICSIRQState *irq = &ics->irqs[i]; @@ -220,6 +247,11 @@ int ics_set_kvm_state_one(ICSState *ics, int srcno) ICSIRQState *irq = &ics->irqs[srcno]; int ret; + /* The KVM XICS device is not in use */ + if (kernel_xics_fd == -1) { + return 0; + } + state = irq->server; state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK) << KVM_XICS_PRIORITY_SHIFT; @@ -259,6 +291,11 @@ int ics_set_kvm_state(ICSState *ics) { int i; + /* The KVM XICS device is not in use */ + if (kernel_xics_fd == -1) { + return 0; + } + for (i = 0; i < ics->nr_irqs; i++) { int ret; @@ -276,6 +313,9 @@ void ics_kvm_set_irq(ICSState *ics, int srcno, int val) struct kvm_irq_level args; int rc; + /* The KVM XICS device should be in use */ + assert(kernel_xics_fd != -1); + args.irq = srcno + ics->offset; if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) { if (!val) { @@ -303,6 +343,16 @@ static void rtas_dummy(PowerPCCPU *cpu, SpaprMachineState *spapr, int xics_kvm_init(SpaprMachineState *spapr, Error **errp) { int rc; + CPUState *cs; + Error *local_err = NULL; + + /* + * The KVM XICS device already in use. This is the case when + * rebooting under the XICS-only interrupt mode. + */ + if (kernel_xics_fd != -1) { + return 0; + } if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) { error_setg(errp, @@ -351,6 +401,26 @@ int xics_kvm_init(SpaprMachineState *spapr, Error **errp) kvm_msi_via_irqfd_allowed = true; kvm_gsi_direct_mapping = true; + /* Create the presenters */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + icp_kvm_realize(DEVICE(spapr_cpu_state(cpu)->icp), &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto fail; + } + } + + /* Update the KVM sources */ + ics_set_kvm_state(spapr->ics); + + /* Connect the presenters to the initial VCPUs of the machine */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + icp_set_kvm_state(spapr_cpu_state(cpu)->icp); + } + return 0; fail: @@ -360,3 +430,44 @@ fail: kvmppc_define_rtas_kernel_token(0, "ibm,int-off"); return -1; } + +void xics_kvm_disconnect(SpaprMachineState *spapr, Error **errp) +{ + /* The KVM XICS device is not in use */ + if (kernel_xics_fd == -1) { + return; + } + + if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) { + error_setg(errp, + "KVM and IRQ_XICS capability must be present for KVM XICS device"); + return; + } + + /* + * Only on P9 using the XICS-on XIVE KVM device: + * + * When the KVM device fd is closed, the device is destroyed and + * removed from the list of devices of the VM. The VCPU presenters + * are also detached from the device. + */ + close(kernel_xics_fd); + kernel_xics_fd = -1; + + spapr_rtas_unregister(RTAS_IBM_SET_XIVE); + spapr_rtas_unregister(RTAS_IBM_GET_XIVE); + spapr_rtas_unregister(RTAS_IBM_INT_OFF); + spapr_rtas_unregister(RTAS_IBM_INT_ON); + + kvmppc_define_rtas_kernel_token(0, "ibm,set-xive"); + kvmppc_define_rtas_kernel_token(0, "ibm,get-xive"); + kvmppc_define_rtas_kernel_token(0, "ibm,int-on"); + kvmppc_define_rtas_kernel_token(0, "ibm,int-off"); + + kvm_kernel_irqchip = false; + kvm_msi_via_irqfd_allowed = false; + kvm_gsi_direct_mapping = false; + + /* Clear the presenter from the VCPUs */ + kvm_disable_icps(); +} diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 9d2b8ad..5a1835e 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -239,6 +239,13 @@ static void rtas_int_on(PowerPCCPU *cpu, SpaprMachineState *spapr, void xics_spapr_init(SpaprMachineState *spapr) { + /* Emulated mode can only be initialized once. */ + if (spapr->ics->init) { + return; + } + + spapr->ics->init = true; + /* Registration of global state belongs into realize */ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive); diff --git a/hw/intc/xive.c b/hw/intc/xive.c index a0b8700..0c74e47 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -493,6 +493,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon) int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1; int i; + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + + kvmppc_xive_cpu_synchronize_state(tctx, &local_err); + if (local_err) { + error_report_err(local_err); + return; + } + } + monitor_printf(mon, "CPU[%04x]: QW NSR CPPR IPB LSMFB ACK# INC AGE PIPR" " W2\n", cpu_index); @@ -555,6 +565,15 @@ static void xive_tctx_realize(DeviceState *dev, Error **errp) return; } + /* Connect the presenter to the VCPU (required for CPU hotplug) */ + if (kvm_irqchip_in_kernel()) { + kvmppc_xive_cpu_connect(tctx, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + qemu_register_reset(xive_tctx_reset, dev); } @@ -563,10 +582,27 @@ static void xive_tctx_unrealize(DeviceState *dev, Error **errp) qemu_unregister_reset(xive_tctx_reset, dev); } +static int vmstate_xive_tctx_pre_save(void *opaque) +{ + Error *local_err = NULL; + + if (kvm_irqchip_in_kernel()) { + kvmppc_xive_cpu_get_state(XIVE_TCTX(opaque), &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + } + + return 0; +} + static const VMStateDescription vmstate_xive_tctx = { .name = TYPE_XIVE_TCTX, .version_id = 1, .minimum_version_id = 1, + .pre_save = vmstate_xive_tctx_pre_save, + .post_load = NULL, /* handled by the sPAPRxive model */ .fields = (VMStateField[]) { VMSTATE_BUFFER(regs, XiveTCTX), VMSTATE_END_OF_LIST() @@ -990,9 +1026,11 @@ static void xive_source_realize(DeviceState *dev, Error **errp) xsrc->status = g_malloc0(xsrc->nr_irqs); xsrc->lsi_map = bitmap_new(xsrc->nr_irqs); - memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), - &xive_source_esb_ops, xsrc, "xive.esb", - (1ull << xsrc->esb_shift) * xsrc->nr_irqs); + if (!kvm_irqchip_in_kernel()) { + memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc), + &xive_source_esb_ops, xsrc, "xive.esb", + (1ull << xsrc->esb_shift) * xsrc->nr_irqs); + } qemu_register_reset(xive_source_reset, dev); } @@ -1042,8 +1080,7 @@ static const TypeInfo xive_source_info = { void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon) { - uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 - | be32_to_cpu(end->w3); + uint64_t qaddr_base = xive_end_qaddr(end); uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0); uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); uint32_t qentries = 1 << (qsize + 10); @@ -1072,8 +1109,7 @@ void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, Monitor *mon) void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon) { - uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 - | be32_to_cpu(end->w3); + uint64_t qaddr_base = xive_end_qaddr(end); uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1); uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0); @@ -1101,8 +1137,7 @@ void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon) static void xive_end_enqueue(XiveEND *end, uint32_t data) { - uint64_t qaddr_base = (uint64_t) be32_to_cpu(end->w2 & 0x0fffffff) << 32 - | be32_to_cpu(end->w3); + uint64_t qaddr_base = xive_end_qaddr(end); uint32_t qsize = xive_get_field32(END_W0_QSIZE, end->w0); uint32_t qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); uint32_t qgen = xive_get_field32(END_W1_GENERATION, end->w1); diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c index a5d67bc..c08970b 100644 --- a/hw/isa/i82378.c +++ b/hw/isa/i82378.c @@ -21,7 +21,6 @@ #include "hw/pci/pci.h" #include "hw/i386/pc.h" #include "hw/timer/i8254.h" -#include "hw/timer/mc146818rtc.h" #include "hw/audio/pcspk.h" #define TYPE_I82378 "i82378" @@ -105,9 +104,6 @@ static void i82378_realize(PCIDevice *pci, Error **errp) /* 2 82C37 (dma) */ isa = isa_create_simple(isabus, "i82374"); - - /* timer */ - isa_create_simple(isabus, TYPE_MC146818_RTC); } static void i82378_init(Object *obj) diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index a346515..f927ec9 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -122,3 +122,8 @@ config XIVE_SPAPR default y depends on PSERIES select XIVE + +config XIVE_KVM + bool + default y + depends on XIVE_SPAPR && KVM diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 31aa20e..046f0a8 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -450,7 +450,8 @@ static void pnv_dt_power_mgt(void *fdt) static void *pnv_dt_create(MachineState *machine) { - const char plat_compat[] = "qemu,powernv\0ibm,powernv"; + const char plat_compat8[] = "qemu,powernv8\0qemu,powernv\0ibm,powernv"; + const char plat_compat9[] = "qemu,powernv9\0ibm,powernv"; PnvMachineState *pnv = PNV_MACHINE(machine); void *fdt; char *buf; @@ -465,8 +466,14 @@ static void *pnv_dt_create(MachineState *machine) _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2))); _FDT((fdt_setprop_string(fdt, 0, "model", "IBM PowerNV (emulated by qemu)"))); - _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat, - sizeof(plat_compat)))); + if (pnv_is_power9(pnv)) { + _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat9, + sizeof(plat_compat9)))); + } else { + _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat8, + sizeof(plat_compat8)))); + } + buf = qemu_uuid_unparse_strdup(&qemu_uuid); _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf))); diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c index c285ef5..f53a6d7 100644 --- a/hw/ppc/pnv_xscom.c +++ b/hw/ppc/pnv_xscom.c @@ -29,6 +29,12 @@ #include <libfdt.h> +/* PRD registers */ +#define PRD_P8_IPOLL_REG_MASK 0x01020013 +#define PRD_P8_IPOLL_REG_STATUS 0x01020014 +#define PRD_P9_IPOLL_REG_MASK 0x000F0033 +#define PRD_P9_IPOLL_REG_STATUS 0x000F0034 + static void xscom_complete(CPUState *cs, uint64_t hmer_bits) { /* @@ -70,6 +76,12 @@ static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba) case 0x1010c00: /* PIBAM FIR */ case 0x1010c03: /* PIBAM FIR MASK */ + /* PRD registers */ + case PRD_P8_IPOLL_REG_MASK: + case PRD_P8_IPOLL_REG_STATUS: + case PRD_P9_IPOLL_REG_MASK: + case PRD_P9_IPOLL_REG_STATUS: + /* P9 xscom reset */ case 0x0090018: /* Receive status reg */ case 0x0090012: /* log register */ @@ -124,6 +136,12 @@ static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val) case 0x201302a: /* CAPP stuff */ case 0x2013801: /* CAPP stuff */ case 0x2013802: /* CAPP stuff */ + + /* P8 PRD registers */ + case PRD_P8_IPOLL_REG_MASK: + case PRD_P8_IPOLL_REG_STATUS: + case PRD_P9_IPOLL_REG_MASK: + case PRD_P9_IPOLL_REG_STATUS: return true; default: return false; diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index b7f459d..2a8009e 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -601,7 +601,7 @@ static int prep_set_cmos_checksum(DeviceState *dev, void *opaque) uint16_t checksum = *(uint16_t *)opaque; ISADevice *rtc; - if (object_dynamic_cast(OBJECT(dev), "mc146818rtc")) { + if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) { rtc = ISA_DEVICE(dev); rtc_set_memory(rtc, 0x2e, checksum & 0xff); rtc_set_memory(rtc, 0x3e, checksum & 0xff); @@ -675,6 +675,11 @@ static void ibm_40p_init(MachineState *machine) qdev_prop_set_uint32(dev, "ram-size", machine->ram_size); qdev_init_nofail(dev); + /* RTC */ + dev = DEVICE(isa_create(isa_bus, TYPE_MC146818_RTC)); + qdev_prop_set_int32(dev, "base_year", 1900); + qdev_init_nofail(dev); + /* initialize CMOS checksums */ cmos_checksum = 0x6aa9; qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL, diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 2ef3ce4..e2b33e5 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -500,7 +500,10 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); if (env->spr_cb[SPR_PURR].oea_read) { - _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1))); + } + if (env->spr_cb[SPR_SPURR].oea_read) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1))); } if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) { @@ -2122,6 +2125,7 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_cap_cfpc, &vmstate_spapr_cap_sbbc, &vmstate_spapr_cap_ibs, + &vmstate_spapr_cap_hpt_maxpagesize, &vmstate_spapr_irq_map, &vmstate_spapr_cap_nested_kvm_hv, &vmstate_spapr_dtb, @@ -4348,7 +4352,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; spapr_caps_add_properties(smc, &error_abort); - smc->irq = &spapr_irq_xics; + smc->irq = &spapr_irq_dual; smc->dr_phb_enabled = true; } @@ -4407,18 +4411,7 @@ DEFINE_SPAPR_MACHINE(4_1, "4.1", true); /* * pseries-4.0 */ -static void spapr_machine_4_0_class_options(MachineClass *mc) -{ - spapr_machine_4_1_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); -} - -DEFINE_SPAPR_MACHINE(4_0, "4.0", false); - -/* - * pseries-3.1 - */ -static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index, +static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, unsigned n_dma, uint32_t *liobns, @@ -4430,6 +4423,22 @@ static void phb_placement_3_1(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } +static void spapr_machine_4_0_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_4_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); + smc->phb_placement = phb_placement_4_0; + smc->irq = &spapr_irq_xics; + smc->pre_4_1_migration = true; +} + +DEFINE_SPAPR_MACHINE(4_0, "4.0", false); + +/* + * pseries-3.1 + */ static void spapr_machine_3_1_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); @@ -4445,7 +4454,6 @@ static void spapr_machine_3_1_class_options(MachineClass *mc) smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; - smc->phb_placement = phb_placement_3_1; } DEFINE_SPAPR_MACHINE(3_1, "3.1", false); diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 9b1c10b..31b4661 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -64,6 +64,7 @@ typedef struct SpaprCapabilityInfo { void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp); void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu, uint8_t val, Error **errp); + bool (*migrate_needed)(void *opaque); } SpaprCapabilityInfo; static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name, @@ -350,6 +351,11 @@ static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } +static bool cap_hpt_maxpagesize_migrate_needed(void *opaque) +{ + return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration; +} + static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift, uint32_t pshift) { @@ -542,6 +548,7 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { .type = "int", .apply = cap_hpt_maxpagesize_apply, .cpu_apply = cap_hpt_maxpagesize_cpu_apply, + .migrate_needed = cap_hpt_maxpagesize_migrate_needed, }, [SPAPR_CAP_NESTED_KVM_HV] = { .name = "nested-hv", @@ -679,8 +686,11 @@ int spapr_caps_post_migration(SpaprMachineState *spapr) static bool spapr_cap_##sname##_needed(void *opaque) \ { \ SpaprMachineState *spapr = opaque; \ + bool (*needed)(void *opaque) = \ + capability_table[cap].migrate_needed; \ \ - return spapr->cmd_line_caps[cap] && \ + return needed ? needed(opaque) : true && \ + spapr->cmd_line_caps[cap] && \ (spapr->eff.caps[cap] != \ spapr->def.caps[cap]); \ } \ @@ -703,6 +713,7 @@ SPAPR_CAP_MIG_STATE(dfp, SPAPR_CAP_DFP); SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC); SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC); SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS); +SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE); SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index f04e06c..5621fb9 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -58,9 +58,11 @@ static void spapr_cpu_reset(void *opaque) * * Disable Power-saving mode Exit Cause exceptions for the CPU, so * we don't get spurious wakups before an RTAS start-cpu call. + * For the same reason, set PSSCR_EC. */ lpcr &= ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV | pcc->lpcr_pm); lpcr |= LPCR_LPES0 | LPCR_LPES1; + env->spr[SPR_PSSCR] |= PSSCR_EC; /* Set RMLS to the max (ie, 16G) */ lpcr &= ~LPCR_RMLS; diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 6c16d2b..0a050ad 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1513,6 +1513,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, bool guest_radix; Error *local_err = NULL; bool raw_mode_supported = false; + bool guest_xive; cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err); if (local_err) { @@ -1545,10 +1546,17 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, error_report("guest requested hash and radix MMU, which is invalid."); exit(EXIT_FAILURE); } + if (spapr_ovec_test(ov5_guest, OV5_XIVE_BOTH)) { + error_report("guest requested an invalid interrupt mode"); + exit(EXIT_FAILURE); + } + /* The radix/hash bit in byte 24 requires special handling: */ guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300); spapr_ovec_clear(ov5_guest, OV5_MMU_RADIX_300); + guest_xive = spapr_ovec_test(ov5_guest, OV5_XIVE_EXPLOIT); + /* * HPT resizing is a bit of a special case, because when enabled * we assume an HPT guest will support it until it says it @@ -1633,6 +1641,24 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, } /* + * Ensure the guest asks for an interrupt mode we support; otherwise + * terminate the boot. + */ + if (guest_xive) { + if (spapr->irq->ov5 == SPAPR_OV5_XIVE_LEGACY) { + error_report( +"Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property"); + exit(EXIT_FAILURE); + } + } else { + if (spapr->irq->ov5 == SPAPR_OV5_XIVE_EXPLOIT) { + error_report( +"Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual"); + exit(EXIT_FAILURE); + } + } + + /* * Generate a machine reset when we have an update of the * interrupt mode. Only required when the machine supports both * modes. diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c index b1f79ea..3156daf 100644 --- a/hw/ppc/spapr_irq.c +++ b/hw/ppc/spapr_irq.c @@ -62,38 +62,46 @@ void spapr_irq_msi_reset(SpaprMachineState *spapr) bitmap_clear(spapr->irq_map, 0, spapr->irq_map_nr); } - -/* - * XICS IRQ backend. - */ - -static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs, - Error **errp) +static void spapr_irq_init_device(SpaprMachineState *spapr, + SpaprIrq *irq, Error **errp) { MachineState *machine = MACHINE(spapr); - Object *obj; Error *local_err = NULL; - bool xics_kvm = false; - if (kvm_enabled()) { - if (machine_kernel_irqchip_allowed(machine) && - !xics_kvm_init(spapr, &local_err)) { - xics_kvm = true; - } - if (machine_kernel_irqchip_required(machine) && !xics_kvm) { + if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { + irq->init_kvm(spapr, &local_err); + if (local_err && machine_kernel_irqchip_required(machine)) { error_prepend(&local_err, "kernel_irqchip requested but unavailable: "); error_propagate(errp, local_err); return; } - error_free(local_err); - local_err = NULL; - } - if (!xics_kvm) { - xics_spapr_init(spapr); + if (!local_err) { + return; + } + + /* + * We failed to initialize the KVM device, fallback to + * emulated mode + */ + error_prepend(&local_err, "kernel_irqchip allowed but unavailable: "); + warn_report_err(local_err); } + irq->init_emu(spapr, errp); +} + +/* + * XICS IRQ backend. + */ + +static void spapr_irq_init_xics(SpaprMachineState *spapr, int nr_irqs, + Error **errp) +{ + Object *obj; + Error *local_err = NULL; + obj = object_new(TYPE_ICS_SIMPLE); object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort); object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr), @@ -212,7 +220,13 @@ static void spapr_irq_set_irq_xics(void *opaque, int srcno, int val) static void spapr_irq_reset_xics(SpaprMachineState *spapr, Error **errp) { - /* TODO: create the KVM XICS device */ + Error *local_err = NULL; + + spapr_irq_init_device(spapr, &spapr_irq_xics, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } static const char *spapr_irq_get_nodename_xics(SpaprMachineState *spapr) @@ -220,6 +234,18 @@ static const char *spapr_irq_get_nodename_xics(SpaprMachineState *spapr) return XICS_NODENAME; } +static void spapr_irq_init_emu_xics(SpaprMachineState *spapr, Error **errp) +{ + xics_spapr_init(spapr); +} + +static void spapr_irq_init_kvm_xics(SpaprMachineState *spapr, Error **errp) +{ + if (kvm_enabled()) { + xics_kvm_init(spapr, errp); + } +} + #define SPAPR_IRQ_XICS_NR_IRQS 0x1000 #define SPAPR_IRQ_XICS_NR_MSIS \ (XICS_IRQ_BASE + SPAPR_IRQ_XICS_NR_IRQS - SPAPR_IRQ_MSI) @@ -240,6 +266,8 @@ SpaprIrq spapr_irq_xics = { .reset = spapr_irq_reset_xics, .set_irq = spapr_irq_set_irq_xics, .get_nodename = spapr_irq_get_nodename_xics, + .init_emu = spapr_irq_init_emu_xics, + .init_kvm = spapr_irq_init_kvm_xics, }; /* @@ -248,19 +276,10 @@ SpaprIrq spapr_irq_xics = { static void spapr_irq_init_xive(SpaprMachineState *spapr, int nr_irqs, Error **errp) { - MachineState *machine = MACHINE(spapr); uint32_t nr_servers = spapr_max_server_number(spapr); DeviceState *dev; int i; - /* KVM XIVE device not yet available */ - if (kvm_enabled()) { - if (machine_kernel_irqchip_required(machine)) { - error_setg(errp, "kernel_irqchip requested. no KVM XIVE support"); - return; - } - } - dev = qdev_create(NULL, TYPE_SPAPR_XIVE); qdev_prop_set_uint32(dev, "nr-irqs", nr_irqs); /* @@ -350,12 +369,13 @@ static void spapr_irq_cpu_intc_create_xive(SpaprMachineState *spapr, static int spapr_irq_post_load_xive(SpaprMachineState *spapr, int version_id) { - return 0; + return spapr_xive_post_load(spapr->xive, version_id); } static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp) { CPUState *cs; + Error *local_err = NULL; CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); @@ -364,6 +384,12 @@ static void spapr_irq_reset_xive(SpaprMachineState *spapr, Error **errp) spapr_xive_set_tctx_os_cam(spapr_cpu_state(cpu)->tctx); } + spapr_irq_init_device(spapr, &spapr_irq_xive, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + /* Activate the XIVE MMIOs */ spapr_xive_mmio_set_enabled(spapr->xive, true); } @@ -372,7 +398,11 @@ static void spapr_irq_set_irq_xive(void *opaque, int srcno, int val) { SpaprMachineState *spapr = opaque; - xive_source_set_irq(&spapr->xive->source, srcno, val); + if (kvm_irqchip_in_kernel()) { + kvmppc_xive_source_set_irq(&spapr->xive->source, srcno, val); + } else { + xive_source_set_irq(&spapr->xive->source, srcno, val); + } } static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr) @@ -380,6 +410,18 @@ static const char *spapr_irq_get_nodename_xive(SpaprMachineState *spapr) return spapr->xive->nodename; } +static void spapr_irq_init_emu_xive(SpaprMachineState *spapr, Error **errp) +{ + spapr_xive_init(spapr->xive, errp); +} + +static void spapr_irq_init_kvm_xive(SpaprMachineState *spapr, Error **errp) +{ + if (kvm_enabled()) { + kvmppc_xive_connect(spapr->xive, errp); + } +} + /* * XIVE uses the full IRQ number space. Set it to 8K to be compatible * with XICS. @@ -404,6 +446,8 @@ SpaprIrq spapr_irq_xive = { .reset = spapr_irq_reset_xive, .set_irq = spapr_irq_set_irq_xive, .get_nodename = spapr_irq_get_nodename_xive, + .init_emu = spapr_irq_init_emu_xive, + .init_kvm = spapr_irq_init_kvm_xive, }; /* @@ -428,14 +472,8 @@ static SpaprIrq *spapr_irq_current(SpaprMachineState *spapr) static void spapr_irq_init_dual(SpaprMachineState *spapr, int nr_irqs, Error **errp) { - MachineState *machine = MACHINE(spapr); Error *local_err = NULL; - if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { - error_setg(errp, "No KVM support for the 'dual' machine"); - return; - } - spapr_irq_xics.init(spapr, spapr_irq_xics.nr_irqs, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -514,6 +552,9 @@ static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id) * defaults to XICS at startup. */ if (spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + if (kvm_irqchip_in_kernel()) { + xics_kvm_disconnect(spapr, &error_fatal); + } spapr_irq_xive.reset(spapr, &error_fatal); } @@ -522,12 +563,30 @@ static int spapr_irq_post_load_dual(SpaprMachineState *spapr, int version_id) static void spapr_irq_reset_dual(SpaprMachineState *spapr, Error **errp) { + Error *local_err = NULL; + /* * Deactivate the XIVE MMIOs. The XIVE backend will reenable them * if selected. */ spapr_xive_mmio_set_enabled(spapr->xive, false); + /* Destroy all KVM devices */ + if (kvm_irqchip_in_kernel()) { + xics_kvm_disconnect(spapr, &local_err); + if (local_err) { + error_propagate(errp, local_err); + error_prepend(errp, "KVM XICS disconnect failed: "); + return; + } + kvmppc_xive_disconnect(spapr->xive, &local_err); + if (local_err) { + error_propagate(errp, local_err); + error_prepend(errp, "KVM XIVE disconnect failed: "); + return; + } + } + spapr_irq_current(spapr)->reset(spapr, errp); } @@ -565,6 +624,8 @@ SpaprIrq spapr_irq_dual = { .reset = spapr_irq_reset_dual, .set_irq = spapr_irq_set_irq_dual, .get_nodename = spapr_irq_get_nodename_dual, + .init_emu = NULL, /* should not be used */ + .init_kvm = NULL, /* should not be used */ }; @@ -763,6 +824,9 @@ SpaprIrq spapr_irq_xics_legacy = { .dt_populate = spapr_dt_xics, .cpu_intc_create = spapr_irq_cpu_intc_create_xics, .post_load = spapr_irq_post_load_xics, + .reset = spapr_irq_reset_xics, .set_irq = spapr_irq_set_irq_xics, .get_nodename = spapr_irq_get_nodename_xics, + .init_emu = spapr_irq_init_emu_xics, + .init_kvm = spapr_irq_init_kvm_xics, }; diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index ee24212..5bc1a93 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -177,6 +177,7 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, } else { lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR); } + env->spr[SPR_PSSCR] &= ~PSSCR_EC; } ppc_store_lpcr(newcpu, lpcr); @@ -205,8 +206,11 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr, /* Disable Power-saving mode Exit Cause exceptions for the CPU. * This could deliver an interrupt on a dying CPU and crash the - * guest */ + * guest. + * For the same reason, set PSSCR_EC. + */ ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm); + env->spr[SPR_PSSCR] |= PSSCR_EC; cs->halted = 1; kvmppc_set_reg_ppc_online(cpu, 0); qemu_cpu_kick(cs); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 7e32f30..4f5becf 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -119,6 +119,7 @@ struct SpaprMachineClass { bool pre_2_10_has_unused_icps; bool legacy_irq_allocation; bool broken_host_serial_model; /* present real host info to the guest */ + bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, @@ -849,6 +850,7 @@ extern const VMStateDescription vmstate_spapr_cap_dfp; extern const VMStateDescription vmstate_spapr_cap_cfpc; extern const VMStateDescription vmstate_spapr_cap_sbbc; extern const VMStateDescription vmstate_spapr_cap_ibs; +extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize; extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv; extern const VMStateDescription vmstate_spapr_cap_large_decr; extern const VMStateDescription vmstate_spapr_cap_ccf_assist; diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h index b855f74..14cab73 100644 --- a/include/hw/ppc/spapr_irq.h +++ b/include/hw/ppc/spapr_irq.h @@ -48,6 +48,8 @@ typedef struct SpaprIrq { void (*reset)(SpaprMachineState *spapr, Error **errp); void (*set_irq)(void *opaque, int srcno, int val); const char *(*get_nodename)(SpaprMachineState *spapr); + void (*init_emu)(SpaprMachineState *spapr, Error **errp); + void (*init_kvm)(SpaprMachineState *spapr, Error **errp); } SpaprIrq; extern SpaprIrq spapr_irq_xics; diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h index fc3e965..b26befc 100644 --- a/include/hw/ppc/spapr_xive.h +++ b/include/hw/ppc/spapr_xive.h @@ -38,16 +38,55 @@ typedef struct SpaprXive { /* TIMA mapping address */ hwaddr tm_base; MemoryRegion tm_mmio; + + /* KVM support */ + int fd; + void *tm_mmap; + VMChangeStateEntry *change; } SpaprXive; +/* + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value + * to the controller block id value. It can nevertheless be changed + * for testing purpose. + */ +#define SPAPR_XIVE_BLOCK_ID 0x0 + bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi); bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn); void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon); +int spapr_xive_post_load(SpaprXive *xive, int version_id); void spapr_xive_hcall_init(SpaprMachineState *spapr); void spapr_dt_xive(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt, uint32_t phandle); void spapr_xive_set_tctx_os_cam(XiveTCTX *tctx); void spapr_xive_mmio_set_enabled(SpaprXive *xive, bool enable); +void spapr_xive_map_mmio(SpaprXive *xive); + +int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, + uint32_t *out_server, uint8_t *out_prio); +void spapr_xive_init(SpaprXive *xive, Error **errp); + +/* + * KVM XIVE device helpers + */ +void kvmppc_xive_connect(SpaprXive *xive, Error **errp); +void kvmppc_xive_disconnect(SpaprXive *xive, Error **errp); +void kvmppc_xive_reset(SpaprXive *xive, Error **errp); +void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, + Error **errp); +void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp); +uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, + uint64_t data, bool write); +void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp); +void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, + uint32_t end_idx, XiveEND *end, + Error **errp); +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp); +int kvmppc_xive_pre_save(SpaprXive *xive); +int kvmppc_xive_post_load(SpaprXive *xive, int version_id); #endif /* PPC_SPAPR_XIVE_H */ diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index eb65ad7..d6f8e4c 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -119,6 +119,7 @@ struct ICSState { uint32_t offset; ICSIRQState *irqs; XICSFabric *xics; + bool init; /* sPAPR ICS device initialized */ }; #define ICS_PROP_XICS "xics" diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h index 15a8dcf..2476b54 100644 --- a/include/hw/ppc/xics_spapr.h +++ b/include/hw/ppc/xics_spapr.h @@ -34,6 +34,7 @@ void spapr_dt_xics(SpaprMachineState *spapr, uint32_t nr_servers, void *fdt, uint32_t phandle); int xics_kvm_init(SpaprMachineState *spapr, Error **errp); +void xics_kvm_disconnect(SpaprMachineState *spapr, Error **errp); void xics_spapr_init(SpaprMachineState *spapr); #endif /* XICS_SPAPR_H */ diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index c4f2774..d872f96 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -140,6 +140,7 @@ #ifndef PPC_XIVE_H #define PPC_XIVE_H +#include "sysemu/kvm.h" #include "hw/qdev-core.h" #include "hw/sysbus.h" #include "hw/ppc/xive_regs.h" @@ -194,6 +195,9 @@ typedef struct XiveSource { uint32_t esb_shift; MemoryRegion esb_mmio; + /* KVM support */ + void *esb_mmap; + XiveNotifier *xive; } XiveSource; @@ -423,4 +427,14 @@ static inline uint32_t xive_nvt_cam_line(uint8_t nvt_blk, uint32_t nvt_idx) return (nvt_blk << 19) | nvt_idx; } +/* + * KVM XIVE device helpers + */ + +void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp); +void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); +void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); +void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); + #endif /* PPC_XIVE_H */ diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h index bf36678..1a8c5b5 100644 --- a/include/hw/ppc/xive_regs.h +++ b/include/hw/ppc/xive_regs.h @@ -208,6 +208,12 @@ typedef struct XiveEND { #define xive_end_is_backlog(end) (be32_to_cpu((end)->w0) & END_W0_BACKLOG) #define xive_end_is_escalate(end) (be32_to_cpu((end)->w0) & END_W0_ESCALATE_CTL) +static inline uint64_t xive_end_qaddr(XiveEND *end) +{ + return ((uint64_t) be32_to_cpu(end->w2) & 0x0fffffff) << 32 | + be32_to_cpu(end->w3); +} + /* Notification Virtual Target (NVT) */ typedef struct XiveNVT { uint32_t w0; diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h index 1aedcf0..4c668fb 100644 --- a/include/hw/sysbus.h +++ b/include/hw/sysbus.h @@ -89,6 +89,7 @@ qemu_irq sysbus_get_connected_irq(SysBusDevice *dev, int n); void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr); void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr, int priority); +void sysbus_mmio_unmap(SysBusDevice *dev, int n); void sysbus_add_io(SysBusDevice *dev, hwaddr addr, MemoryRegion *mem); MemoryRegion *sysbus_address_space(SysBusDevice *dev); diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 638a6e9..02b67a3 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -180,18 +180,6 @@ DEF_HELPER_3(vmuloub, void, avr, avr, avr) DEF_HELPER_3(vmulouh, void, avr, avr, avr) DEF_HELPER_3(vmulouw, void, avr, avr, avr) DEF_HELPER_3(vmuluwm, void, avr, avr, avr) -DEF_HELPER_3(vsrab, void, avr, avr, avr) -DEF_HELPER_3(vsrah, void, avr, avr, avr) -DEF_HELPER_3(vsraw, void, avr, avr, avr) -DEF_HELPER_3(vsrad, void, avr, avr, avr) -DEF_HELPER_3(vsrb, void, avr, avr, avr) -DEF_HELPER_3(vsrh, void, avr, avr, avr) -DEF_HELPER_3(vsrw, void, avr, avr, avr) -DEF_HELPER_3(vsrd, void, avr, avr, avr) -DEF_HELPER_3(vslb, void, avr, avr, avr) -DEF_HELPER_3(vslh, void, avr, avr, avr) -DEF_HELPER_3(vslw, void, avr, avr, avr) -DEF_HELPER_3(vsld, void, avr, avr, avr) DEF_HELPER_3(vslo, void, avr, avr, avr) DEF_HELPER_3(vsro, void, avr, avr, avr) DEF_HELPER_3(vsrv, void, avr, avr, avr) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 9af779a..8ce89f2 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1791,23 +1791,6 @@ VSHIFT(l, 1) VSHIFT(r, 0) #undef VSHIFT -#define VSL(suffix, element, mask) \ - void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int shift = b->element[i] & mask; \ - \ - r->element[i] = a->element[i] << shift; \ - } \ - } -VSL(b, u8, 0x7) -VSL(h, u16, 0x0F) -VSL(w, u32, 0x1F) -VSL(d, u64, 0x3F) -#undef VSL - void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int i; @@ -1815,10 +1798,10 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) size = ARRAY_SIZE(r->u8); for (i = 0; i < size; i++) { - shift = b->u8[i] & 0x7; /* extract shift value */ - bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ - (((i + 1) < size) ? a->u8[i + 1] : 0); - r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ + shift = b->VsrB(i) & 0x7; /* extract shift value */ + bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ + (((i + 1) < size) ? a->VsrB(i + 1) : 0); + r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ } } @@ -1833,10 +1816,10 @@ void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) * order will guarantee that computed result is not fed back. */ for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { - shift = b->u8[i] & 0x7; /* extract shift value */ - bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; + shift = b->VsrB(i) & 0x7; /* extract shift value */ + bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); /* extract adjacent bytes */ - r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ + r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ } } @@ -1980,26 +1963,6 @@ VNEG(vnegw, s32) VNEG(vnegd, s64) #undef VNEG -#define VSR(suffix, element, mask) \ - void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int shift = b->element[i] & mask; \ - r->element[i] = a->element[i] >> shift; \ - } \ - } -VSR(ab, s8, 0x7) -VSR(ah, s16, 0xF) -VSR(aw, s32, 0x1F) -VSR(ad, s64, 0x3F) -VSR(b, u8, 0x7) -VSR(h, u16, 0xF) -VSR(w, u32, 0x1F) -VSR(d, u64, 0x3F) -#undef VSR - void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int sh = (b->VsrB(0xf) >> 3) & 0xf; @@ -2053,7 +2016,7 @@ void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) for (i = 0; i < ARRAY_SIZE(r->u64); i++) { int64_t t = (int64_t)b->VsrSW(upper + i * 2); - result.VsrW(i) = 0; + result.VsrD(i) = 0; for (j = 0; j < ARRAY_SIZE(r->u64); j++) { t += a->VsrSW(2 * i + j); } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 02e22e2..3bf0a46 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -75,6 +75,7 @@ static int cap_fixup_hcalls; static int cap_htm; /* Hardware transactional memory support */ static int cap_mmu_radix; static int cap_mmu_hash_v3; +static int cap_xive; static int cap_resize_hpt; static int cap_ppc_pvr_compat; static int cap_ppc_safe_cache; @@ -146,6 +147,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX); cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3); + cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE); cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -1721,7 +1723,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) trace_kvm_handle_dcr_write(); ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data); } else { - trace_kvm_handle_drc_read(); + trace_kvm_handle_dcr_read(); ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data); } break; @@ -2478,6 +2480,11 @@ static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c) return 0; } +bool kvmppc_has_cap_xive(void) +{ + return cap_xive; +} + static void kvmppc_get_cpu_characteristics(KVMState *s) { struct kvm_ppc_cpu_char c; diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 2238513..45776ca 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -60,6 +60,7 @@ bool kvmppc_has_cap_fixup_hcalls(void); bool kvmppc_has_cap_htm(void); bool kvmppc_has_cap_mmu_radix(void); bool kvmppc_has_cap_mmu_hash_v3(void); +bool kvmppc_has_cap_xive(void); int kvmppc_get_cap_safe_cache(void); int kvmppc_get_cap_safe_bounds_check(void); int kvmppc_get_cap_safe_indirect_branch(void); @@ -316,6 +317,11 @@ static inline bool kvmppc_has_cap_mmu_hash_v3(void) return false; } +static inline bool kvmppc_has_cap_xive(void) +{ + return false; +} + static inline int kvmppc_get_cap_safe_cache(void) { return 0; diff --git a/target/ppc/trace-events b/target/ppc/trace-events index 7b3cfe1..3dc6740 100644 --- a/target/ppc/trace-events +++ b/target/ppc/trace-events @@ -22,7 +22,7 @@ kvm_failed_put_vpa(void) "Warning: Unable to set VPA information to KVM" kvm_failed_get_vpa(void) "Warning: Unable to get VPA information from KVM" kvm_injected_interrupt(int irq) "injected interrupt %d" kvm_handle_dcr_write(void) "handle dcr write" -kvm_handle_drc_read(void) "handle dcr read" +kvm_handle_dcr_read(void) "handle dcr read" kvm_handle_halt(void) "handle halt" kvm_handle_papr_hcall(void) "handle PAPR hypercall" kvm_handle_epr(void) "handle epr" diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c index 6861f4c..663275b 100644 --- a/target/ppc/translate/vmx-impl.inc.c +++ b/target/ppc/translate/vmx-impl.inc.c @@ -530,21 +530,21 @@ GEN_VXFORM(vmuleuw, 4, 10); GEN_VXFORM(vmulesb, 4, 12); GEN_VXFORM(vmulesh, 4, 13); GEN_VXFORM(vmulesw, 4, 14); -GEN_VXFORM(vslb, 2, 4); -GEN_VXFORM(vslh, 2, 5); -GEN_VXFORM(vslw, 2, 6); +GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4); +GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5); +GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6); GEN_VXFORM(vrlwnm, 2, 6); GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \ vrlwnm, PPC_NONE, PPC2_ISA300) -GEN_VXFORM(vsld, 2, 23); -GEN_VXFORM(vsrb, 2, 8); -GEN_VXFORM(vsrh, 2, 9); -GEN_VXFORM(vsrw, 2, 10); -GEN_VXFORM(vsrd, 2, 27); -GEN_VXFORM(vsrab, 2, 12); -GEN_VXFORM(vsrah, 2, 13); -GEN_VXFORM(vsraw, 2, 14); -GEN_VXFORM(vsrad, 2, 15); +GEN_VXFORM_V(vsld, MO_64, tcg_gen_gvec_shlv, 2, 23); +GEN_VXFORM_V(vsrb, MO_8, tcg_gen_gvec_shrv, 2, 8); +GEN_VXFORM_V(vsrh, MO_16, tcg_gen_gvec_shrv, 2, 9); +GEN_VXFORM_V(vsrw, MO_32, tcg_gen_gvec_shrv, 2, 10); +GEN_VXFORM_V(vsrd, MO_64, tcg_gen_gvec_shrv, 2, 27); +GEN_VXFORM_V(vsrab, MO_8, tcg_gen_gvec_sarv, 2, 12); +GEN_VXFORM_V(vsrah, MO_16, tcg_gen_gvec_sarv, 2, 13); +GEN_VXFORM_V(vsraw, MO_32, tcg_gen_gvec_sarv, 2, 14); +GEN_VXFORM_V(vsrad, MO_64, tcg_gen_gvec_sarv, 2, 15); GEN_VXFORM(vsrv, 2, 28); GEN_VXFORM(vslv, 2, 29); GEN_VXFORM(vslo, 6, 16); diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index 11d9b75..199d22d 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -227,7 +227,7 @@ static void gen_lxvb16x(DisasContext *ctx) tcg_temp_free_i64(xtl); } -#define VSX_VECTOR_LOAD_STORE(name, op, indexed) \ +#define VSX_VECTOR_LOAD(name, op, indexed) \ static void gen_##name(DisasContext *ctx) \ { \ int xt; \ @@ -254,8 +254,6 @@ static void gen_##name(DisasContext *ctx) \ } \ xth = tcg_temp_new_i64(); \ xtl = tcg_temp_new_i64(); \ - get_cpu_vsrh(xth, xt); \ - get_cpu_vsrl(xtl, xt); \ gen_set_access_type(ctx, ACCESS_INT); \ EA = tcg_temp_new(); \ if (indexed) { \ @@ -281,10 +279,61 @@ static void gen_##name(DisasContext *ctx) \ tcg_temp_free_i64(xtl); \ } -VSX_VECTOR_LOAD_STORE(lxv, ld_i64, 0) -VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0) -VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1) -VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1) +VSX_VECTOR_LOAD(lxv, ld_i64, 0) +VSX_VECTOR_LOAD(lxvx, ld_i64, 1) + +#define VSX_VECTOR_STORE(name, op, indexed) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + int xt; \ + TCGv EA; \ + TCGv_i64 xth; \ + TCGv_i64 xtl; \ + \ + if (indexed) { \ + xt = xT(ctx->opcode); \ + } else { \ + xt = DQxT(ctx->opcode); \ + } \ + \ + if (xt < 32) { \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VSXU); \ + return; \ + } \ + } else { \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + } \ + xth = tcg_temp_new_i64(); \ + xtl = tcg_temp_new_i64(); \ + get_cpu_vsrh(xth, xt); \ + get_cpu_vsrl(xtl, xt); \ + gen_set_access_type(ctx, ACCESS_INT); \ + EA = tcg_temp_new(); \ + if (indexed) { \ + gen_addr_reg_index(ctx, EA); \ + } else { \ + gen_addr_imm_index(ctx, EA, 0x0F); \ + } \ + if (ctx->le_mode) { \ + tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_LEQ); \ + tcg_gen_addi_tl(EA, EA, 8); \ + tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_LEQ); \ + } else { \ + tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ); \ + tcg_gen_addi_tl(EA, EA, 8); \ + tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ); \ + } \ + tcg_temp_free(EA); \ + tcg_temp_free_i64(xth); \ + tcg_temp_free_i64(xtl); \ +} + +VSX_VECTOR_STORE(stxv, st_i64, 0) +VSX_VECTOR_STORE(stxvx, st_i64, 1) #ifdef TARGET_PPC64 #define VSX_VECTOR_LOAD_STORE_LENGTH(name) \ @@ -329,7 +378,6 @@ static void gen_##name(DisasContext *ctx) \ return; \ } \ xth = tcg_temp_new_i64(); \ - get_cpu_vsrh(xth, rD(ctx->opcode) + 32); \ gen_set_access_type(ctx, ACCESS_INT); \ EA = tcg_temp_new(); \ gen_addr_imm_index(ctx, EA, 0x03); \ @@ -513,8 +561,8 @@ static void gen_##name(DisasContext *ctx) \ tcg_temp_free_i64(xth); \ } -VSX_LOAD_SCALAR_DS(stxsd, st64_i64) -VSX_LOAD_SCALAR_DS(stxssp, st32fs) +VSX_STORE_SCALAR_DS(stxsd, st64_i64) +VSX_STORE_SCALAR_DS(stxssp, st32fs) static void gen_mfvsrwz(DisasContext *ctx) { @@ -858,8 +906,8 @@ static void glue(gen_, name)(DisasContext *ctx) \ xbh = tcg_temp_new_i64(); \ xbl = tcg_temp_new_i64(); \ sgm = tcg_temp_new_i64(); \ - set_cpu_vsrh(xB(ctx->opcode), xbh); \ - set_cpu_vsrl(xB(ctx->opcode), xbl); \ + get_cpu_vsrh(xbh, xB(ctx->opcode)); \ + get_cpu_vsrl(xbl, xB(ctx->opcode)); \ tcg_gen_movi_i64(sgm, sgn_mask); \ switch (op) { \ case OP_ABS: { \ @@ -1192,7 +1240,7 @@ static void gen_xxbrq(DisasContext *ctx) tcg_gen_bswap64_i64(xtl, xbh); set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_gen_mov_i64(xth, t0); - set_cpu_vsrl(xT(ctx->opcode), xth); + set_cpu_vsrh(xT(ctx->opcode), xth); tcg_temp_free_i64(t0); tcg_temp_free_i64(xth); @@ -1220,7 +1268,7 @@ static void gen_xxbrw(DisasContext *ctx) get_cpu_vsrl(xbl, xB(ctx->opcode)); gen_bswap32x4(xth, xtl, xbh, xbl); - set_cpu_vsrl(xT(ctx->opcode), xth); + set_cpu_vsrh(xT(ctx->opcode), xth); set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_temp_free_i64(xth); @@ -1355,13 +1403,13 @@ static void gen_xxspltib(DisasContext *ctx) int rt = xT(ctx->opcode); if (rt < 32) { - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); return; } } else { - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); return; } } @@ -1820,7 +1868,7 @@ static void gen_xvxsigdp(DisasContext *ctx) tcg_gen_movi_i64(t0, 0x0010000000000000); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0); - tcg_gen_deposit_i64(xth, t0, xbl, 0, 52); + tcg_gen_deposit_i64(xtl, t0, xbl, 0, 52); set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_temp_free_i64(t0); diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index c0e1bf5..aaf5396 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ -107,6 +107,7 @@ docker-image-debian-sparc64-cross: docker-image-debian-sid docker-image-debian-mips64-cross: docker-image-debian-sid docker-image-debian-riscv64-cross: docker-image-debian-sid docker-image-debian-powerpc-cross: docker-image-debian-sid +docker-image-debian-ppc64-cross: docker-image-debian-sid docker-image-travis: NOUSER=1 # Specialist build images, sometimes very limited tools diff --git a/tests/docker/dockerfiles/debian-ppc64-cross.docker b/tests/docker/dockerfiles/debian-ppc64-cross.docker new file mode 100644 index 0000000..7f239c3 --- /dev/null +++ b/tests/docker/dockerfiles/debian-ppc64-cross.docker @@ -0,0 +1,11 @@ +# +# Docker ppc64 cross-compiler target +# +# This docker target builds on the debian sid base image which +# contains cross compilers for Debian "ports" targets. +FROM qemu:debian-sid + +RUN DEBIAN_FRONTEND=noninteractive eatmydata \ + apt-get install -y --no-install-recommends \ + gcc-powerpc64-linux-gnu \ + libc6-dev-ppc64-cross || { echo "Failed to build - see debian-sid.docker notes"; exit 1; } diff --git a/tests/tcg/ppc/Makefile.include b/tests/tcg/ppc/Makefile.include index b062c30..ae01fb8 100644 --- a/tests/tcg/ppc/Makefile.include +++ b/tests/tcg/ppc/Makefile.include @@ -1,6 +1,9 @@ ifeq ($(TARGET_NAME),ppc) DOCKER_IMAGE=debian-powerpc-cross DOCKER_CROSS_COMPILER=powerpc-linux-gnu-gcc +else ifeq ($(TARGET_NAME),ppc64) +DOCKER_IMAGE=debian-ppc64-cross +DOCKER_CROSS_COMPILER=powerpc64-linux-gnu-gcc else ifeq ($(TARGET_NAME),ppc64le) DOCKER_IMAGE=debian-ppc64el-cross DOCKER_CROSS_COMPILER=powerpc64le-linux-gnu-gcc |