From fc8c745d50150a63f6c5ba2cd0b83b430963b7e8 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 25 Jun 2021 15:51:55 +1000 Subject: spapr: Implement Open Firmware client interface The PAPR platform describes an OS environment that's presented by a combination of a hypervisor and firmware. The features it specifies require collaboration between the firmware and the hypervisor. Since the beginning, the runtime component of the firmware (RTAS) has been implemented as a 20 byte shim which simply forwards RTAS calls to a hypercall implemented in qemu. The boot time firmware component is SLOF - but a build that's specific to qemu, and has always needed to be updated in sync with it. Even though we've managed to limit the amount of runtime communication we need between qemu and SLOF, there's some, and it has become increasingly awkward to handle as we've implemented new features. This implements a boot time OF client interface (CI) which is enabled by a new "x-vof" pseries machine option (stands for "Virtual Open Firmware"). When enabled, QEMU implements the custom H_OF_CLIENT hcall which implements the Open Firmware Client Interface (OF CI). This allows using a smaller stateless firmware which does not have to manage the device tree. The new "vof.bin" firmware image is included with source code under pc-bios/. It also includes the RTAS blob. This implements a handful of CI methods just to get -kernel/-initrd working. In particular, this implements device tree fetching and a simple memory allocator - "claim" (an OF CI memory allocator) and updates "/memory@0/available" to report available memory to the client. This implements changing some device tree properties which we know how to deal with, the rest is ignored. To allow changes, this skips fdt_pack() when x-vof=on as not packing the blob leaves some room for appending. In the absence of SLOF, this assigns phandles to device tree nodes to make device tree traversing work. 
When x-vof=on, this adds "/chosen" every time QEMU (re)builds a tree. This adds basic instance support; instances are managed by a hash map ihandle -> [phandle]. Before the guest starts, the memory in use is: 0..e60 - the initial firmware 8000..10000 - stack 400000.. - kernel 3ea0000.. - initramdisk This OF CI does not implement "interpret". Unlike SLOF, this does not format uninitialized nvram. Instead, this includes a disk image with pre-formatted nvram. With this basic support, this can only boot into the kernel directly. However this is just enough for the petitboot kernel and initramdisk to boot from any possible source. Note this requires a reasonably recent guest kernel with: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=df5be5be8735 The immediate benefit is much faster booting time which is especially crucial with fully emulated early CPU bring up environments. Also this may come in handy when/if GRUB-in-the-userspace sees the light of day. This separates VOF and sPAPR in the hope that VOF bits may be reused by other POWERPC boards which do not support pSeries. This assumes potential support for booting from QEMU backends such as blockdev or netdev without devices/drivers being used. 
Signed-off-by: Alexey Kardashevskiy
Message-Id: <20210625055155.2252896-1-aik@ozlabs.ru>
Reviewed-by: BALATON Zoltan
[dwg: Adjusted some includes which broke compile in some more obscure
 compilation setups]
Signed-off-by: David Gibson
---
 pc-bios/vof/Makefile  | 23 +++++++++++++
 pc-bios/vof/bootmem.c | 14 ++++++++
 pc-bios/vof/ci.c      | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++
 pc-bios/vof/entry.S   | 49 +++++++++++++++++++++++++++
 pc-bios/vof/libc.c    | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++
 pc-bios/vof/main.c    | 21 ++++++++++++
 pc-bios/vof/vof.h     | 43 ++++++++++++++++++++++++
 pc-bios/vof/vof.lds   | 48 +++++++++++++++++++++++++++
 8 files changed, 381 insertions(+)
 create mode 100644 pc-bios/vof/Makefile
 create mode 100644 pc-bios/vof/bootmem.c
 create mode 100644 pc-bios/vof/ci.c
 create mode 100644 pc-bios/vof/entry.S
 create mode 100644 pc-bios/vof/libc.c
 create mode 100644 pc-bios/vof/main.c
 create mode 100644 pc-bios/vof/vof.h
 create mode 100644 pc-bios/vof/vof.lds

(limited to 'pc-bios/vof')

diff --git a/pc-bios/vof/Makefile b/pc-bios/vof/Makefile
new file mode 100644
index 0000000..aa1678c
--- /dev/null
+++ b/pc-bios/vof/Makefile
@@ -0,0 +1,23 @@
+all: build-all
+
+build-all: vof.bin
+
+CROSS ?=
+CC = $(CROSS)gcc
+LD = $(CROSS)ld
+OBJCOPY = $(CROSS)objcopy
+
+%.o: %.S
+	$(CC) -m32 -mbig-endian -mcpu=power4 -c -o $@ $<
+
+%.o: %.c
+	$(CC) -m32 -mbig-endian -mcpu=power4 -c -fno-stack-protector -o $@ $<
+
+vof.elf: entry.o main.o ci.o bootmem.o libc.o
+	$(LD) -nostdlib -e_start -Tvof.lds -EB -o $@ $^
+
+%.bin: %.elf
+	$(OBJCOPY) -O binary -j .text -j .data -j .toc -j .got2 $^ $@
+
+clean:
+	rm -f *.o vof.bin vof.elf *~
diff --git a/pc-bios/vof/bootmem.c b/pc-bios/vof/bootmem.c
new file mode 100644
index 0000000..771b9e9
--- /dev/null
+++ b/pc-bios/vof/bootmem.c
@@ -0,0 +1,14 @@
+#include "vof.h"
+
+void boot_from_memory(uint64_t initrd, uint64_t initrdsize)
+{
+    uint64_t kern[2]; /* only kern[0] is used, as the address to jump to */
+    phandle chosen = ci_finddevice("/chosen");
+
+    if (ci_getprop(chosen, "qemu,boot-kernel", kern, sizeof(kern)) !=
+            sizeof(kern)) {
+        return; /* property missing or of unexpected size: nothing to boot */
+    }
+
+    do_boot(kern[0], initrd, initrdsize);
+}
diff --git a/pc-bios/vof/ci.c b/pc-bios/vof/ci.c
new file mode 100644
index 0000000..2b56050
--- /dev/null
+++ b/pc-bios/vof/ci.c
@@ -0,0 +1,91 @@
+#include "vof.h"
+
+struct prom_args {
+    uint32_t service;
+    uint32_t nargs;
+    uint32_t nret;
+    uint32_t args[10]; /* nargs input cells followed by nret return cells */
+};
+
+typedef unsigned long prom_arg_t;
+
+#define ADDR(x) (uint32_t)(x)
+
+static int prom_handle(struct prom_args *pargs)
+{
+    void *rtasbase;
+    uint32_t rtassize = 0;
+    phandle rtas;
+
+    if (strcmp("call-method", (void *)(unsigned long)pargs->service)) {
+        return -1;
+    }
+
+    if (strcmp("instantiate-rtas", (void *)(unsigned long)pargs->args[0])) {
+        return -1;
+    }
+
+    rtas = ci_finddevice("/rtas");
+    /* rtas-size is set by QEMU depending on FWNMI support */
+    ci_getprop(rtas, "rtas-size", &rtassize, sizeof(rtassize));
+    if (rtassize < hv_rtas_size) {
+        return -1;
+    }
+
+    rtasbase = (void *)(unsigned long) pargs->args[2];
+
+    memcpy(rtasbase, hv_rtas, hv_rtas_size);
+    pargs->args[pargs->nargs] = 0; /* 1st return cell */
+    pargs->args[pargs->nargs + 1] = pargs->args[2]; /* 2nd: RTAS base */
+
+    return 0;
+}
+
+void prom_entry(uint32_t args)
+{
+    if (prom_handle((void *)(unsigned long) args)) {
+        ci_entry(args); /* not handled here: pass on via hcall */
+    }
+}
+
+static int call_ci(const char *service, int nargs, int nret, ...)
+{
+    int i;
+    struct prom_args args;
+    va_list list;
+
+    args.service = ADDR(service);
+    args.nargs = nargs;
+    args.nret = nret;
+
+    va_start(list, nret);
+    for (i = 0; i < nargs; i++) {
+        args.args[i] = va_arg(list, prom_arg_t);
+    }
+    va_end(list);
+
+    for (i = 0; i < nret; i++) {
+        args.args[nargs + i] = 0;
+    }
+
+    if ((int)ci_entry((uint32_t)(&args)) < 0) { /* uint32_t is never < 0 */
+        return PROM_ERROR;
+    }
+
+    return (nret > 0) ? args.args[nargs] : 0;
+}
+
+void ci_panic(const char *str)
+{
+    call_ci("exit", 0, 0); /* NOTE: str is currently not printed */
+}
+
+phandle ci_finddevice(const char *path)
+{
+    return call_ci("finddevice", 1, 1, path);
+}
+
+uint32_t ci_getprop(phandle ph, const char *propname, void *prop, int len)
+{
+    return call_ci("getprop", 4, 1, ph, propname, prop, len);
+}
diff --git a/pc-bios/vof/entry.S b/pc-bios/vof/entry.S
new file mode 100644
index 0000000..10a101f
--- /dev/null
+++ b/pc-bios/vof/entry.S
@@ -0,0 +1,49 @@
+#define LOAD32(rn, name)    \
+    lis     rn,name##@h;    \
+    ori     rn,rn,name##@l
+
+#define ENTRY(func_name)    \
+    .text;                  \
+    .align  2;              \
+    .globl  .func_name;     \
+    .func_name:             \
+    .globl  func_name;      \
+    func_name:
+
+#define KVMPPC_HCALL_BASE       0xf000
+#define KVMPPC_H_RTAS           (KVMPPC_HCALL_BASE + 0x0)
+#define KVMPPC_H_VOF_CLIENT     (KVMPPC_HCALL_BASE + 0x5)
+
+    . = 0x100 /* Do exactly as SLOF does */
+
+ENTRY(_start)
+    LOAD32(2, __toc_start)
+    b       entry_c
+
+ENTRY(_prom_entry)
+    LOAD32(2, __toc_start)
+    stwu    %r1,-112(%r1)
+    stw     %r31,104(%r1)
+    mflr    %r31
+    bl      prom_entry
+    nop
+    mtlr    %r31
+    lwz     %r31,104(%r1)
+    addi    %r1,%r1,112
+    blr
+
+ENTRY(ci_entry)
+    mr      4,3                     /* r4 = params (was in r3) */
+    LOAD32(3,KVMPPC_H_VOF_CLIENT)
+    sc      1
+    blr
+
+/* This is the actual RTAS blob copied to the OS at instantiate-rtas */
+ENTRY(hv_rtas)
+    mr      %r4,%r3
+    LOAD32(3,KVMPPC_H_RTAS)
+    sc      1
+    blr
+    .globl hv_rtas_size
+hv_rtas_size:
+    .long . - hv_rtas;
diff --git a/pc-bios/vof/libc.c b/pc-bios/vof/libc.c
new file mode 100644
index 0000000..00c10e6
--- /dev/null
+++ b/pc-bios/vof/libc.c
@@ -0,0 +1,92 @@
+#include "vof.h"
+
+int strlen(const char *s)
+{
+    int len = 0;
+
+    while (*s != 0) {
+        len += 1;
+        s += 1;
+    }
+
+    return len;
+}
+
+int strcmp(const char *s1, const char *s2)
+{
+    while (*s1 != 0 && *s2 != 0) {
+        if (*s1 != *s2) {
+            break;
+        }
+        s1 += 1;
+        s2 += 1;
+    }
+
+    return *s1 - *s2;
+}
+
+void *memcpy(void *dest, const void *src, size_t n)
+{
+    char *cdest;
+    const char *csrc = src;
+
+    cdest = dest;
+    while (n-- > 0) {
+        *cdest++ = *csrc++;
+    }
+
+    return dest;
+}
+
+int memcmp(const void *ptr1, const void *ptr2, size_t n)
+{
+    const unsigned char *p1 = ptr1;
+    const unsigned char *p2 = ptr2;
+
+    while (n-- > 0) {
+        if (*p1 != *p2) {
+            return *p1 - *p2;
+        }
+        p1 += 1;
+        p2 += 1;
+    }
+
+    return 0;
+}
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+    char *cdest;
+    const char *csrc;
+    int i;
+
+    /* Do the buffers overlap in a bad way? */
+    if (src < dest && src + n >= dest) {
+        /* Copy from end to start */
+        cdest = dest + n - 1;
+        csrc = src + n - 1;
+        for (i = 0; i < n; i++) {
+            *cdest-- = *csrc--;
+        }
+    } else {
+        /* Normal copy is possible */
+        cdest = dest;
+        csrc = src;
+        for (i = 0; i < n; i++) {
+            *cdest++ = *csrc++;
+        }
+    }
+
+    return dest;
+}
+
+void *memset(void *dest, int c, size_t size)
+{
+    unsigned char *d = (unsigned char *)dest;
+
+    while (size-- > 0) {
+        *d++ = (unsigned char)c;
+    }
+
+    return dest;
+}
diff --git a/pc-bios/vof/main.c b/pc-bios/vof/main.c
new file mode 100644
index 0000000..9fc30d2
--- /dev/null
+++ b/pc-bios/vof/main.c
@@ -0,0 +1,21 @@
+#include "vof.h"
+
+void do_boot(unsigned long addr, unsigned long _r3, unsigned long _r4)
+{
+    register unsigned long r3 __asm__("r3") = _r3;
+    register unsigned long r4 __asm__("r4") = _r4;
+    register unsigned long r5 __asm__("r5") = (unsigned long) _prom_entry;
+
+    ((client *)(uint32_t)addr)();
+}
+
+void entry_c(void)
+{
+    register unsigned long r3 __asm__("r3"); /* r3..r5 as left by _start */
+    register unsigned long r4 __asm__("r4");
+    register unsigned long r5 __asm__("r5");
+    uint64_t initrd = r3, initrdsize = r4;
+
+    boot_from_memory(initrd, initrdsize);
+    ci_panic("*** No boot target ***\n");
+}
diff --git a/pc-bios/vof/vof.h b/pc-bios/vof/vof.h
new file mode 100644
index 0000000..2d89580
--- /dev/null
+++ b/pc-bios/vof/vof.h
@@ -0,0 +1,43 @@
+/*
+ * Virtual Open Firmware
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <stdarg.h>
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned long uint32_t;
+typedef unsigned long long uint64_t;
+#define NULL (0)
+#define PROM_ERROR (-1u)
+typedef unsigned long ihandle;
+typedef unsigned long phandle;
+typedef int size_t;
+typedef void client(void);
+
+/* globals */
+extern void _prom_entry(void); /* OF CI entry point (i.e. this firmware) */
+
+void do_boot(unsigned long addr, unsigned long r3, unsigned long r4);
+
+/* libc */
+int strlen(const char *s);
+int strcmp(const char *s1, const char *s2);
+void *memcpy(void *dest, const void *src, size_t n);
+int memcmp(const void *ptr1, const void *ptr2, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
+void *memset(void *dest, int c, size_t size);
+
+/* CI wrappers */
+void ci_panic(const char *str);
+phandle ci_finddevice(const char *path);
+uint32_t ci_getprop(phandle ph, const char *propname, void *prop, int len);
+
+/* booting from -kernel */
+void boot_from_memory(uint64_t initrd, uint64_t initrdsize);
+
+/* Entry points for CI and RTAS */
+extern uint32_t ci_entry(uint32_t params);
+extern unsigned long hv_rtas(unsigned long params);
+extern unsigned int hv_rtas_size;
diff --git a/pc-bios/vof/vof.lds b/pc-bios/vof/vof.lds
new file mode 100644
index 0000000..1506ab4
--- /dev/null
+++ b/pc-bios/vof/vof.lds
@@ -0,0 +1,48 @@
+OUTPUT_FORMAT("elf32-powerpc")
+OUTPUT_ARCH(powerpc:common)
+
+/* set the entry point (the Makefile also passes -e_start) */
+ENTRY ( _start )
+
+SECTIONS {
+    __executable_start = .;
+
+    .text : {
+        *(.text)
+    }
+
+    __etext = .;
+
+    . = ALIGN(8);
+
+    .data : {
+        *(.data)
+        *(.rodata .rodata.*)
+        *(.got1)
+        *(.sdata)
+        *(.opd)
+    }
+
+    /* FIXME bss at end ??? */
+
+    . = ALIGN(8);
+    __bss_start = .;
+    .bss : {
+        *(.sbss) *(.scommon)
+        *(.dynbss)
+        *(.bss)
+    }
+
+    . = ALIGN(8);
+    __bss_end = .;
+    __bss_size = (__bss_end - __bss_start);
+
+    . = ALIGN(256);
+    __toc_start = DEFINED (.TOC.) ? .TOC. : ADDR (.got) + 0x8000;
+    .got :
+    {
+         *(.toc .got)
+    }
+    . = ALIGN(8);
+    __toc_end = .;
+}
-- 
cgit v1.1