From b9eba474fb52dbf205c7b466af458397a7487216 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 17 Mar 2021 00:30:12 -0700 Subject: pk: support >2 GiB of user memory for RV64 Previously, the pk would always run from virtual address MEM_START. Instead, remap it into the negative virtual addresses, allowing user processes to expand beyond MEM_START. --- pk/console.c | 21 ++++++++++++++------- pk/frontend.c | 3 ++- pk/handlers.c | 4 +++- pk/mmap.c | 35 +++++++++++++++++++++-------------- pk/mmap.h | 9 +++++++-- pk/pk.c | 36 +++++++++++++++++++++++++++--------- pk/syscall.c | 2 ++ 7 files changed, 76 insertions(+), 34 deletions(-) (limited to 'pk') diff --git a/pk/console.c b/pk/console.c index ec84981..3d3edae 100644 --- a/pk/console.c +++ b/pk/console.c @@ -1,6 +1,7 @@ // See LICENSE for license details. #include "pk.h" +#include "mmap.h" #include "file.h" #include "frontend.h" #include @@ -23,21 +24,27 @@ void printk(const char* s, ...) va_end(vl); } -void dump_tf(trapframe_t* tf) +static const char* get_regname(int r) { - static const char* regnames[] = { - "z ", "ra", "sp", "gp", "tp", "t0", "t1", "t2", - "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5", - "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", - "s8", "s9", "sA", "sB", "t3", "t4", "t5", "t6" + static const char regnames[] = { + "z \0" "ra\0" "sp\0" "gp\0" "tp\0" "t0\0" "t1\0" "t2\0" + "s0\0" "s1\0" "a0\0" "a1\0" "a2\0" "a3\0" "a4\0" "a5\0" + "a6\0" "a7\0" "s2\0" "s3\0" "s4\0" "s5\0" "s6\0" "s7\0" + "s8\0" "s9\0" "sA\0" "sB\0" "t3\0" "t4\0" "t5\0" "t6" }; + return &regnames[r * 3]; +} + +void dump_tf(trapframe_t* tf) +{ + tf->gpr[0] = 0; for(int i = 0; i < 32; i+=4) { for(int j = 0; j < 4; j++) - printk("%s %lx%c",regnames[i+j],tf->gpr[i+j],j < 3 ? ' ' : '\n'); + printk("%s %lx%c", get_regname(i+j), tf->gpr[i+j], j < 3 ? 
' ' : '\n'); } printk("pc %lx va %lx insn %x sr %lx\n", tf->epc, tf->badvaddr, (uint32_t)tf->insn, tf->status); diff --git a/pk/frontend.c b/pk/frontend.c index a02cb89..cbe5377 100644 --- a/pk/frontend.c +++ b/pk/frontend.c @@ -5,6 +5,7 @@ #include "frontend.h" #include "syscall.h" #include "htif.h" +#include "mmap.h" #include long frontend_syscall(long n, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5, uint64_t a6) @@ -23,7 +24,7 @@ long frontend_syscall(long n, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3 magic_mem[6] = a5; magic_mem[7] = a6; - htif_syscall((uintptr_t)magic_mem); + htif_syscall(kva2pa_maybe(magic_mem)); long ret = magic_mem[0]; diff --git a/pk/handlers.c b/pk/handlers.c index cb12d22..efd14f1 100644 --- a/pk/handlers.c +++ b/pk/handlers.c @@ -122,5 +122,7 @@ void handle_trap(trapframe_t* tf) kassert(tf->cause < ARRAY_SIZE(trap_handlers) && trap_handlers[tf->cause]); - trap_handlers[tf->cause](tf); + trap_handler f = (void*)pa2kva(trap_handlers[tf->cause]); + + f(tf); } diff --git a/pk/mmap.c b/pk/mmap.c index 33f6d50..3351295 100644 --- a/pk/mmap.c +++ b/pk/mmap.c @@ -10,6 +10,8 @@ #include #include +uintptr_t kva2pa_offset; + typedef struct vmr_t { struct vmr_t* next; uintptr_t addr; @@ -68,7 +70,9 @@ static uintptr_t __page_alloc() node->next = NULL; page_freelist_depth--; - return (uintptr_t)memset((void*)addr, 0, RISCV_PGSIZE); + memset((void*)pa2kva(addr), 0, RISCV_PGSIZE); + + return addr; } static uintptr_t __page_alloc_assert() @@ -103,7 +107,7 @@ static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file, size_t offset, unsigned refcnt, int prot) { if (vmr_freelist_head == NULL) { - vmr_t* new_vmrs = (vmr_t*)__page_alloc(); + vmr_t* new_vmrs = (vmr_t*)pa2kva(__page_alloc()); if (new_vmrs == NULL) return NULL; @@ -161,7 +165,7 @@ static size_t pt_idx(uintptr_t addr, int level) static pte_t* __walk_internal(uintptr_t addr, int create, int level) { - pte_t* t = root_page_table; + pte_t* t = 
(pte_t*)pa2kva(root_page_table); for (int i = RISCV_PGLEVELS - 1; i > level; i--) { size_t idx = pt_idx(addr, i); if (unlikely(!(t[idx] & PTE_V))) { @@ -174,7 +178,7 @@ static pte_t* __walk_internal(uintptr_t addr, int create, int level) return 0; } } - t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT); + t = (pte_t*)pa2kva(pte_ppn(t[idx]) << RISCV_PGSHIFT); } return &t[pt_idx(addr, level)]; } @@ -251,8 +255,8 @@ static int __handle_page_fault(uintptr_t vaddr, int prot) return -1; else if (!(*pte & PTE_V)) { - uintptr_t kva = __page_alloc_assert(); - uintptr_t ppn = kva / RISCV_PGSIZE; + uintptr_t ppn = __page_alloc_assert() / RISCV_PGSIZE; + uintptr_t kva = pa2kva(ppn * RISCV_PGSIZE); vmr_t* v = (vmr_t*)*pte; *pte = pte_create(ppn, prot_to_type(PROT_READ|PROT_WRITE, 0)); @@ -488,6 +492,10 @@ void populate_mapping(const void* start, size_t size, int prot) uintptr_t pk_vm_init() { + // PA space must fit within half of VA space + uintptr_t user_size = -KVA_START; + mem_size = MIN(mem_size, user_size); + extern char _end; first_free_page = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE); free_pages = (mem_size - (first_free_page - MEM_START)) / RISCV_PGSIZE; @@ -498,19 +506,18 @@ uintptr_t pk_vm_init() next_free_page = freelist_node_array_size / RISCV_PGSIZE; root_page_table = (void*)__page_alloc_assert(); - __map_kernel_range(MEM_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC); + __map_kernel_range(KVA_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC); - current.mmap_max = current.brk_max = MEM_START; - - size_t mem_pages = mem_size >> RISCV_PGSHIFT; - size_t stack_size = MIN(mem_pages >> 5, 2048) * RISCV_PGSIZE; - size_t stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0); - kassert(stack_bottom != (uintptr_t)-1); - current.stack_top = stack_bottom + stack_size; + current.mmap_max = current.brk_max = user_size; flush_tlb(); write_csr(sptbr, 
((uintptr_t)root_page_table >> RISCV_PGSHIFT) | SATP_MODE_CHOICE); uintptr_t kernel_stack_top = __page_alloc_assert() + RISCV_PGSIZE; + + // relocate + kva2pa_offset = KVA_START - MEM_START; + page_freelist_node_array = (void*)pa2kva(page_freelist_node_array); + return kernel_stack_top; } diff --git a/pk/mmap.h b/pk/mmap.h index 9e3fad6..7e68777 100644 --- a/pk/mmap.h +++ b/pk/mmap.h @@ -34,7 +34,12 @@ uintptr_t do_mremap(uintptr_t addr, size_t old_size, size_t new_size, int flags) uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot); uintptr_t do_brk(uintptr_t addr); -#define kva2pa(va) ((uintptr_t)(va)) -#define is_uva(va) ((uintptr_t)(va) < MEM_START) +#define KVA_START ((uintptr_t)-1 << (VA_BITS-1)) + +extern uintptr_t kva2pa_offset; +#define kva2pa(va) ((uintptr_t)(va) - kva2pa_offset) +#define pa2kva(pa) ((uintptr_t)(pa) + kva2pa_offset) +#define kva2pa_maybe(va) ((uintptr_t)(va) >= KVA_START ? kva2pa(va) : (uintptr_t)(va)) +#define is_uva(va) ((uintptr_t)(va) < KVA_START) #endif diff --git a/pk/pk.c b/pk/pk.c index cb03c70..012b697 100644 --- a/pk/pk.c +++ b/pk/pk.c @@ -6,6 +6,7 @@ #include "elf.h" #include "mtrap.h" #include "frontend.h" +#include "bits.h" #include "usermem.h" #include @@ -67,11 +68,11 @@ static size_t parse_args(arg_buf* args) uint64_t* pk_argv = &args->buf[1]; // pk_argv[0] is the proxy kernel itself. skip it and any flags. 
size_t pk_argc = args->buf[0], arg = 1; - for ( ; arg < pk_argc && *(char*)(uintptr_t)pk_argv[arg] == '-'; arg++) - handle_option((const char*)(uintptr_t)pk_argv[arg]); + for ( ; arg < pk_argc && *(char*)pa2kva(pk_argv[arg]) == '-'; arg++) + handle_option((const char*)pa2kva(pk_argv[arg])); for (size_t i = 0; arg + i < pk_argc; i++) - args->argv[i] = (char*)(uintptr_t)pk_argv[arg + i]; + args->argv[i] = (char*)pa2kva(pk_argv[arg + i]); return pk_argc - arg; } @@ -85,6 +86,12 @@ static void init_tf(trapframe_t* tf, long pc, long sp) static void run_loaded_program(size_t argc, char** argv, uintptr_t kstack_top) { + size_t mem_pages = mem_size >> RISCV_PGSHIFT; + size_t stack_size = MIN(mem_pages >> 5, 2048) * RISCV_PGSIZE; + size_t stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0); + kassert(stack_bottom != (uintptr_t)-1); + current.stack_top = stack_bottom + stack_size; + // copy phdrs to user stack size_t stack_top = current.stack_top - current.phdr_size; memcpy_to_user((void*)stack_top, (void*)current.phdr, current.phdr_size); @@ -167,15 +174,25 @@ static void run_loaded_program(size_t argc, char** argv, uintptr_t kstack_top) start_user(&tf); } -static void rest_of_boot_loader(uintptr_t kstack_top) +void rest_of_boot_loader(uintptr_t kstack_top); + +asm ("\n\ + .globl rest_of_boot_loader\n\ +rest_of_boot_loader:\n\ + mv sp, a0\n\ + tail rest_of_boot_loader_2"); + +void rest_of_boot_loader_2(uintptr_t kstack_top) { - arg_buf args; + file_init(); + + static arg_buf args; // avoid large stack allocation size_t argc = parse_args(&args); if (!argc) panic("tell me what ELF to load!"); // load program named by argv[0] - long phdrs[128]; + static long phdrs[128]; // avoid large stack allocation current.phdr = (uintptr_t)phdrs; current.phdr_size = sizeof(phdrs); load_elf(args.argv[0], &current); @@ -185,14 +202,15 @@ static void rest_of_boot_loader(uintptr_t kstack_top) void 
boot_loader(uintptr_t dtb) { + uintptr_t kernel_stack_top = pk_vm_init(); + extern char trap_entry; - write_csr(stvec, &trap_entry); + write_csr(stvec, pa2kva(&trap_entry)); write_csr(sscratch, 0); write_csr(sie, 0); set_csr(sstatus, SSTATUS_FS | SSTATUS_VS); - file_init(); - enter_supervisor_mode(rest_of_boot_loader, pk_vm_init(), 0); + enter_supervisor_mode((void*)pa2kva(rest_of_boot_loader), pa2kva(kernel_stack_top), 0); } void boot_other_hart(uintptr_t dtb) diff --git a/pk/syscall.c b/pk/syscall.c index af3660d..b47c237 100644 --- a/pk/syscall.c +++ b/pk/syscall.c @@ -662,5 +662,7 @@ long do_syscall(long a0, long a1, long a2, long a3, long a4, long a5, unsigned l if (!f) panic("bad syscall #%ld!",n); + f = (void*)pa2kva(f); + return f(a0, a1, a2, a3, a4, a5, n); } -- cgit v1.1