about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andrew Waterman <andrew@sifive.com> 2021-03-17 00:30:12 -0700
committer Andrew Waterman <andrew@sifive.com> 2021-03-25 22:21:06 -0700
commit b9eba474fb52dbf205c7b466af458397a7487216 (patch)
tree e203753f33c579d163b9c94bffa1d50c92c21b65
parent 0b5ef9cf755811e6a8a2901585f3ecb5604e55f1 (diff)
download pk-b9eba474fb52dbf205c7b466af458397a7487216.zip
pk-b9eba474fb52dbf205c7b466af458397a7487216.tar.gz
pk-b9eba474fb52dbf205c7b466af458397a7487216.tar.bz2
pk: support >2 GiB of user memory for RV64
Previously, the pk always ran from virtual address MEM_START, which capped user virtual addresses at MEM_START. This change remaps the pk into the negative (upper-half) virtual addresses starting at KVA_START, allowing user processes to expand beyond MEM_START.
-rw-r--r-- pk/console.c 21
-rw-r--r-- pk/frontend.c 3
-rw-r--r-- pk/handlers.c 4
-rw-r--r-- pk/mmap.c 35
-rw-r--r-- pk/mmap.h 9
-rw-r--r-- pk/pk.c 36
-rw-r--r-- pk/syscall.c 2
7 files changed, 76 insertions, 34 deletions
diff --git a/pk/console.c b/pk/console.c
index ec84981..3d3edae 100644
--- a/pk/console.c
+++ b/pk/console.c
@@ -1,6 +1,7 @@
// See LICENSE for license details.
#include "pk.h"
+#include "mmap.h"
#include "file.h"
#include "frontend.h"
#include <stdint.h>
@@ -23,21 +24,27 @@ void printk(const char* s, ...)
va_end(vl);
}
-void dump_tf(trapframe_t* tf)
+static const char* get_regname(int r)
{
- static const char* regnames[] = {
- "z ", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
- "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
- "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7",
- "s8", "s9", "sA", "sB", "t3", "t4", "t5", "t6"
+ static const char regnames[] = {
+ "z \0" "ra\0" "sp\0" "gp\0" "tp\0" "t0\0" "t1\0" "t2\0"
+ "s0\0" "s1\0" "a0\0" "a1\0" "a2\0" "a3\0" "a4\0" "a5\0"
+ "a6\0" "a7\0" "s2\0" "s3\0" "s4\0" "s5\0" "s6\0" "s7\0"
+ "s8\0" "s9\0" "sA\0" "sB\0" "t3\0" "t4\0" "t5\0" "t6"
};
+ return &regnames[r * 3];
+}
+
+void dump_tf(trapframe_t* tf)
+{
+
tf->gpr[0] = 0;
for(int i = 0; i < 32; i+=4)
{
for(int j = 0; j < 4; j++)
- printk("%s %lx%c",regnames[i+j],tf->gpr[i+j],j < 3 ? ' ' : '\n');
+ printk("%s %lx%c", get_regname(i+j), tf->gpr[i+j], j < 3 ? ' ' : '\n');
}
printk("pc %lx va %lx insn %x sr %lx\n", tf->epc, tf->badvaddr,
(uint32_t)tf->insn, tf->status);
diff --git a/pk/frontend.c b/pk/frontend.c
index a02cb89..cbe5377 100644
--- a/pk/frontend.c
+++ b/pk/frontend.c
@@ -5,6 +5,7 @@
#include "frontend.h"
#include "syscall.h"
#include "htif.h"
+#include "mmap.h"
#include <stdint.h>
long frontend_syscall(long n, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5, uint64_t a6)
@@ -23,7 +24,7 @@ long frontend_syscall(long n, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3
magic_mem[6] = a5;
magic_mem[7] = a6;
- htif_syscall((uintptr_t)magic_mem);
+ htif_syscall(kva2pa_maybe(magic_mem));
long ret = magic_mem[0];
diff --git a/pk/handlers.c b/pk/handlers.c
index cb12d22..efd14f1 100644
--- a/pk/handlers.c
+++ b/pk/handlers.c
@@ -122,5 +122,7 @@ void handle_trap(trapframe_t* tf)
kassert(tf->cause < ARRAY_SIZE(trap_handlers) && trap_handlers[tf->cause]);
- trap_handlers[tf->cause](tf);
+ trap_handler f = (void*)pa2kva(trap_handlers[tf->cause]);
+
+ f(tf);
}
diff --git a/pk/mmap.c b/pk/mmap.c
index 33f6d50..3351295 100644
--- a/pk/mmap.c
+++ b/pk/mmap.c
@@ -10,6 +10,8 @@
#include <stdbool.h>
#include <errno.h>
+uintptr_t kva2pa_offset;
+
typedef struct vmr_t {
struct vmr_t* next;
uintptr_t addr;
@@ -68,7 +70,9 @@ static uintptr_t __page_alloc()
node->next = NULL;
page_freelist_depth--;
- return (uintptr_t)memset((void*)addr, 0, RISCV_PGSIZE);
+ memset((void*)pa2kva(addr), 0, RISCV_PGSIZE);
+
+ return addr;
}
static uintptr_t __page_alloc_assert()
@@ -103,7 +107,7 @@ static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
size_t offset, unsigned refcnt, int prot)
{
if (vmr_freelist_head == NULL) {
- vmr_t* new_vmrs = (vmr_t*)__page_alloc();
+ vmr_t* new_vmrs = (vmr_t*)pa2kva(__page_alloc());
if (new_vmrs == NULL)
return NULL;
@@ -161,7 +165,7 @@ static size_t pt_idx(uintptr_t addr, int level)
static pte_t* __walk_internal(uintptr_t addr, int create, int level)
{
- pte_t* t = root_page_table;
+ pte_t* t = (pte_t*)pa2kva(root_page_table);
for (int i = RISCV_PGLEVELS - 1; i > level; i--) {
size_t idx = pt_idx(addr, i);
if (unlikely(!(t[idx] & PTE_V))) {
@@ -174,7 +178,7 @@ static pte_t* __walk_internal(uintptr_t addr, int create, int level)
return 0;
}
}
- t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT);
+ t = (pte_t*)pa2kva(pte_ppn(t[idx]) << RISCV_PGSHIFT);
}
return &t[pt_idx(addr, level)];
}
@@ -251,8 +255,8 @@ static int __handle_page_fault(uintptr_t vaddr, int prot)
return -1;
else if (!(*pte & PTE_V))
{
- uintptr_t kva = __page_alloc_assert();
- uintptr_t ppn = kva / RISCV_PGSIZE;
+ uintptr_t ppn = __page_alloc_assert() / RISCV_PGSIZE;
+ uintptr_t kva = pa2kva(ppn * RISCV_PGSIZE);
vmr_t* v = (vmr_t*)*pte;
*pte = pte_create(ppn, prot_to_type(PROT_READ|PROT_WRITE, 0));
@@ -488,6 +492,10 @@ void populate_mapping(const void* start, size_t size, int prot)
uintptr_t pk_vm_init()
{
+ // PA space must fit within half of VA space
+ uintptr_t user_size = -KVA_START;
+ mem_size = MIN(mem_size, user_size);
+
extern char _end;
first_free_page = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
free_pages = (mem_size - (first_free_page - MEM_START)) / RISCV_PGSIZE;
@@ -498,19 +506,18 @@ uintptr_t pk_vm_init()
next_free_page = freelist_node_array_size / RISCV_PGSIZE;
root_page_table = (void*)__page_alloc_assert();
- __map_kernel_range(MEM_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC);
+ __map_kernel_range(KVA_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC);
- current.mmap_max = current.brk_max = MEM_START;
-
- size_t mem_pages = mem_size >> RISCV_PGSHIFT;
- size_t stack_size = MIN(mem_pages >> 5, 2048) * RISCV_PGSIZE;
- size_t stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
- kassert(stack_bottom != (uintptr_t)-1);
- current.stack_top = stack_bottom + stack_size;
+ current.mmap_max = current.brk_max = user_size;
flush_tlb();
write_csr(sptbr, ((uintptr_t)root_page_table >> RISCV_PGSHIFT) | SATP_MODE_CHOICE);
uintptr_t kernel_stack_top = __page_alloc_assert() + RISCV_PGSIZE;
+
+ // relocate
+ kva2pa_offset = KVA_START - MEM_START;
+ page_freelist_node_array = (void*)pa2kva(page_freelist_node_array);
+
return kernel_stack_top;
}
diff --git a/pk/mmap.h b/pk/mmap.h
index 9e3fad6..7e68777 100644
--- a/pk/mmap.h
+++ b/pk/mmap.h
@@ -34,7 +34,12 @@ uintptr_t do_mremap(uintptr_t addr, size_t old_size, size_t new_size, int flags)
uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot);
uintptr_t do_brk(uintptr_t addr);
-#define kva2pa(va) ((uintptr_t)(va))
-#define is_uva(va) ((uintptr_t)(va) < MEM_START)
+#define KVA_START ((uintptr_t)-1 << (VA_BITS-1))
+
+extern uintptr_t kva2pa_offset;
+#define kva2pa(va) ((uintptr_t)(va) - kva2pa_offset)
+#define pa2kva(pa) ((uintptr_t)(pa) + kva2pa_offset)
+#define kva2pa_maybe(va) ((uintptr_t)(va) >= KVA_START ? kva2pa(va) : (uintptr_t)(va))
+#define is_uva(va) ((uintptr_t)(va) < KVA_START)
#endif
diff --git a/pk/pk.c b/pk/pk.c
index cb03c70..012b697 100644
--- a/pk/pk.c
+++ b/pk/pk.c
@@ -6,6 +6,7 @@
#include "elf.h"
#include "mtrap.h"
#include "frontend.h"
+#include "bits.h"
#include "usermem.h"
#include <stdbool.h>
@@ -67,11 +68,11 @@ static size_t parse_args(arg_buf* args)
uint64_t* pk_argv = &args->buf[1];
// pk_argv[0] is the proxy kernel itself. skip it and any flags.
size_t pk_argc = args->buf[0], arg = 1;
- for ( ; arg < pk_argc && *(char*)(uintptr_t)pk_argv[arg] == '-'; arg++)
- handle_option((const char*)(uintptr_t)pk_argv[arg]);
+ for ( ; arg < pk_argc && *(char*)pa2kva(pk_argv[arg]) == '-'; arg++)
+ handle_option((const char*)pa2kva(pk_argv[arg]));
for (size_t i = 0; arg + i < pk_argc; i++)
- args->argv[i] = (char*)(uintptr_t)pk_argv[arg + i];
+ args->argv[i] = (char*)pa2kva(pk_argv[arg + i]);
return pk_argc - arg;
}
@@ -85,6 +86,12 @@ static void init_tf(trapframe_t* tf, long pc, long sp)
static void run_loaded_program(size_t argc, char** argv, uintptr_t kstack_top)
{
+ size_t mem_pages = mem_size >> RISCV_PGSHIFT;
+ size_t stack_size = MIN(mem_pages >> 5, 2048) * RISCV_PGSIZE;
+ size_t stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
+ kassert(stack_bottom != (uintptr_t)-1);
+ current.stack_top = stack_bottom + stack_size;
+
// copy phdrs to user stack
size_t stack_top = current.stack_top - current.phdr_size;
memcpy_to_user((void*)stack_top, (void*)current.phdr, current.phdr_size);
@@ -167,15 +174,25 @@ static void run_loaded_program(size_t argc, char** argv, uintptr_t kstack_top)
start_user(&tf);
}
-static void rest_of_boot_loader(uintptr_t kstack_top)
+void rest_of_boot_loader(uintptr_t kstack_top);
+
+asm ("\n\
+ .globl rest_of_boot_loader\n\
+rest_of_boot_loader:\n\
+ mv sp, a0\n\
+ tail rest_of_boot_loader_2");
+
+void rest_of_boot_loader_2(uintptr_t kstack_top)
{
- arg_buf args;
+ file_init();
+
+ static arg_buf args; // avoid large stack allocation
size_t argc = parse_args(&args);
if (!argc)
panic("tell me what ELF to load!");
// load program named by argv[0]
- long phdrs[128];
+ static long phdrs[128]; // avoid large stack allocation
current.phdr = (uintptr_t)phdrs;
current.phdr_size = sizeof(phdrs);
load_elf(args.argv[0], &current);
@@ -185,14 +202,15 @@ static void rest_of_boot_loader(uintptr_t kstack_top)
void boot_loader(uintptr_t dtb)
{
+ uintptr_t kernel_stack_top = pk_vm_init();
+
extern char trap_entry;
- write_csr(stvec, &trap_entry);
+ write_csr(stvec, pa2kva(&trap_entry));
write_csr(sscratch, 0);
write_csr(sie, 0);
set_csr(sstatus, SSTATUS_FS | SSTATUS_VS);
- file_init();
- enter_supervisor_mode(rest_of_boot_loader, pk_vm_init(), 0);
+ enter_supervisor_mode((void*)pa2kva(rest_of_boot_loader), pa2kva(kernel_stack_top), 0);
}
void boot_other_hart(uintptr_t dtb)
diff --git a/pk/syscall.c b/pk/syscall.c
index af3660d..b47c237 100644
--- a/pk/syscall.c
+++ b/pk/syscall.c
@@ -662,5 +662,7 @@ long do_syscall(long a0, long a1, long a2, long a3, long a4, long a5, unsigned l
if (!f)
panic("bad syscall #%ld!",n);
+ f = (void*)pa2kva(f);
+
return f(a0, a1, a2, a3, a4, a5, n);
}