From cc72987e655578b0529b6c3c8084e810cf40b358 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 13 Jul 2013 21:43:57 -0700 Subject: Support Linux ABI and (optionally) virtual memory --- pk/atomic.h | 12 ++ pk/console.c | 147 ++++++++++++++++++++++ pk/elf.c | 78 ++++++------ pk/elf.h | 9 +- pk/entry.S | 1 + pk/file.c | 108 ++++++++-------- pk/file.h | 11 +- pk/fp.c | 4 - pk/handlers.c | 33 +++-- pk/init.c | 220 ++++++++++---------------------- pk/pcr.h | 14 +++ pk/pk.h | 39 ++++-- pk/pk.mk.in | 3 + pk/syscall.c | 121 +++++++++++++----- pk/syscall.h | 4 +- pk/vm.c | 392 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ pk/vm.h | 26 ++++ 17 files changed, 908 insertions(+), 314 deletions(-) create mode 100644 pk/console.c create mode 100644 pk/vm.c create mode 100644 pk/vm.h diff --git a/pk/atomic.h b/pk/atomic.h index c0c1d84..8e80c78 100644 --- a/pk/atomic.h +++ b/pk/atomic.h @@ -43,6 +43,18 @@ static inline long atomic_swap(atomic_t* a, long val) #endif } +static inline long atomic_cas(atomic_t* a, long compare, long swap) +{ +#ifdef PK_ENABLE_ATOMICS + return __sync_val_compare_and_swap(&a->val, compare, swap); +#else + long ret = atomic_read(a); + if (ret == compare) + atomic_set(a, swap); + return ret; +#endif +} + static inline void spinlock_lock(spinlock_t* lock) { do diff --git a/pk/console.c b/pk/console.c new file mode 100644 index 0000000..cfa58d4 --- /dev/null +++ b/pk/console.c @@ -0,0 +1,147 @@ +#include "pk.h" +#include "file.h" +#include "syscall.h" +#include +#include +#include +#include + +static void vsprintk(char* out, const char* s, va_list vl) +{ + bool format = false; + bool longarg = false; + for( ; *s; s++) + { + if(format) + { + switch(*s) + { + case 'l': + longarg = true; + break; + case 'p': + longarg = true; + *out++ = '0'; + *out++ = 'x'; + case 'x': + { + long n = longarg ? va_arg(vl,long) : va_arg(vl,int); + for(int i = 2*(longarg ? 
sizeof(long) : sizeof(int))-1; i >= 0; i--) + { + int d = (n >> (4*i)) & 0xF; + *out++ = (d < 10 ? '0'+d : 'a'+d-10); + } + longarg = false; + format = false; + break; + } + case 'd': + { + long n = longarg ? va_arg(vl,long) : va_arg(vl,int); + if(n < 0) + { + n = -n; + *out++ = '-'; + } + long digits = 1; + for(long nn = n ; nn /= 10; digits++); + for(int i = digits-1; i >= 0; i--) + { + out[i] = '0' + n%10; + n /= 10; + } + out += digits; + longarg = false; + format = false; + break; + } + case 's': + { + const char* s2 = va_arg(vl,const char*); + while(*s2) + *out++ = *s2++; + longarg = false; + format = false; + break; + } + case 'c': + { + *out++ = (char)va_arg(vl,int); + longarg = false; + format = false; + break; + } + default: + panic("bad fmt"); + } + } + else if(*s == '%') + format = true; + else + *out++ = *s; + } + *out++ = '\0'; +} + +static void vprintk(const char* s, va_list vl) +{ + char out[1024]; // XXX + vsprintk(out, s, vl); + file_write(stderr, out, strlen(out)); +} + +void printk(const char* s, ...) +{ + va_list vl; + va_start(vl, s); + + vprintk(s, vl); + + va_end(vl); +} + +void sprintk(char* out, const char* s, ...) +{ + va_list vl; + va_start(vl,s); + + vsprintk(out,s,vl); + + va_end(vl); +} + +void dump_tf(trapframe_t* tf) +{ + static const char* regnames[] = { + "z ", "ra", "s0", "s1", "s2", "s3", "s4", "s5", + "s6", "s7", "s8", "s9", "sA", "sB", "sp", "tp", + "v0", "v1", "a0", "a1", "a2", "a3", "a4", "a5", + "a6", "a7", "a8", "a9", "aA", "aB", "aC", "aD" + }; + + tf->gpr[0] = 0; + + for(int i = 0; i < 32; i+=4) + { + for(int j = 0; j < 4; j++) + printk("%s %lx%c",regnames[i+j],tf->gpr[i+j],j < 3 ? ' ' : '\n'); + } + printk("sr %lx pc %lx va %lx insn %x\n",tf->sr,tf->epc,tf->badvaddr, + (uint32_t)tf->insn); +} + +void do_panic(const char* s, ...) 
+{ + va_list vl; + va_start(vl, s); + + vprintk(s, vl); + sys_exit(-1); + + va_end(vl); +} + +void kassert_fail(const char* s) +{ + do_panic("assertion failed: %s\n", s); +} diff --git a/pk/elf.c b/pk/elf.c index ecd2f61..c942848 100644 --- a/pk/elf.c +++ b/pk/elf.c @@ -1,72 +1,74 @@ // See LICENSE for license details. +#include "file.h" +#include "pk.h" +#include "pcr.h" +#include "vm.h" #include #include #include #include -#include "file.h" -#include "pk.h" -long load_elf(const char* fn, int* user64) +void load_elf(const char* fn, elf_info* info) { - sysret_t ret = file_open(fn, strlen(fn)+1, O_RDONLY, 0); + sysret_t ret = file_open(fn, O_RDONLY, 0); file_t* file = (file_t*)ret.result; - if(ret.result == -1) + if (ret.result == -1) goto fail; - char buf[2048]; // XXX - int header_size = file_read(file, buf, sizeof(buf)).result; - const Elf64_Ehdr* eh64 = (const Elf64_Ehdr*)buf; - if(header_size < (int)sizeof(Elf64_Ehdr) || - !(eh64->e_ident[0] == '\177' && eh64->e_ident[1] == 'E' && - eh64->e_ident[2] == 'L' && eh64->e_ident[3] == 'F')) + Elf64_Ehdr eh64; + ssize_t ehdr_size = file_pread(file, &eh64, sizeof(eh64), 0).result; + if (ehdr_size < (ssize_t)sizeof(eh64) || + !(eh64.e_ident[0] == '\177' && eh64.e_ident[1] == 'E' && + eh64.e_ident[2] == 'L' && eh64.e_ident[3] == 'F')) goto fail; #define LOAD_ELF do { \ - eh = (typeof(eh))buf; \ - kassert(header_size >= eh->e_phoff + eh->e_phnum*sizeof(*ph)); \ - ph = (typeof(ph))(buf+eh->e_phoff); \ + eh = (typeof(eh))&eh64; \ + size_t phdr_size = eh->e_phnum*sizeof(*ph); \ + if (info->phdr_top - phdr_size < info->stack_bottom) \ + goto fail; \ + info->phdr = info->phdr_top - phdr_size; \ + ssize_t ret = file_pread(file, (void*)info->phdr, phdr_size, eh->e_phoff).result; \ + if (ret < (ssize_t)phdr_size) goto fail; \ + info->entry = eh->e_entry; \ + info->phnum = eh->e_phnum; \ + info->phent = sizeof(*ph); \ + ph = (typeof(ph))info->phdr; \ for(int i = 0; i < eh->e_phnum; i++, ph++) { \ if(ph->p_type == SHT_PROGBITS && 
ph->p_memsz) { \ - extern char _end; \ - if((char*)(long)ph->p_vaddr < &_end) \ - { \ - long diff = &_end - (char*)(long)ph->p_vaddr; \ - ph->p_vaddr += diff; \ - ph->p_offset += diff; \ - ph->p_memsz = diff >= ph->p_memsz ? 0 : ph->p_memsz - diff; \ - ph->p_filesz = diff >= ph->p_filesz ? 0 : ph->p_filesz - diff; \ - } \ - if(file_pread(file, (char*)(long)ph->p_vaddr, ph->p_filesz, ph->p_offset).result != ph->p_filesz) \ + info->brk_min = MAX(info->brk_min, ph->p_vaddr + ph->p_memsz); \ + size_t vaddr = ROUNDDOWN(ph->p_vaddr, RISCV_PGSIZE), prepad = ph->p_vaddr - vaddr; \ + size_t memsz = ph->p_memsz + prepad, filesz = ph->p_filesz + prepad; \ + size_t offset = ph->p_offset - prepad; \ + if (__do_mmap(vaddr, filesz, -1, MAP_FIXED|MAP_PRIVATE, file, offset) != vaddr) \ goto fail; \ - memset((char*)(long)ph->p_vaddr+ph->p_filesz, 0, ph->p_memsz-ph->p_filesz); \ + size_t mapped = ROUNDUP(filesz, RISCV_PGSIZE); \ + if (memsz > mapped) \ + if (__do_mmap(vaddr + mapped, memsz - mapped, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) != vaddr + mapped) \ + goto fail; \ } \ } \ } while(0) - long entry; - *user64 = 0; - if (IS_ELF32(*eh64)) + info->elf64 = IS_ELF64(eh64); + if (info->elf64) { - Elf32_Ehdr* eh; - Elf32_Phdr* ph; + Elf64_Ehdr* eh; + Elf64_Phdr* ph; LOAD_ELF; - entry = eh->e_entry; } - else if (IS_ELF64(*eh64)) + else if (IS_ELF32(eh64)) { - *user64 = 1; - Elf64_Ehdr* eh; - Elf64_Phdr* ph; + Elf32_Ehdr* eh; + Elf32_Phdr* ph; LOAD_ELF; - entry = eh->e_entry; } else goto fail; file_decref(file); - - return entry; + return; fail: panic("couldn't open ELF program: %s!", fn); diff --git a/pk/elf.h b/pk/elf.h index ea39078..f91a57f 100644 --- a/pk/elf.h +++ b/pk/elf.h @@ -1,7 +1,5 @@ // See LICENSE for license details. -// See LICENSE for details. 
- #ifndef _ELF_H #define _ELF_H @@ -17,6 +15,13 @@ #define SHT_PROGBITS 1 #define SHT_NOBITS 8 +#define AT_NULL 0 +#define AT_PHDR 3 +#define AT_PHENT 4 +#define AT_PHNUM 5 +#define AT_PAGESZ 6 +#define AT_ENTRY 9 + typedef struct { uint8_t e_ident[16]; uint16_t e_type; diff --git a/pk/entry.S b/pk/entry.S index 6441d9f..4a762c0 100644 --- a/pk/entry.S +++ b/pk/entry.S @@ -140,6 +140,7 @@ trap_entry: jal handle_trap .bss + .align 4 .global stack_bot .global stack_top stack_bot: diff --git a/pk/file.c b/pk/file.c index 195bcdc..ecc5f28 100644 --- a/pk/file.c +++ b/pk/file.c @@ -6,66 +6,52 @@ #include "pk.h" #include "frontend.h" #include "pcr.h" +#include "vm.h" #define MAX_FDS 32 -file_t* fds[MAX_FDS]; +static file_t* fds[MAX_FDS]; #define MAX_FILES 32 -file_t files[MAX_FILES] = {[0 ... MAX_FILES-1] = {-1,{0}}}; +static file_t files[MAX_FILES] = {[0 ... MAX_FILES-1] = {-1,{0}}}; file_t *stdout, *stdin, *stderr; -static void file_incref(file_t* f) +void file_incref(file_t* f) { - atomic_add(&f->refcnt,1); + atomic_add(&f->refcnt, 1); } void file_decref(file_t* f) { - if(atomic_add(&f->refcnt,-1) == 2) + if (atomic_add(&f->refcnt, -1) == 2) { - if(f->kfd != -1) - { - frontend_syscall(SYS_close,f->kfd,0,0,0); - f->kfd = -1; - } - atomic_add(&f->refcnt,-1); // I think this could just be atomic_set(..,0) + int kfd = f->kfd; + mb(); + atomic_set(&f->refcnt, 0); + + frontend_syscall(SYS_close, kfd, 0, 0, 0); } } static file_t* file_get_free() { - for(int i = 0; i < MAX_FILES; i++) - { - if(atomic_read(&files[i].refcnt) == 0) - { - if(atomic_add(&files[i].refcnt,1) == 0) - { - atomic_add(&files[i].refcnt,1); - return &files[i]; - } - file_decref(&files[i]); - } - } + for (file_t* f = files; f < files + MAX_FILES; f++) + if (atomic_read(&f->refcnt) == 0 && atomic_cas(&f->refcnt, 0, 2) == 0) + return f; return NULL; } -static int fd_get_free() +int file_dup(file_t* f) { - for(int i = 0; i < MAX_FDS; i++) - if(fds[i] == NULL) + for (int i = 0; i < MAX_FDS; i++) + { + if 
(fds[i] == NULL && __sync_bool_compare_and_swap(&fds[i], 0, f)) + { + file_incref(f); return i; + } + } return -1; } -int file_dup(file_t* f) -{ - int fd = fd_get_free(); - if(fd == -1) - return -1; - file_incref(f); - fds[fd] = f; - return fd; -} - void file_init() { stdin = file_get_free(); @@ -84,16 +70,28 @@ void file_init() file_t* file_get(int fd) { - return fd < 0 || fd >= MAX_FDS ? NULL : fds[fd]; + file_t* f; + if (fd < 0 || fd >= MAX_FDS || (f = fds[fd]) == NULL) + return 0; + + long old_cnt; + do { + old_cnt = atomic_read(&f->refcnt); + if (old_cnt == 0) + return 0; + } while (atomic_cas(&f->refcnt, old_cnt, old_cnt+1) != old_cnt); + + return f; } -sysret_t file_open(const char* fn, size_t len, int flags, int mode) +sysret_t file_open(const char* fn, int flags, int mode) { file_t* f = file_get_free(); if(!f) return (sysret_t){-1,ENOMEM}; - sysret_t ret = frontend_syscall(SYS_open,(long)fn,len,flags,mode); + size_t fn_size = strlen(fn)+1; + sysret_t ret = frontend_syscall(SYS_open, (long)fn, fn_size, flags, mode); if(ret.result != -1) { f->kfd = ret.result; @@ -108,39 +106,47 @@ sysret_t file_open(const char* fn, size_t len, int flags, int mode) int fd_close(int fd) { file_t* f = file_get(fd); - if(!f) + if (!f) + return -1; + int success = __sync_bool_compare_and_swap(&fds[fd], f, 0); + file_decref(f); + if (!success) return -1; - fds[fd] = NULL; file_decref(f); return 0; } -sysret_t file_read(file_t* f, char* buf, size_t size) +sysret_t file_read(file_t* f, void* buf, size_t size) { - return frontend_syscall(SYS_read,f->kfd,(long)buf,size,0); + populate_mapping(buf, size, PROT_WRITE); + return frontend_syscall(SYS_read, f->kfd, (uintptr_t)buf, size, 0); } -sysret_t file_pread(file_t* f, char* buf, size_t size, off_t offset) +sysret_t file_pread(file_t* f, void* buf, size_t size, off_t offset) { - return frontend_syscall(SYS_pread,f->kfd,(long)buf,size,offset); + populate_mapping(buf, size, PROT_WRITE); + return frontend_syscall(SYS_pread, f->kfd, 
(uintptr_t)buf, size, offset); } -sysret_t file_write(file_t* f, const char* buf, size_t size) +sysret_t file_write(file_t* f, const void* buf, size_t size) { - return frontend_syscall(SYS_write,f->kfd,(long)buf,size,0); + populate_mapping(buf, size, PROT_READ); + return frontend_syscall(SYS_write, f->kfd, (uintptr_t)buf, size, 0); } -sysret_t file_pwrite(file_t* f, const char* buf, size_t size, off_t offset) +sysret_t file_pwrite(file_t* f, const void* buf, size_t size, off_t offset) { - return frontend_syscall(SYS_pwrite,f->kfd,(long)buf,size,offset); + populate_mapping(buf, size, PROT_READ); + return frontend_syscall(SYS_pwrite, f->kfd, (uintptr_t)buf, size, offset); } sysret_t file_stat(file_t* f, struct stat* s) { - return frontend_syscall(SYS_fstat,f->kfd,(long)s,0,0); + populate_mapping(s, sizeof(*s), PROT_WRITE); + return frontend_syscall(SYS_fstat, f->kfd, (uintptr_t)s, 0, 0); } sysret_t file_lseek(file_t* f, size_t ptr, int dir) { - return frontend_syscall(SYS_lseek,f->kfd,ptr,dir,0); + return frontend_syscall(SYS_lseek, f->kfd, ptr, dir, 0); } diff --git a/pk/file.h b/pk/file.h index 89d0523..42f47fc 100644 --- a/pk/file.h +++ b/pk/file.h @@ -16,14 +16,15 @@ typedef struct file extern file_t *stdin, *stdout, *stderr; file_t* file_get(int fd); -sysret_t file_open(const char* fn, size_t len, int flags, int mode); +sysret_t file_open(const char* fn, int flags, int mode); void file_decref(file_t*); +void file_incref(file_t*); int file_dup(file_t*); -sysret_t file_pwrite(file_t* f, const char* buf, size_t n, off_t off); -sysret_t file_pread(file_t* f, char* buf, size_t n, off_t off); -sysret_t file_write(file_t* f, const char* buf, size_t n); -sysret_t file_read(file_t* f, char* buf, size_t n); +sysret_t file_pwrite(file_t* f, const void* buf, size_t n, off_t off); +sysret_t file_pread(file_t* f, void* buf, size_t n, off_t off); +sysret_t file_write(file_t* f, const void* buf, size_t n); +sysret_t file_read(file_t* f, void* buf, size_t n); sysret_t 
file_stat(file_t* f, struct stat* s); sysret_t file_lseek(file_t* f, size_t ptr, int dir); int fd_close(int fd); diff --git a/pk/fp.c b/pk/fp.c index 96d8ddc..aaa8f0f 100644 --- a/pk/fp.c +++ b/pk/fp.c @@ -21,10 +21,6 @@ static uint64_t get_fp_reg(unsigned int which, unsigned int dp); static inline void validate_address(trapframe_t* tf, long addr, int size, int store) { - if(addr & (size-1)) - store ? handle_misaligned_store(tf) : handle_misaligned_load(tf); - if(addr < USER_START) - store ? handle_fault_store(tf) : handle_fault_load(tf); } int emulate_fp(trapframe_t* tf) diff --git a/pk/handlers.c b/pk/handlers.c index 5caa29e..7493ac9 100644 --- a/pk/handlers.c +++ b/pk/handlers.c @@ -3,6 +3,8 @@ #include "pcr.h" #include "pk.h" #include "config.h" +#include "syscall.h" +#include "vm.h" int have_fp = 1; // initialized to 1 because it can't be in the .bss section! int have_vector = 1; @@ -55,8 +57,6 @@ static void handle_illegal_instruction(trapframe_t* tf) static void handle_fp_disabled(trapframe_t* tf) { - setpcr(PCR_SR, SR_ET); - if(have_fp && !(mfpcr(PCR_SR) & SR_EF)) init_fp(tf); else @@ -88,39 +88,46 @@ void handle_misaligned_store(trapframe_t* tf) panic("Misaligned store!"); } -static void handle_fault_fetch(trapframe_t* tf) +static void segfault(trapframe_t* tf, uintptr_t addr, const char* type) { dump_tf(tf); - panic("Faulting instruction access!"); + const char* who = (tf->sr & SR_PS) ? 
"Kernel" : "User"; + panic("%s %s segfault @ %p", who, type, addr); +} + +static void handle_fault_fetch(trapframe_t* tf) +{ + if (handle_page_fault(tf->epc, PROT_EXEC) != 0) + segfault(tf, tf->epc, "fetch"); } void handle_fault_load(trapframe_t* tf) { - dump_tf(tf); - panic("Faulting load!"); + if (handle_page_fault(tf->badvaddr, PROT_READ) != 0) + segfault(tf, tf->badvaddr, "load"); } void handle_fault_store(trapframe_t* tf) { - dump_tf(tf); - panic("Faulting store!"); + if (handle_page_fault(tf->badvaddr, PROT_WRITE) != 0) + segfault(tf, tf->badvaddr, "store"); } static void handle_syscall(trapframe_t* tf) { - setpcr(PCR_SR, SR_ET); - - long n = tf->gpr[16]; - sysret_t ret = syscall(tf->gpr[18], tf->gpr[19], tf->gpr[20], tf->gpr[21], n); + sysret_t ret = syscall(tf->gpr[18], tf->gpr[19], tf->gpr[20], tf->gpr[21], + tf->gpr[22], tf->gpr[23], tf->gpr[16]); tf->gpr[16] = ret.result; - tf->gpr[17] = ret.result == -1 ? ret.err : 0; + tf->gpr[21] = ret.err; advance_pc(tf); } void handle_trap(trapframe_t* tf) { + setpcr(PCR_SR, SR_ET); + typedef void (*trap_handler)(trapframe_t*); const static trap_handler trap_handlers[] = { diff --git a/pk/init.c b/pk/init.c index 6ee4154..48667c3 100644 --- a/pk/init.c +++ b/pk/init.c @@ -3,189 +3,97 @@ #include "pcr.h" #include "pk.h" #include "file.h" +#include "vm.h" #include "frontend.h" -#include +#include "elf.h" #include -#include #include -static void vsprintk(char* out, const char* s, va_list vl) -{ - bool format = false; - bool longarg = false; - for( ; *s; s++) - { - if(format) - { - switch(*s) - { - case 'l': - longarg = true; - break; - case 'x': - { - long n = longarg ? va_arg(vl,long) : va_arg(vl,int); - for(int i = 2*(longarg ? sizeof(long) : sizeof(int))-1; i >= 0; i--) - { - int d = (n >> (4*i)) & 0xF; - *out++ = (d < 10 ? '0'+d : 'a'+d-10); - } - longarg = false; - format = false; - break; - } - case 'd': - { - long n = longarg ? 
va_arg(vl,long) : va_arg(vl,int); - if(n < 0) - { - n = -n; - *out++ = '-'; - } - long digits = 1; - for(long nn = n ; nn /= 10; digits++); - for(int i = digits-1; i >= 0; i--) - { - out[i] = '0' + n%10; - n /= 10; - } - out += digits; - longarg = false; - format = false; - break; - } - case 's': - { - const char* s2 = va_arg(vl,const char*); - while(*s2) - *out++ = *s2++; - longarg = false; - format = false; - break; - } - case 'c': - { - *out++ = (char)va_arg(vl,int); - longarg = false; - format = false; - break; - } - default: - panic("bad fmt"); - } - } - else if(*s == '%') - format = true; - else - *out++ = *s; - } - *out++ = '\0'; -} - -void printk(const char* s, ...) -{ - va_list vl; - va_start(vl,s); - - char out[1024]; // XXX - vsprintk(out,s,vl); - file_write(stderr,out,strlen(out)); - - va_end(vl); -} - -void sprintk(char* out, const char* s, ...) -{ - va_list vl; - va_start(vl,s); - - vsprintk(out,s,vl); - - va_end(vl); -} - -void dump_tf(trapframe_t* tf) -{ - static const char* regnames[] = { - "z ", "ra", "s0", "s1", "s2", "s3", "s4", "s5", - "s6", "s7", "s8", "s9", "sA", "sB", "sp", "tp", - "v0", "v1", "a0", "a1", "a2", "a3", "a4", "a5", - "a6", "a7", "a8", "a9", "aA", "aB", "aC", "aD" - }; - - tf->gpr[0] = 0; - - for(int i = 0; i < 32; i+=4) - { - for(int j = 0; j < 4; j++) - printk("%s %lx%c",regnames[i+j],tf->gpr[i+j],j < 3 ? 
' ' : '\n'); - } - printk("sr %lx pc %lx va %lx insn %x\n",tf->sr,tf->epc,tf->badvaddr, - (uint32_t)tf->insn); -} +elf_info current; void init_tf(trapframe_t* tf, long pc, long sp, int user64) { memset(tf,0,sizeof(*tf)); if(sizeof(void*) != 8) kassert(!user64); - tf->sr = (mfpcr(PCR_SR) & (SR_IM | SR_S64)) | SR_S | SR_EC; + tf->sr = (mfpcr(PCR_SR) & (SR_IM | SR_S64 | SR_VM)) | SR_S | SR_EC; if(user64) tf->sr |= SR_U64; tf->gpr[14] = sp; tf->epc = pc; } -static void bss_init() +static void user_init() { - // front-end server zeroes the bss automagically -} + struct args { + uint64_t argc; + uint64_t argv[]; + }; -struct args -{ - uint64_t argc; - uint64_t argv[]; -}; + const int argc_argv_size = 1024; + size_t stack_top = current.stack_top; + struct args* args = (struct args*)(stack_top - argc_argv_size); + populate_mapping(args, argc_argv_size, PROT_WRITE); + sysret_t r = frontend_syscall(SYS_getmainvars, (long)args, argc_argv_size, 0, 0); + kassert(r.result == 0); -static struct args* stack_init(unsigned long* stack_top) -{ - *stack_top -= USER_MAINVARS_SIZE; + // argv[0] is the proxy kernel itself. skip it. 
+ args->argv[0] = args->argc - 1; + args = (struct args*)args->argv; + stack_top = (uintptr_t)args; + + // load program named by argv[0] + current.phdr_top = stack_top; + load_elf((char*)args->argv[0], &current); /* records entry, phdr, phnum, phent and elf64 in current */ + + struct { + long key; + long value; + } aux[] = { + {AT_ENTRY, current.entry}, + {AT_PHNUM, current.phnum}, + {AT_PHENT, current.phent}, + {AT_PHDR, current.phdr}, + {AT_PAGESZ, RISCV_PGSIZE}, + {AT_NULL, 0} + }; - struct args* args = (struct args*)(*stack_top - sizeof(args->argc)); - sysret_t r = frontend_syscall(SYS_getmainvars, (long)args, USER_MAINVARS_SIZE, 0, 0); - kassert(r.result == 0); - - // chop off argv[0] - args->argv[0] = args->argc-1; - return (struct args*)args->argv; -} + // place argc, argv, envp, auxp on stack + #define PUSH_ARG(type, value) do { \ + *((type*)sp) = value; \ + sp += sizeof(type); \ + } while (0) + + #define STACK_INIT(type) do { \ + unsigned naux = sizeof(aux)/sizeof(aux[0]); \ + stack_top -= (1 + args->argc + 1 + 1 + 2*naux) * sizeof(type); \ + stack_top &= -16; \ + long sp = stack_top; \ + PUSH_ARG(type, args->argc); \ + for (unsigned i = 0; i < args->argc; i++) \ + PUSH_ARG(type, args->argv[i]); \ + PUSH_ARG(type, 0); /* argv[argc] = NULL */ \ + PUSH_ARG(type, 0); /* envp[0] = NULL */ \ + for (unsigned i = 0; i < naux; i++) { \ + PUSH_ARG(type, aux[i].key); \ + PUSH_ARG(type, aux[i].value); \ + } \ + } while (0) + + if (current.elf64) + STACK_INIT(uint64_t); + else + STACK_INIT(uint32_t); -static void jump_usrstart(const char* fn, long sp) -{ trapframe_t tf; - - int user64; - long start = load_elf(fn, &user64); + init_tf(&tf, current.entry, stack_top, current.elf64); __clear_cache(0, 0); - - init_tf(&tf, start, sp, user64); pop_tf(&tf); } -uint32_t mem_mb; - void boot() { - bss_init(); file_init(); - - // word 0 of memory contains # of MB of memory - mem_mb = *(uint32_t*)0; - - unsigned long stack_top = 0x80000000; - if (mem_mb < stack_top / (1024 * 1024)) - stack_top = mem_mb * (1024 * 1024); - - struct args* args = 
stack_init(&stack_top); - jump_usrstart((char*)(long)args->argv[0], stack_top); + vm_init(); + user_init(); } diff --git a/pk/pcr.h b/pk/pcr.h index cc78f2f..9fea232 100644 --- a/pk/pcr.h +++ b/pk/pcr.h @@ -64,6 +64,16 @@ #ifdef __riscv +#ifdef __riscv64 +# define RISCV_PGLEVELS 3 +# define RISCV_PGSHIFT 13 +#else +# define RISCV_PGLEVELS 2 +# define RISCV_PGSHIFT 12 +#endif +#define RISCV_PGLEVEL_BITS 10 +#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) + #define ASM_CR(r) _ASM_CR(r) #define _ASM_CR(r) cr##r @@ -85,6 +95,10 @@ asm volatile ("clearpcr %0,cr%2,%1" : "=r"(__tmp) : "i"(val), "i"(reg)); \ __tmp; }) +#define rdcycle() ({ unsigned long __tmp; \ + asm volatile ("rdcycle %0" : "=r"(__tmp)); \ + __tmp; }) + #endif #endif diff --git a/pk/pk.h b/pk/pk.h index 548c17e..52fc70d 100644 --- a/pk/pk.h +++ b/pk/pk.h @@ -3,13 +3,10 @@ #ifndef _PK_H #define _PK_H -#define USER_MAINVARS_SIZE 0x1000 -#define USER_START 0x10000 - #ifndef __ASSEMBLER__ #include -#include +#include typedef struct { @@ -21,8 +18,15 @@ typedef struct long insn; } trapframe_t; -#define panic(s,...) do { printk(s"\n", ##__VA_ARGS__); sys_exit(-1); } while(0) -#define kassert(cond) do { if(!(cond)) panic("assertion failed: "#cond); } while(0) +#define panic(s,...) do { do_panic(s"\n", ##__VA_ARGS__); } while(0) +#define kassert(cond) do { if(!(cond)) kassert_fail(""#cond); } while(0) +void do_panic(const char* s, ...) __attribute__((noreturn)); +void kassert_fail(const char* s) __attribute__((noreturn)); +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) +#define CLAMP(a, lo, hi) MIN(MAX(a, lo), hi) +#define ROUNDUP(a, b) ((((a)-1)/(b)+1)*(b)) +#define ROUNDDOWN(a, b) ((a)/(b)*(b)) #ifdef __cplusplus extern "C" { @@ -48,10 +52,25 @@ void handle_fault_load(trapframe_t*); void handle_fault_store(trapframe_t*); void boot(); -void sys_exit(int code) __attribute__((noreturn)); -sysret_t syscall(long a0, long a1, long a2, long a3, long n); - -long load_elf(const char* fn, int* user64); +typedef struct { + int elf64; + int phent; + int phnum; + size_t user_min; + size_t entry; + size_t brk_min; + size_t brk; + size_t brk_max; + size_t mmap_max; + size_t stack_bottom; + size_t phdr; + size_t phdr_top; + size_t stack_top; +} elf_info; + +extern elf_info current; + +void load_elf(const char* fn, elf_info* info); static inline void advance_pc(trapframe_t* tf) { diff --git a/pk/pk.mk.in b/pk/pk.mk.in index 5a30d16..14cc461 100644 --- a/pk/pk.mk.in +++ b/pk/pk.mk.in @@ -12,6 +12,7 @@ pk_hdrs = \ frontend.h \ riscv-opc.h \ elf.h \ + vm.h \ pk_c_srcs = \ init.c \ @@ -22,6 +23,8 @@ pk_c_srcs = \ fp.c \ int.c \ elf.c \ + console.c \ + vm.c \ pk_asm_srcs = \ entry.S \ diff --git a/pk/syscall.c b/pk/syscall.c index 711bb9e..1d3940c 100644 --- a/pk/syscall.c +++ b/pk/syscall.c @@ -1,44 +1,53 @@ // See LICENSE for license details. 
-#include -#include -#include +#include "syscall.h" #include "pk.h" #include "pcr.h" #include "file.h" #include "frontend.h" +#include "vm.h" +#include +#include -typedef sysret_t (*syscall_t)(long,long,long,long,long); +typedef sysret_t (*syscall_t)(long, long, long, long, long, long, long); void sys_exit(int code) { - frontend_syscall(SYS_exit,code,0,0,0); - panic("exit didn't exit!"); + frontend_syscall(SYS_exit, code, 0, 0, 0); + while (1); } sysret_t sys_read(int fd, char* buf, size_t n) { sysret_t r = {-1,EBADF}; file_t* f = file_get(fd); - if(!f) - return r; - return file_read(f,buf,n); + if (f) + { + r = file_read(f, buf, n); + file_decref(f); + } + + return r; } sysret_t sys_write(int fd, const char* buf, size_t n) { sysret_t r = {-1,EBADF}; file_t* f = file_get(fd); - if(!f) - return r; - return file_write(f,buf,n); + if (f) + { + r = file_write(f, buf, n); + file_decref(f); + } + + return r; } -sysret_t sys_open(const char* name, size_t len, int flags, int mode) +sysret_t sys_open(const char* name, int flags, int mode) { - sysret_t ret = file_open(name, len, flags, mode); + sysret_t ret = file_open(name, flags, mode); if(ret.result == -1) return ret; @@ -50,62 +59,98 @@ sysret_t sys_open(const char* name, size_t len, int flags, int mode) sysret_t sys_close(int fd) { - return (sysret_t){fd_close(fd),EBADF}; + int ret = fd_close(fd); + return (sysret_t){ret, ret & EBADF}; } sysret_t sys_fstat(int fd, void* st) { sysret_t r = {-1,EBADF}; file_t* f = file_get(fd); - if(!f) - return r; - return file_stat(f,st); + if (f) + { + r = file_stat(f, st); + file_decref(f); + } + + return r; } sysret_t sys_lseek(int fd, size_t ptr, int dir) { sysret_t r = {-1,EBADF}; file_t* f = file_get(fd); - if(!f) - return r; - return file_lseek(f,ptr,dir); + if (f) + { + r = file_lseek(f, ptr, dir); + file_decref(f); + } + + return r; } -sysret_t sys_stat(const char* name, size_t len, void* st) +sysret_t sys_stat(const char* name, void* st) { - return 
frontend_syscall(SYS_stat,(long)name,len,(long)st,0); + size_t name_size = strlen(name)+1; + populate_mapping(st, sizeof(struct stat), PROT_WRITE); + return frontend_syscall(SYS_stat, (uintptr_t)name, name_size, (uintptr_t)st, 0); } -sysret_t sys_lstat(const char* name, size_t len, void* st) +sysret_t sys_lstat(const char* name, void* st) { - return frontend_syscall(SYS_lstat,(long)name,len,(long)st,0); + size_t name_size = strlen(name)+1; + populate_mapping(st, sizeof(struct stat), PROT_WRITE); + return frontend_syscall(SYS_lstat, (uintptr_t)name, name_size, (uintptr_t)st, 0); } -sysret_t sys_link(const char* old_name, size_t old_len, - const char* new_name, size_t new_len) +sysret_t sys_link(const char* old_name, const char* new_name) { - return frontend_syscall(SYS_link,(long)old_name,old_len, - (long)new_name,new_len); + size_t old_size = strlen(old_name)+1; + size_t new_size = strlen(new_name)+1; + return frontend_syscall(SYS_link, (uintptr_t)old_name, old_size, + (uintptr_t)new_name, new_size); } sysret_t sys_unlink(const char* name, size_t len) { - return frontend_syscall(SYS_unlink,(long)name,len,0,0); + size_t name_size = strlen(name)+1; + return frontend_syscall(SYS_unlink, (uintptr_t)name, name_size, 0, 0); } sysret_t sys_brk(size_t pos) { - if(pos / (1024 * 1024) >= mem_mb) - return (sysret_t){-1, ENOMEM}; + return do_brk(pos); +} + +sysret_t sys_uname(void* buf) +{ + const int sz = 65; + strcpy(buf + 0*sz, "Proxy Kernel"); + strcpy(buf + 1*sz, ""); + strcpy(buf + 2*sz, "3.4.5"); + strcpy(buf + 3*sz, ""); + strcpy(buf + 4*sz, ""); + strcpy(buf + 5*sz, ""); return (sysret_t){0,0}; } -sysret_t syscall(long a0, long a1, long a2, long a3, long n) +sysret_t sys_getuid() +{ + return (sysret_t){0,0}; +} + +sysret_t sys_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + return do_mmap(addr, length, prot, flags, fd, offset); +} + +sysret_t syscall(long a0, long a1, long a2, long a3, long a4, long a5, long n) { const static void* 
syscall_table[] = { [SYS_exit] = sys_exit, + [SYS_exit_group] = sys_exit, [SYS_read] = sys_read, [SYS_write] = sys_write, [SYS_open] = sys_open, @@ -117,10 +162,18 @@ sysret_t syscall(long a0, long a1, long a2, long a3, long n) [SYS_link] = sys_link, [SYS_unlink] = sys_unlink, [SYS_brk] = sys_brk, + [SYS_uname] = sys_uname, + [SYS_getuid] = sys_getuid, + [SYS_geteuid] = sys_getuid, + [SYS_getgid] = sys_getuid, + [SYS_getegid] = sys_getuid, + [SYS_mmap] = sys_mmap, }; if(n >= ARRAY_SIZE(syscall_table) || !syscall_table[n]) panic("bad syscall #%ld!",n); - return ((syscall_t)syscall_table[n])(a0, a1, a2, a3, n); + sysret_t r = ((syscall_t)syscall_table[n])(a0, a1, a2, a3, a4, a5, n); + printk("syscall %ld %lx %lx %lx = %lx\n", n, a0, a1, a2, (long)r.result); /* every argument is a long: the l prefix makes vsprintk fetch a long instead of an int */ + return r; } diff --git a/pk/syscall.h b/pk/syscall.h index d39a6a3..f759e83 100644 --- a/pk/syscall.h +++ b/pk/syscall.h @@ -1,4 +1,6 @@ // See LICENSE for license details. - #include + +void sys_exit(int code) __attribute__((noreturn)); +sysret_t syscall(long a0, long a1, long a2, long a3, long a4, long a5, long n); diff --git a/pk/vm.c b/pk/vm.c new file mode 100644 index 0000000..bb95d09 --- /dev/null +++ b/pk/vm.c @@ -0,0 +1,392 @@ +#include "vm.h" +#include "file.h" +#include "atomic.h" +#include "pcr.h" +#include "pk.h" +#include +#include + +typedef struct { + uintptr_t addr; + size_t length; + file_t* file; + size_t offset; + size_t refcnt; + int prot; +} vmr_t; + +#define MAX_VMR 32 +spinlock_t vm_lock = SPINLOCK_INIT; +static vmr_t vmrs[MAX_VMR]; + +typedef uintptr_t pte_t; +static pte_t* root_page_table; +static uintptr_t first_free_page; +static size_t next_free_page; +static size_t free_pages; +static int have_vm; + +static uintptr_t __page_alloc() +{ + if (next_free_page == free_pages) + return 0; + uintptr_t addr = first_free_page + RISCV_PGSIZE * next_free_page++; + memset((void*)addr, 0, RISCV_PGSIZE); + return addr; +} + +static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file, + size_t 
offset, size_t refcnt, int prot)
+{
+  for (vmr_t* v = vmrs; v < vmrs + MAX_VMR; v++)
+  {
+    if (v->refcnt == 0)
+    {
+      v->addr = addr;
+      v->length = length;
+      v->file = file;
+      v->offset = offset;
+      v->refcnt = refcnt;
+      v->prot = prot;
+      return v;
+    }
+  }
+  return NULL;
+}
+
+// Drop `dec` references from region `v`.  When the count reaches zero the
+// region's reference on its backing file (if any) is released.
+static void __vmr_decref(vmr_t* v, size_t dec)
+{
+  if ((v->refcnt -= dec) == 0)
+  {
+    if (v->file)
+      file_decref(v->file);
+  }
+}
+
+// Nonzero if `pte` has bit 1 set, the bit super_pte_create() sets on every
+// leaf mapping it builds.
+static int pte_valid(pte_t pte)
+{
+  return pte & 2;
+}
+
+// Page number stored in a page-table entry.
+static size_t pte_ppn(pte_t pte)
+{
+  return pte >> RISCV_PGSHIFT;
+}
+
+// Nonzero if `pte` has bit 0 set, the bit ptd_create() sets on entries that
+// point at a next-level table.
+static int ptd_valid(pte_t pte)
+{
+  return pte & 1;
+}
+
+// Build a directory entry that points at the page table in page `ppn`.
+static pte_t ptd_create(uintptr_t ppn)
+{
+  return ppn << RISCV_PGSHIFT | 1;
+}
+
+// Page number containing address `addr`.
+static uintptr_t ppn(uintptr_t addr)
+{
+  return addr >> RISCV_PGSHIFT;
+}
+
+// Index into the level-`level` page table for `addr` (level 0 is the leaf).
+static size_t pt_idx(uintptr_t addr, int level)
+{
+  size_t idx = addr >> (RISCV_PGLEVEL_BITS*level + RISCV_PGSHIFT);
+  return idx & ((1 << RISCV_PGLEVEL_BITS) - 1);
+}
+
+// Translates a PROT_* bitmask (index) into the 3-bit permission field used
+// in a PTE: read -> 4, write -> 2, exec -> 1.
+static int prot2perm[] = {
+  [0] = 0,
+  [PROT_READ] = 4,
+  [PROT_WRITE] = 2,
+  [PROT_WRITE|PROT_READ] = 6,
+  [PROT_EXEC] = 1,
+  [PROT_EXEC|PROT_READ] = 5,
+  [PROT_EXEC|PROT_WRITE] = 3,
+  [PROT_EXEC|PROT_WRITE|PROT_READ] = 7
+};
+
+// Build a valid leaf PTE for page `ppn` at table level `level`.
+// `kprot` fills the permission field at bits 7-9 and `uprot` the field at
+// bits 4-6; only the low three bits of each are used.
+static pte_t super_pte_create(uintptr_t ppn, int kprot, int uprot, int level)
+{
+  int perm = prot2perm[kprot&7] << 7 | prot2perm[uprot&7] << 4 | 2;
+  return (ppn << (RISCV_PGLEVEL_BITS*level + RISCV_PGSHIFT)) | perm;
+}
+
+// Build a valid level-0 (single page) PTE.
+static pte_t pte_create(uintptr_t ppn, int kprot, int uprot)
+{
+  return super_pte_create(ppn, kprot, uprot, 0);
+}
+
+// Walk the page table from the root down to the level-0 entry for `addr`.
+// With `create` set, missing intermediate tables are allocated on the way.
+// Returns a pointer to the leaf entry, or 0 if an intermediate table is
+// missing (and `create` is clear) or could not be allocated.
+static __attribute__((always_inline)) pte_t* __walk_internal(uintptr_t addr, int create)
+{
+  pte_t* t = root_page_table;
+
+  for (unsigned i = RISCV_PGLEVELS-1; i > 0; i--)
+  {
+    size_t idx = pt_idx(addr, i);
+    // An upper-level entry must never be a leaf mapping here.
+    kassert(!pte_valid(t[idx]));
+    if (!ptd_valid(t[idx]))
+    {
+      if (!create)
+        return 0;
+      uintptr_t page = __page_alloc();
+      if (page == 0)
+        return 0;
+      t[idx] = ptd_create(ppn(page));
+    }
+    t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT);
+  }
+  return &t[pt_idx(addr, 0)];
+}
+
+// Look up the leaf entry for `addr` without allocating anything.
+static pte_t* __walk(uintptr_t addr)
+{
+  return __walk_internal(addr, 0);
+}
+
+// Look up the leaf entry for `addr`, allocating page tables as needed.
+static pte_t* __walk_create(uintptr_t addr)
+{
+  return __walk_internal(addr, 1);
+}
+
+// Nonzero if nothing is mapped or reserved at `vaddr` (no leaf entry, or
+// the leaf entry is zero).
+static int __va_avail(uintptr_t vaddr)
+{
+  pte_t* pte = __walk(vaddr);
+  return pte == 0 || *pte == 0;
+}
+
+// Find `npage` consecutive available pages between current.brk and
+// current.mmap_max.  Returns the start address, or 0 if none exists.
+static uintptr_t __vm_alloc(size_t npage)
+{
+  // A request larger than the whole window can never fit.  Without this
+  // check the subtraction below wraps around and the scan would run far
+  // past mmap_max.
+  if (npage > current.mmap_max / RISCV_PGSIZE)
+    return 0;
+
+  uintptr_t start = current.brk, end = current.mmap_max - npage*RISCV_PGSIZE;
+  for (uintptr_t a = start; a <= end; a += RISCV_PGSIZE)
+  {
+    if (!__va_avail(a))
+      continue;
+    // Scan the candidate range backwards: if a page is taken, `a` is left
+    // on it so the outer loop resumes just past the obstruction.
+    uintptr_t first = a, last = a + (npage-1) * RISCV_PGSIZE;
+    for (a = last; a > first && __va_avail(a); a -= RISCV_PGSIZE)
+      ;
+    if (a > first)
+      continue;
+    return a;
+  }
+  return 0;
+}
+
+// Rewrite PCR_PTBR with its current value.
+// NOTE(review): presumably this is what invalidates the TLB -- confirm.
+static void flush_tlb(void)
+{
+  mtpcr(PCR_PTBR, mfpcr(PCR_PTBR));
+}
+
+// Resolve a fault at `vaddr` for an access needing `prot`.  Caller holds
+// vm_lock.  A leaf entry that is nonzero but not valid holds the vmr_t*
+// stored there by __do_mmap; the page is then filled (from the backing
+// file, or with zeros) and mapped with the region's protection.
+// Returns 0 on success, -1 if nothing is mapped there or the mapping does
+// not grant `prot`.
+static int __handle_page_fault(uintptr_t vaddr, int prot)
+{
+  uintptr_t vpn = vaddr >> RISCV_PGSHIFT;
+  vaddr = vpn << RISCV_PGSHIFT;
+
+  pte_t* pte = __walk(vaddr);
+
+  if (pte == 0 || *pte == 0)
+    return -1;
+  else if (!pte_valid(*pte))
+  {
+    kassert(vaddr < current.stack_top && vaddr >= current.user_min);
+    // User pages are identity-mapped: physical page == virtual page.
+    uintptr_t ppn = vpn;
+
+    vmr_t* v = (vmr_t*)*pte;
+    // Temporarily map the page kernel read/write so it can be filled in.
+    *pte = pte_create(ppn, PROT_READ|PROT_WRITE, 0);
+    if (v->file)
+    {
+      size_t flen = MIN(RISCV_PGSIZE, v->length - (vaddr - v->addr));
+      kassert(flen == file_pread(v->file, (void*)vaddr, flen, vaddr - v->addr + v->offset).result);
+      // Zero the remainder of a partially file-backed page.
+      if (flen < RISCV_PGSIZE)
+        memset((char*)vaddr + flen, 0, RISCV_PGSIZE - flen);
+    }
+    else
+      memset((void*)vaddr, 0, RISCV_PGSIZE);
+    *pte = pte_create(ppn, v->prot, v->prot);
+  }
+
+  pte_t perms = pte_create(0, prot, prot);
+  if ((*pte & perms) != perms)
+    return -1;
+
+  flush_tlb();
+  return 0;
+}
+
+// Locked entry point for page-fault handling; see __handle_page_fault.
+int handle_page_fault(uintptr_t vaddr, int prot)
+{
+  spinlock_lock(&vm_lock);
+  int ret = __handle_page_fault(vaddr, prot);
+  spinlock_unlock(&vm_lock);
+  return ret;
+}
+
+// Map `length` bytes at `addr` (MAP_FIXED) or at an address chosen by
+// __vm_alloc.  Caller holds vm_lock.  Each covered leaf entry is set to
+// point at the new region so pages are filled lazily on first fault; they
+// are populated immediately when VM is off or MAP_POPULATE is given.
+// The region takes its own reference on `f`; the caller keeps its own.
+// Returns the mapped address, or (uintptr_t)-1 on failure.
+uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* f, off_t offset)
+{
+  size_t npage = (length-1)/RISCV_PGSIZE+1;
+  vmr_t* v = __vmr_alloc(addr, length, f, offset, npage, prot);
+  if (!v)
+    goto fail_vmr;
+
+  // Take the region's file reference up front so that the __vmr_decref on
+  // the failure path below releases a reference this function owns.
+  if (f) file_incref(f);
+
+  if (flags & MAP_FIXED)
+  {
+    if ((addr & (RISCV_PGSIZE-1)) || addr < current.user_min ||
+        addr + length > current.stack_top || addr + length < addr)
+      goto fail_vma;
+  }
+  else if ((addr = __vm_alloc(npage)) == 0)
+    goto fail_vma;
+
+  // Record the address actually chosen; the fault handler derives file
+  // offsets from it, so it must not keep the caller's hint.
+  v->addr = addr;
+
+  for (uintptr_t a = addr; a < addr + length; a += RISCV_PGSIZE)
+  {
+    pte_t* pte = __walk_create(a);
+    kassert(pte);
+
+    kassert(*pte == 0); // TODO __do_munmap
+
+    *pte = (pte_t)v;
+  }
+
+  if (!have_vm || (flags & MAP_POPULATE))
+    for (uintptr_t a = addr; a < addr + length; a += RISCV_PGSIZE)
+      kassert(__handle_page_fault(a, prot) == 0);
+
+  return addr;
+
+fail_vma:
+  __vmr_decref(v, npage);
+fail_vmr:
+  return (uintptr_t)-1;
+}
+
+// mmap system call.  Only MAP_PRIVATE mappings with a nonzero length and a
+// page-aligned offset are accepted.  Returns the mapped address, or -1
+// with EINVAL (bad arguments), EBADF (bad fd) or ENOMEM (no region or
+// address range available).
+sysret_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+  if (!(flags & MAP_PRIVATE) || length == 0 || (offset & (RISCV_PGSIZE-1)))
+    return (sysret_t){-1, EINVAL};
+
+  file_t* f = NULL;
+  if (!(flags & MAP_ANONYMOUS) && (f = file_get(fd)) == NULL)
+    return (sysret_t){-1, EBADF};
+
+  spinlock_lock(&vm_lock);
+  addr = __do_mmap(addr, length, prot, flags, f, offset);
+  // A mapping below the current heap limit caps how far brk may grow.
+  // (The failure value (uintptr_t)-1 never compares below brk_max.)
+  if (addr < current.brk_max)
+    current.brk_max = addr;
+  spinlock_unlock(&vm_lock);
+
+  if (f) file_decref(f);
+
+  if (addr == (uintptr_t)-1)
+    return (sysret_t){-1, ENOMEM};
+  return (sysret_t){addr, 0};
+}
+
+// Move the program break to `addr`, clamped to [brk_min, brk_max].  Caller
+// holds vm_lock.  Growing maps the new pages anonymously; shrinking is not
+// implemented and asserts.  Returns the (clamped) new break.
+size_t __do_brk(size_t addr)
+{
+  size_t newbrk = addr;
+  if (addr < current.brk_min)
+    newbrk = current.brk_min;
+  else if (addr > current.brk_max)
+    newbrk = current.brk_max;
+
+  // First call: start the heap at the page-rounded minimum.
+  if (current.brk == 0)
+    current.brk = ROUNDUP(current.brk_min, RISCV_PGSIZE);
+
+  size_t newbrk_page = ROUNDUP(newbrk, RISCV_PGSIZE);
+  if (current.brk > newbrk_page)
+    kassert(0); // TODO __do_munmap
+  else if (current.brk < newbrk_page)
+    kassert(__do_mmap(current.brk, newbrk_page - current.brk, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) == current.brk);
+  current.brk = newbrk_page;
+
+  return newbrk;
+}
+
+// brk system call: takes vm_lock around __do_brk and returns the resulting
+// break.  The parameter type matches the prototype in vm.h.
+sysret_t do_brk(uintptr_t addr)
+{
+  spinlock_lock(&vm_lock);
+  addr = __do_brk(addr);
+  spinlock_unlock(&vm_lock);
+
+  return (sysret_t){addr, 0};
+}
+
+// Identity-map [paddr, paddr+len) with kernel permissions `prot` and no
+// user permissions, creating page tables as needed.
+static void __map_kernel_range(uintptr_t paddr, size_t len, int prot)
+{
+  pte_t perms = pte_create(0, prot, 0);
+  for (uintptr_t a = paddr; a < paddr + len; a += RISCV_PGSIZE)
+  {
+    pte_t* pte = __walk_create(a);
+    kassert(pte);
+    *pte = a | perms;
+  }
+}
+
+// Touch every page overlapping [start, start+size): pages are written
+// (atomic add of 0, leaving contents unchanged) when `prot` includes
+// PROT_WRITE and read otherwise.
+// NOTE(review): presumably the purpose is to take any page faults now
+// rather than later -- confirm against callers.
+void populate_mapping(const void* start, size_t size, int prot)
+{
+  uintptr_t a0 = ROUNDDOWN((uintptr_t)start, RISCV_PGSIZE);
+  for (uintptr_t a = a0; a < (uintptr_t)start+size; a += RISCV_PGSIZE)
+  {
+    atomic_t* atom = (atomic_t*)(a & -sizeof(atomic_t));
+    if (prot & PROT_WRITE)
+      atomic_add(atom, 0);
+    else
+      atomic_read(atom);
+  }
+}
+
+// Set up the address-space layout in `current`, build the root page table
+// and, if the hardware accepts SR_VM, enable virtual memory.
+void vm_init(void)
+{
+  extern char _end;
+  current.user_min = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
+  current.brk_min = current.user_min;
+  current.brk = 0;
+
+  // NOTE(review): the word at address 0 is used as the memory size in MiB
+  // (it is shifted left by 20 below); presumably placed there by the host.
+  uint32_t mem_mb = *(volatile uint32_t*)0;
+
+  if (mem_mb == 0)
+  {
+    // No memory size reported: leave every limit at zero.
+    current.stack_bottom = 0;
+    current.stack_top = 0;
+    current.brk_max = 0;
+    current.mmap_max = 0;
+  }
+  else
+  {
+    uintptr_t max_addr = (uintptr_t)mem_mb << 20;
+    size_t mem_pages = max_addr >> RISCV_PGSHIFT;
+    const size_t min_free_pages = 2*RISCV_PGLEVELS;
+    const size_t min_stack_pages = 8;
+    const size_t max_stack_pages = 128;
+    kassert(mem_pages > min_free_pages + min_stack_pages);
+    // Reserve the top of memory as the pool handed out by __page_alloc.
+    free_pages = MAX(mem_pages >> (RISCV_PGLEVEL_BITS-1), min_free_pages);
+    size_t stack_pages = CLAMP(mem_pages/32, min_stack_pages, max_stack_pages);
+    first_free_page = max_addr - free_pages * RISCV_PGSIZE;
+
+    uintptr_t root_page_table_paddr = __page_alloc();
+    kassert(root_page_table_paddr);
+    root_page_table = (pte_t*)root_page_table_paddr;
+
+    // Everything below user_min (the kernel image) is mapped RWX.
+    __map_kernel_range(0, current.user_min, PROT_READ|PROT_WRITE|PROT_EXEC);
+
+    // Probe for VM support: set SR_VM, see whether it sticks, clear it.
+    mtpcr(PCR_PTBR, root_page_table_paddr);
+    setpcr(PCR_SR, SR_VM);
+    have_vm = mfpcr(PCR_SR) & SR_VM;
+    clearpcr(PCR_SR, SR_VM);
+
+    size_t stack_size = RISCV_PGSIZE * stack_pages;
+    current.stack_top = first_free_page;
+    uintptr_t stack_bot = current.stack_top - stack_size;
+
+    if (have_vm)
+    {
+      __map_kernel_range(first_free_page, free_pages * RISCV_PGSIZE, PROT_READ|PROT_WRITE);
+      kassert(__do_mmap(stack_bot, stack_size, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) == stack_bot);
+      setpcr(PCR_SR, SR_VM);
+    }
+
+    current.stack_bottom = stack_bot;
+    stack_bot -= RISCV_PGSIZE; // guard page
+    current.mmap_max = current.brk_max = stack_bot;
+  }
+}
diff --git a/pk/vm.h b/pk/vm.h
new file mode 100644
index 0000000..349d9ef
--- /dev/null
+++ b/pk/vm.h
@@ -0,0 +1,26 @@
+#ifndef _VM_H
+#define _VM_H
+
+#include "syscall.h"
+#include "file.h"
+#include
+#include
+#include
+
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define PROT_EXEC 4
+
+#define MAP_PRIVATE 0x2
+#define MAP_FIXED 0x10
+#define MAP_ANONYMOUS 0x20
+#define MAP_POPULATE 0x8000
+
+void vm_init(void);
+int handle_page_fault(uintptr_t vaddr, int prot);
+void populate_mapping(const void* start, size_t size, int prot);
+uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* file, off_t offset);
+sysret_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset);
+sysret_t do_brk(uintptr_t addr);
+
+#endif
-- cgit v1.1