aboutsummaryrefslogtreecommitdiff
path: root/pk
diff options
context:
space:
mode:
author Andrew Waterman <waterman@cs.berkeley.edu> 2013-07-13 21:43:57 -0700
committer Andrew Waterman <waterman@cs.berkeley.edu> 2013-07-13 21:44:16 -0700
commit cc72987e655578b0529b6c3c8084e810cf40b358 (patch)
tree a7a99a9406dfef2d4103e85bc0976cb8d039d7e7 /pk
parent 0bdb8c84092bf7c5eb4c981c620997a5893bfb70 (diff)
download pk-cc72987e655578b0529b6c3c8084e810cf40b358.zip
pk-cc72987e655578b0529b6c3c8084e810cf40b358.tar.gz
pk-cc72987e655578b0529b6c3c8084e810cf40b358.tar.bz2
Support Linux ABI and (optionally) virtual memory
Diffstat (limited to 'pk')
-rw-r--r-- pk/atomic.h 12
-rw-r--r-- pk/console.c 147
-rw-r--r-- pk/elf.c 78
-rw-r--r-- pk/elf.h 9
-rw-r--r-- pk/entry.S 1
-rw-r--r-- pk/file.c 108
-rw-r--r-- pk/file.h 11
-rw-r--r-- pk/fp.c 4
-rw-r--r-- pk/handlers.c 33
-rw-r--r-- pk/init.c 220
-rw-r--r-- pk/pcr.h 14
-rw-r--r-- pk/pk.h 39
-rw-r--r-- pk/pk.mk.in 3
-rw-r--r-- pk/syscall.c 121
-rw-r--r-- pk/syscall.h 4
-rw-r--r-- pk/vm.c 392
-rw-r--r-- pk/vm.h 26
17 files changed, 908 insertions, 314 deletions
diff --git a/pk/atomic.h b/pk/atomic.h
index c0c1d84..8e80c78 100644
--- a/pk/atomic.h
+++ b/pk/atomic.h
@@ -43,6 +43,18 @@ static inline long atomic_swap(atomic_t* a, long val)
#endif
}
+static inline long atomic_cas(atomic_t* a, long compare, long swap) // compare-and-swap on *a; returns the value observed before any store
+{
+#ifdef PK_ENABLE_ATOMICS
+ return __sync_val_compare_and_swap(&a->val, compare, swap); // compiler builtin: stores swap only if a->val == compare, returns the prior value
+#else
+ long ret = atomic_read(a); // fallback path: separate read, compare and write rather than one atomic step
+ if (ret == compare)
+ atomic_set(a, swap); // store the new value only when the observed value matched
+ return ret; // a return value equal to compare means the swap was performed
+#endif
+}
+
static inline void spinlock_lock(spinlock_t* lock)
{
do
diff --git a/pk/console.c b/pk/console.c
new file mode 100644
index 0000000..cfa58d4
--- /dev/null
+++ b/pk/console.c
@@ -0,0 +1,147 @@
+#include "pk.h"
+#include "file.h"
+#include "syscall.h"
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+static void vsprintk(char* out, const char* s, va_list vl) // minimal printf-style formatter: expands s into out, taking arguments from vl
+{ // NOTE: out is written with no bounds check; the caller must supply a large enough buffer
+ bool format = false; // true while handling the characters that follow a '%'
+ bool longarg = false; // true once an 'l' modifier (or %p) was seen for the current conversion
+ for( ; *s; s++)
+ {
+ if(format)
+ {
+ switch(*s)
+ {
+ case 'l':
+ longarg = true; // format stays true, so the next character selects the conversion
+ break;
+ case 'p':
+ longarg = true; // pointer arguments are fetched as long
+ *out++ = '0';
+ *out++ = 'x';
+ case 'x': // %p deliberately falls through to here after emitting the "0x" prefix
+ {
+ long n = longarg ? va_arg(vl,long) : va_arg(vl,int);
+ for(int i = 2*(longarg ? sizeof(long) : sizeof(int))-1; i >= 0; i--) // every nibble, most significant first: fixed width with leading zeros
+ {
+ int d = (n >> (4*i)) & 0xF;
+ *out++ = (d < 10 ? '0'+d : 'a'+d-10); // lowercase hex digit
+ }
+ longarg = false; // conversion finished: reset state for the next '%'
+ format = false;
+ break;
+ }
+ case 'd':
+ {
+ long n = longarg ? va_arg(vl,long) : va_arg(vl,int);
+ if(n < 0)
+ {
+ n = -n; // NOTE(review): the most negative long cannot be negated; that case is not handled here
+ *out++ = '-';
+ }
+ long digits = 1;
+ for(long nn = n ; nn /= 10; digits++); // count decimal digits of n (at least one)
+ for(int i = digits-1; i >= 0; i--) // fill the digits from least significant (rightmost) to most significant
+ {
+ out[i] = '0' + n%10;
+ n /= 10;
+ }
+ out += digits;
+ longarg = false;
+ format = false;
+ break;
+ }
+ case 's':
+ {
+ const char* s2 = va_arg(vl,const char*);
+ while(*s2) // copy the argument string without its terminator
+ *out++ = *s2++;
+ longarg = false;
+ format = false;
+ break;
+ }
+ case 'c':
+ {
+ *out++ = (char)va_arg(vl,int); // character arguments are fetched as int
+ longarg = false;
+ format = false;
+ break;
+ }
+ default:
+ panic("bad fmt"); // any other character after '%' (including a second '%') is rejected
+ }
+ }
+ else if(*s == '%')
+ format = true; // start of a conversion specification
+ else
+ *out++ = *s; // ordinary character: copy through unchanged
+ }
+ *out++ = '\0'; // terminate the output string
+}
+
+static void vprintk(const char* s, va_list vl) // formats the message into a local buffer and writes it to stderr
+{
+ char out[1024]; // XXX fixed-size buffer; vsprintk performs no bounds check on it
+ vsprintk(out, s, vl);
+ file_write(stderr, out, strlen(out)); // the terminating NUL is not written
+}
+
+void printk(const char* s, ...) // variadic front end for vprintk: formatted output to stderr
+{
+ va_list vl;
+ va_start(vl, s);
+
+ vprintk(s, vl); // supported conversions are those handled by vsprintk
+
+ va_end(vl);
+}
+
+void sprintk(char* out, const char* s, ...) // variadic front end for vsprintk: formats s into the caller's buffer out
+{
+ va_list vl;
+ va_start(vl,s);
+
+ vsprintk(out,s,vl); // no length limit is passed, so out must be large enough for the result
+
+ va_end(vl);
+}
+
+void dump_tf(trapframe_t* tf) // prints the 32 general-purpose registers and the sr/epc/badvaddr/insn fields of tf via printk
+{
+ static const char* regnames[] = { // display label for each gpr index, 0..31
+ "z ", "ra", "s0", "s1", "s2", "s3", "s4", "s5",
+ "s6", "s7", "s8", "s9", "sA", "sB", "sp", "tp",
+ "v0", "v1", "a0", "a1", "a2", "a3", "a4", "a5",
+ "a6", "a7", "a8", "a9", "aA", "aB", "aC", "aD"
+ };
+
+ tf->gpr[0] = 0; // note: this writes to the caller's trap frame so that register 0 prints as zero
+
+ for(int i = 0; i < 32; i+=4) // one output line per group of four registers
+ {
+ for(int j = 0; j < 4; j++)
+ printk("%s %lx%c",regnames[i+j],tf->gpr[i+j],j < 3 ? ' ' : '\n'); // space between entries, newline after the fourth
+ }
+ printk("sr %lx pc %lx va %lx insn %x\n",tf->sr,tf->epc,tf->badvaddr,
+ (uint32_t)tf->insn); // insn is narrowed to 32 bits to match the %x conversion
+}
+
+void do_panic(const char* s, ...) // prints a formatted message to stderr, then exits with code -1
+{
+ va_list vl;
+ va_start(vl, s);
+
+ vprintk(s, vl);
+ sys_exit(-1); // sys_exit is declared noreturn in syscall.h
+
+ va_end(vl); // not reached when sys_exit does not return
+}
+
+void kassert_fail(const char* s) // reports a failed assertion; s is the text to print (kassert passes the stringified condition)
+{
+ do_panic("assertion failed: %s\n", s); // prints the message and exits through do_panic
+}
diff --git a/pk/elf.c b/pk/elf.c
index ecd2f61..c942848 100644
--- a/pk/elf.c
+++ b/pk/elf.c
@@ -1,72 +1,74 @@
// See LICENSE for license details.
+#include "file.h"
+#include "pk.h"
+#include "pcr.h"
+#include "vm.h"
#include <sys/stat.h>
#include <fcntl.h>
#include <elf.h>
#include <string.h>
-#include "file.h"
-#include "pk.h"
-long load_elf(const char* fn, int* user64)
+void load_elf(const char* fn, elf_info* info)
{
- sysret_t ret = file_open(fn, strlen(fn)+1, O_RDONLY, 0);
+ sysret_t ret = file_open(fn, O_RDONLY, 0);
file_t* file = (file_t*)ret.result;
- if(ret.result == -1)
+ if (ret.result == -1)
goto fail;
- char buf[2048]; // XXX
- int header_size = file_read(file, buf, sizeof(buf)).result;
- const Elf64_Ehdr* eh64 = (const Elf64_Ehdr*)buf;
- if(header_size < (int)sizeof(Elf64_Ehdr) ||
- !(eh64->e_ident[0] == '\177' && eh64->e_ident[1] == 'E' &&
- eh64->e_ident[2] == 'L' && eh64->e_ident[3] == 'F'))
+ Elf64_Ehdr eh64;
+ ssize_t ehdr_size = file_pread(file, &eh64, sizeof(eh64), 0).result;
+ if (ehdr_size < (ssize_t)sizeof(eh64) ||
+ !(eh64.e_ident[0] == '\177' && eh64.e_ident[1] == 'E' &&
+ eh64.e_ident[2] == 'L' && eh64.e_ident[3] == 'F'))
goto fail;
#define LOAD_ELF do { \
- eh = (typeof(eh))buf; \
- kassert(header_size >= eh->e_phoff + eh->e_phnum*sizeof(*ph)); \
- ph = (typeof(ph))(buf+eh->e_phoff); \
+ eh = (typeof(eh))&eh64; \
+ size_t phdr_size = eh->e_phnum*sizeof(*ph); \
+ if (info->phdr_top - phdr_size < info->stack_bottom) \
+ goto fail; \
+ info->phdr = info->phdr_top - phdr_size; \
+ ssize_t ret = file_pread(file, (void*)info->phdr, phdr_size, eh->e_phoff).result; \
+ if (ret < (ssize_t)phdr_size) goto fail; \
+ info->entry = eh->e_entry; \
+ info->phnum = eh->e_phnum; \
+ info->phent = sizeof(*ph); \
+ ph = (typeof(ph))info->phdr; \
for(int i = 0; i < eh->e_phnum; i++, ph++) { \
if(ph->p_type == SHT_PROGBITS && ph->p_memsz) { \
- extern char _end; \
- if((char*)(long)ph->p_vaddr < &_end) \
- { \
- long diff = &_end - (char*)(long)ph->p_vaddr; \
- ph->p_vaddr += diff; \
- ph->p_offset += diff; \
- ph->p_memsz = diff >= ph->p_memsz ? 0 : ph->p_memsz - diff; \
- ph->p_filesz = diff >= ph->p_filesz ? 0 : ph->p_filesz - diff; \
- } \
- if(file_pread(file, (char*)(long)ph->p_vaddr, ph->p_filesz, ph->p_offset).result != ph->p_filesz) \
+ info->brk_min = MAX(info->brk_min, ph->p_vaddr + ph->p_memsz); \
+ size_t vaddr = ROUNDDOWN(ph->p_vaddr, RISCV_PGSIZE), prepad = ph->p_vaddr - vaddr; \
+ size_t memsz = ph->p_memsz + prepad, filesz = ph->p_filesz + prepad; \
+ size_t offset = ph->p_offset - prepad; \
+ if (__do_mmap(vaddr, filesz, -1, MAP_FIXED|MAP_PRIVATE, file, offset) != vaddr) \
goto fail; \
- memset((char*)(long)ph->p_vaddr+ph->p_filesz, 0, ph->p_memsz-ph->p_filesz); \
+ size_t mapped = ROUNDUP(filesz, RISCV_PGSIZE); \
+ if (memsz > mapped) \
+ if (__do_mmap(vaddr + mapped, memsz - mapped, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) != vaddr + mapped) \
+ goto fail; \
} \
} \
} while(0)
- long entry;
- *user64 = 0;
- if (IS_ELF32(*eh64))
+ info->elf64 = IS_ELF64(eh64);
+ if (info->elf64)
{
- Elf32_Ehdr* eh;
- Elf32_Phdr* ph;
+ Elf64_Ehdr* eh;
+ Elf64_Phdr* ph;
LOAD_ELF;
- entry = eh->e_entry;
}
- else if (IS_ELF64(*eh64))
+ else if (IS_ELF32(eh64))
{
- *user64 = 1;
- Elf64_Ehdr* eh;
- Elf64_Phdr* ph;
+ Elf32_Ehdr* eh;
+ Elf32_Phdr* ph;
LOAD_ELF;
- entry = eh->e_entry;
}
else
goto fail;
file_decref(file);
-
- return entry;
+ return;
fail:
panic("couldn't open ELF program: %s!", fn);
diff --git a/pk/elf.h b/pk/elf.h
index ea39078..f91a57f 100644
--- a/pk/elf.h
+++ b/pk/elf.h
@@ -1,7 +1,5 @@
// See LICENSE for license details.
-// See LICENSE for details.
-
#ifndef _ELF_H
#define _ELF_H
@@ -17,6 +15,13 @@
#define SHT_PROGBITS 1
#define SHT_NOBITS 8
+#define AT_NULL 0
+#define AT_PHDR 3
+#define AT_PHENT 4
+#define AT_PHNUM 5
+#define AT_PAGESZ 6
+#define AT_ENTRY 9
+
typedef struct {
uint8_t e_ident[16];
uint16_t e_type;
diff --git a/pk/entry.S b/pk/entry.S
index 6441d9f..4a762c0 100644
--- a/pk/entry.S
+++ b/pk/entry.S
@@ -140,6 +140,7 @@ trap_entry:
jal handle_trap
.bss
+ .align 4
.global stack_bot
.global stack_top
stack_bot:
diff --git a/pk/file.c b/pk/file.c
index 195bcdc..ecc5f28 100644
--- a/pk/file.c
+++ b/pk/file.c
@@ -6,66 +6,52 @@
#include "pk.h"
#include "frontend.h"
#include "pcr.h"
+#include "vm.h"
#define MAX_FDS 32
-file_t* fds[MAX_FDS];
+static file_t* fds[MAX_FDS];
#define MAX_FILES 32
-file_t files[MAX_FILES] = {[0 ... MAX_FILES-1] = {-1,{0}}};
+static file_t files[MAX_FILES] = {[0 ... MAX_FILES-1] = {-1,{0}}};
file_t *stdout, *stdin, *stderr;
-static void file_incref(file_t* f)
+void file_incref(file_t* f)
{
- atomic_add(&f->refcnt,1);
+ atomic_add(&f->refcnt, 1);
}
void file_decref(file_t* f)
{
- if(atomic_add(&f->refcnt,-1) == 2)
+ if (atomic_add(&f->refcnt, -1) == 2)
{
- if(f->kfd != -1)
- {
- frontend_syscall(SYS_close,f->kfd,0,0,0);
- f->kfd = -1;
- }
- atomic_add(&f->refcnt,-1); // I think this could just be atomic_set(..,0)
+ int kfd = f->kfd;
+ mb();
+ atomic_set(&f->refcnt, 0);
+
+ frontend_syscall(SYS_close, kfd, 0, 0, 0);
}
}
static file_t* file_get_free()
{
- for(int i = 0; i < MAX_FILES; i++)
- {
- if(atomic_read(&files[i].refcnt) == 0)
- {
- if(atomic_add(&files[i].refcnt,1) == 0)
- {
- atomic_add(&files[i].refcnt,1);
- return &files[i];
- }
- file_decref(&files[i]);
- }
- }
+ for (file_t* f = files; f < files + MAX_FILES; f++)
+ if (atomic_read(&f->refcnt) == 0 && atomic_cas(&f->refcnt, 0, 2) == 0)
+ return f;
return NULL;
}
-static int fd_get_free()
+int file_dup(file_t* f)
{
- for(int i = 0; i < MAX_FDS; i++)
- if(fds[i] == NULL)
+ for (int i = 0; i < MAX_FDS; i++)
+ {
+ if (fds[i] == NULL && __sync_bool_compare_and_swap(&fds[i], 0, f))
+ {
+ file_incref(f);
return i;
+ }
+ }
return -1;
}
-int file_dup(file_t* f)
-{
- int fd = fd_get_free();
- if(fd == -1)
- return -1;
- file_incref(f);
- fds[fd] = f;
- return fd;
-}
-
void file_init()
{
stdin = file_get_free();
@@ -84,16 +70,28 @@ void file_init()
file_t* file_get(int fd)
{
- return fd < 0 || fd >= MAX_FDS ? NULL : fds[fd];
+ file_t* f;
+ if (fd < 0 || fd >= MAX_FDS || (f = fds[fd]) == NULL)
+ return 0;
+
+ long old_cnt;
+ do {
+ old_cnt = atomic_read(&f->refcnt);
+ if (old_cnt == 0)
+ return 0;
+ } while (atomic_cas(&f->refcnt, old_cnt, old_cnt+1) != old_cnt);
+
+ return f;
}
-sysret_t file_open(const char* fn, size_t len, int flags, int mode)
+sysret_t file_open(const char* fn, int flags, int mode)
{
file_t* f = file_get_free();
if(!f)
return (sysret_t){-1,ENOMEM};
- sysret_t ret = frontend_syscall(SYS_open,(long)fn,len,flags,mode);
+ size_t fn_size = strlen(fn)+1;
+ sysret_t ret = frontend_syscall(SYS_open, (long)fn, fn_size, flags, mode);
if(ret.result != -1)
{
f->kfd = ret.result;
@@ -108,39 +106,47 @@ sysret_t file_open(const char* fn, size_t len, int flags, int mode)
int fd_close(int fd)
{
file_t* f = file_get(fd);
- if(!f)
+ if (!f)
+ return -1;
+ int success = __sync_bool_compare_and_swap(&fds[fd], f, 0);
+ file_decref(f);
+ if (!success)
return -1;
- fds[fd] = NULL;
file_decref(f);
return 0;
}
-sysret_t file_read(file_t* f, char* buf, size_t size)
+sysret_t file_read(file_t* f, void* buf, size_t size)
{
- return frontend_syscall(SYS_read,f->kfd,(long)buf,size,0);
+ populate_mapping(buf, size, PROT_WRITE);
+ return frontend_syscall(SYS_read, f->kfd, (uintptr_t)buf, size, 0);
}
-sysret_t file_pread(file_t* f, char* buf, size_t size, off_t offset)
+sysret_t file_pread(file_t* f, void* buf, size_t size, off_t offset)
{
- return frontend_syscall(SYS_pread,f->kfd,(long)buf,size,offset);
+ populate_mapping(buf, size, PROT_WRITE);
+ return frontend_syscall(SYS_pread, f->kfd, (uintptr_t)buf, size, offset);
}
-sysret_t file_write(file_t* f, const char* buf, size_t size)
+sysret_t file_write(file_t* f, const void* buf, size_t size)
{
- return frontend_syscall(SYS_write,f->kfd,(long)buf,size,0);
+ populate_mapping(buf, size, PROT_READ);
+ return frontend_syscall(SYS_write, f->kfd, (uintptr_t)buf, size, 0);
}
-sysret_t file_pwrite(file_t* f, const char* buf, size_t size, off_t offset)
+sysret_t file_pwrite(file_t* f, const void* buf, size_t size, off_t offset)
{
- return frontend_syscall(SYS_pwrite,f->kfd,(long)buf,size,offset);
+ populate_mapping(buf, size, PROT_READ);
+ return frontend_syscall(SYS_pwrite, f->kfd, (uintptr_t)buf, size, offset);
}
sysret_t file_stat(file_t* f, struct stat* s)
{
- return frontend_syscall(SYS_fstat,f->kfd,(long)s,0,0);
+ populate_mapping(s, sizeof(*s), PROT_WRITE);
+ return frontend_syscall(SYS_fstat, f->kfd, (uintptr_t)s, 0, 0);
}
sysret_t file_lseek(file_t* f, size_t ptr, int dir)
{
- return frontend_syscall(SYS_lseek,f->kfd,ptr,dir,0);
+ return frontend_syscall(SYS_lseek, f->kfd, ptr, dir, 0);
}
diff --git a/pk/file.h b/pk/file.h
index 89d0523..42f47fc 100644
--- a/pk/file.h
+++ b/pk/file.h
@@ -16,14 +16,15 @@ typedef struct file
extern file_t *stdin, *stdout, *stderr;
file_t* file_get(int fd);
-sysret_t file_open(const char* fn, size_t len, int flags, int mode);
+sysret_t file_open(const char* fn, int flags, int mode);
void file_decref(file_t*);
+void file_incref(file_t*);
int file_dup(file_t*);
-sysret_t file_pwrite(file_t* f, const char* buf, size_t n, off_t off);
-sysret_t file_pread(file_t* f, char* buf, size_t n, off_t off);
-sysret_t file_write(file_t* f, const char* buf, size_t n);
-sysret_t file_read(file_t* f, char* buf, size_t n);
+sysret_t file_pwrite(file_t* f, const void* buf, size_t n, off_t off);
+sysret_t file_pread(file_t* f, void* buf, size_t n, off_t off);
+sysret_t file_write(file_t* f, const void* buf, size_t n);
+sysret_t file_read(file_t* f, void* buf, size_t n);
sysret_t file_stat(file_t* f, struct stat* s);
sysret_t file_lseek(file_t* f, size_t ptr, int dir);
int fd_close(int fd);
diff --git a/pk/fp.c b/pk/fp.c
index 96d8ddc..aaa8f0f 100644
--- a/pk/fp.c
+++ b/pk/fp.c
@@ -21,10 +21,6 @@ static uint64_t get_fp_reg(unsigned int which, unsigned int dp);
static inline void
validate_address(trapframe_t* tf, long addr, int size, int store)
{
- if(addr & (size-1))
- store ? handle_misaligned_store(tf) : handle_misaligned_load(tf);
- if(addr < USER_START)
- store ? handle_fault_store(tf) : handle_fault_load(tf);
}
int emulate_fp(trapframe_t* tf)
diff --git a/pk/handlers.c b/pk/handlers.c
index 5caa29e..7493ac9 100644
--- a/pk/handlers.c
+++ b/pk/handlers.c
@@ -3,6 +3,8 @@
#include "pcr.h"
#include "pk.h"
#include "config.h"
+#include "syscall.h"
+#include "vm.h"
int have_fp = 1; // initialized to 1 because it can't be in the .bss section!
int have_vector = 1;
@@ -55,8 +57,6 @@ static void handle_illegal_instruction(trapframe_t* tf)
static void handle_fp_disabled(trapframe_t* tf)
{
- setpcr(PCR_SR, SR_ET);
-
if(have_fp && !(mfpcr(PCR_SR) & SR_EF))
init_fp(tf);
else
@@ -88,39 +88,46 @@ void handle_misaligned_store(trapframe_t* tf)
panic("Misaligned store!");
}
-static void handle_fault_fetch(trapframe_t* tf)
+static void segfault(trapframe_t* tf, uintptr_t addr, const char* type)
{
dump_tf(tf);
- panic("Faulting instruction access!");
+ const char* who = (tf->sr & SR_PS) ? "Kernel" : "User";
+ panic("%s %s segfault @ %p", who, type, addr);
+}
+
+static void handle_fault_fetch(trapframe_t* tf)
+{
+ if (handle_page_fault(tf->epc, PROT_EXEC) != 0)
+ segfault(tf, tf->epc, "fetch");
}
void handle_fault_load(trapframe_t* tf)
{
- dump_tf(tf);
- panic("Faulting load!");
+ if (handle_page_fault(tf->badvaddr, PROT_READ) != 0)
+ segfault(tf, tf->badvaddr, "load");
}
void handle_fault_store(trapframe_t* tf)
{
- dump_tf(tf);
- panic("Faulting store!");
+ if (handle_page_fault(tf->badvaddr, PROT_WRITE) != 0)
+ segfault(tf, tf->badvaddr, "store");
}
static void handle_syscall(trapframe_t* tf)
{
- setpcr(PCR_SR, SR_ET);
-
- long n = tf->gpr[16];
- sysret_t ret = syscall(tf->gpr[18], tf->gpr[19], tf->gpr[20], tf->gpr[21], n);
+ sysret_t ret = syscall(tf->gpr[18], tf->gpr[19], tf->gpr[20], tf->gpr[21],
+ tf->gpr[22], tf->gpr[23], tf->gpr[16]);
tf->gpr[16] = ret.result;
- tf->gpr[17] = ret.result == -1 ? ret.err : 0;
+ tf->gpr[21] = ret.err;
advance_pc(tf);
}
void handle_trap(trapframe_t* tf)
{
+ setpcr(PCR_SR, SR_ET);
+
typedef void (*trap_handler)(trapframe_t*);
const static trap_handler trap_handlers[] = {
diff --git a/pk/init.c b/pk/init.c
index 6ee4154..48667c3 100644
--- a/pk/init.c
+++ b/pk/init.c
@@ -3,189 +3,97 @@
#include "pcr.h"
#include "pk.h"
#include "file.h"
+#include "vm.h"
#include "frontend.h"
-#include <stdarg.h>
+#include "elf.h"
#include <stdint.h>
-#include <stdbool.h>
#include <string.h>
-static void vsprintk(char* out, const char* s, va_list vl)
-{
- bool format = false;
- bool longarg = false;
- for( ; *s; s++)
- {
- if(format)
- {
- switch(*s)
- {
- case 'l':
- longarg = true;
- break;
- case 'x':
- {
- long n = longarg ? va_arg(vl,long) : va_arg(vl,int);
- for(int i = 2*(longarg ? sizeof(long) : sizeof(int))-1; i >= 0; i--)
- {
- int d = (n >> (4*i)) & 0xF;
- *out++ = (d < 10 ? '0'+d : 'a'+d-10);
- }
- longarg = false;
- format = false;
- break;
- }
- case 'd':
- {
- long n = longarg ? va_arg(vl,long) : va_arg(vl,int);
- if(n < 0)
- {
- n = -n;
- *out++ = '-';
- }
- long digits = 1;
- for(long nn = n ; nn /= 10; digits++);
- for(int i = digits-1; i >= 0; i--)
- {
- out[i] = '0' + n%10;
- n /= 10;
- }
- out += digits;
- longarg = false;
- format = false;
- break;
- }
- case 's':
- {
- const char* s2 = va_arg(vl,const char*);
- while(*s2)
- *out++ = *s2++;
- longarg = false;
- format = false;
- break;
- }
- case 'c':
- {
- *out++ = (char)va_arg(vl,int);
- longarg = false;
- format = false;
- break;
- }
- default:
- panic("bad fmt");
- }
- }
- else if(*s == '%')
- format = true;
- else
- *out++ = *s;
- }
- *out++ = '\0';
-}
-
-void printk(const char* s, ...)
-{
- va_list vl;
- va_start(vl,s);
-
- char out[1024]; // XXX
- vsprintk(out,s,vl);
- file_write(stderr,out,strlen(out));
-
- va_end(vl);
-}
-
-void sprintk(char* out, const char* s, ...)
-{
- va_list vl;
- va_start(vl,s);
-
- vsprintk(out,s,vl);
-
- va_end(vl);
-}
-
-void dump_tf(trapframe_t* tf)
-{
- static const char* regnames[] = {
- "z ", "ra", "s0", "s1", "s2", "s3", "s4", "s5",
- "s6", "s7", "s8", "s9", "sA", "sB", "sp", "tp",
- "v0", "v1", "a0", "a1", "a2", "a3", "a4", "a5",
- "a6", "a7", "a8", "a9", "aA", "aB", "aC", "aD"
- };
-
- tf->gpr[0] = 0;
-
- for(int i = 0; i < 32; i+=4)
- {
- for(int j = 0; j < 4; j++)
- printk("%s %lx%c",regnames[i+j],tf->gpr[i+j],j < 3 ? ' ' : '\n');
- }
- printk("sr %lx pc %lx va %lx insn %x\n",tf->sr,tf->epc,tf->badvaddr,
- (uint32_t)tf->insn);
-}
+elf_info current;
void init_tf(trapframe_t* tf, long pc, long sp, int user64)
{
memset(tf,0,sizeof(*tf));
if(sizeof(void*) != 8)
kassert(!user64);
- tf->sr = (mfpcr(PCR_SR) & (SR_IM | SR_S64)) | SR_S | SR_EC;
+ tf->sr = (mfpcr(PCR_SR) & (SR_IM | SR_S64 | SR_VM)) | SR_S | SR_EC;
if(user64)
tf->sr |= SR_U64;
tf->gpr[14] = sp;
tf->epc = pc;
}
-static void bss_init()
+static void user_init()
{
- // front-end server zeroes the bss automagically
-}
+ struct args {
+ uint64_t argc;
+ uint64_t argv[];
+ };
-struct args
-{
- uint64_t argc;
- uint64_t argv[];
-};
+ const int argc_argv_size = 1024;
+ size_t stack_top = current.stack_top;
+ struct args* args = (struct args*)(stack_top - argc_argv_size);
+ populate_mapping(args, argc_argv_size, PROT_WRITE);
+ sysret_t r = frontend_syscall(SYS_getmainvars, (long)args, argc_argv_size, 0, 0);
+ kassert(r.result == 0);
-static struct args* stack_init(unsigned long* stack_top)
-{
- *stack_top -= USER_MAINVARS_SIZE;
+ // argv[0] is the proxy kernel itself. skip it.
+ args->argv[0] = args->argc - 1;
+ args = (struct args*)args->argv;
+ stack_top = (uintptr_t)args;
+
+ // load program named by argv[0]
+ current.phdr_top = stack_top;
+ load_elf((char*)args->argv[0], &current);
+
+ struct {
+ long key;
+ long value;
+ } aux[] = {
+ {AT_ENTRY, current.entry},
+ {AT_PHNUM, current.phnum},
+ {AT_PHENT, current.phent},
+ {AT_PHDR, current.phdr},
+ {AT_PAGESZ, RISCV_PGSIZE},
+ {AT_NULL, 0}
+ };
- struct args* args = (struct args*)(*stack_top - sizeof(args->argc));
- sysret_t r = frontend_syscall(SYS_getmainvars, (long)args, USER_MAINVARS_SIZE, 0, 0);
- kassert(r.result == 0);
-
- // chop off argv[0]
- args->argv[0] = args->argc-1;
- return (struct args*)args->argv;
-}
+ // place argc, argv, envp, auxp on stack
+ #define PUSH_ARG(type, value) do { \
+ *((type*)sp) = value; \
+ sp += sizeof(type); \
+ } while (0)
+
+ #define STACK_INIT(type) do { \
+ unsigned naux = sizeof(aux)/sizeof(aux[0]); \
+ stack_top -= (1 + args->argc + 1 + 1 + 2*naux) * sizeof(type); \
+ stack_top &= -16; \
+ long sp = stack_top; \
+ PUSH_ARG(type, args->argc); \
+ for (unsigned i = 0; i < args->argc; i++) \
+ PUSH_ARG(type, args->argv[i]); \
+ PUSH_ARG(type, 0); /* argv[argc] = NULL */ \
+ PUSH_ARG(type, 0); /* envp[0] = NULL */ \
+ for (unsigned i = 0; i < naux; i++) { \
+ PUSH_ARG(type, aux[i].key); \
+ PUSH_ARG(type, aux[i].value); \
+ } \
+ } while (0)
+
+ if (current.elf64)
+ STACK_INIT(uint64_t);
+ else
+ STACK_INIT(uint32_t);
-static void jump_usrstart(const char* fn, long sp)
-{
trapframe_t tf;
-
- int user64;
- long start = load_elf(fn, &user64);
+ init_tf(&tf, current.entry, stack_top, current.elf64);
__clear_cache(0, 0);
-
- init_tf(&tf, start, sp, user64);
pop_tf(&tf);
}
-uint32_t mem_mb;
-
void boot()
{
- bss_init();
file_init();
-
- // word 0 of memory contains # of MB of memory
- mem_mb = *(uint32_t*)0;
-
- unsigned long stack_top = 0x80000000;
- if (mem_mb < stack_top / (1024 * 1024))
- stack_top = mem_mb * (1024 * 1024);
-
- struct args* args = stack_init(&stack_top);
- jump_usrstart((char*)(long)args->argv[0], stack_top);
+ vm_init();
+ user_init();
}
diff --git a/pk/pcr.h b/pk/pcr.h
index cc78f2f..9fea232 100644
--- a/pk/pcr.h
+++ b/pk/pcr.h
@@ -64,6 +64,16 @@
#ifdef __riscv
+#ifdef __riscv64
+# define RISCV_PGLEVELS 3
+# define RISCV_PGSHIFT 13
+#else
+# define RISCV_PGLEVELS 2
+# define RISCV_PGSHIFT 12
+#endif
+#define RISCV_PGLEVEL_BITS 10
+#define RISCV_PGSIZE (1 << RISCV_PGSHIFT)
+
#define ASM_CR(r) _ASM_CR(r)
#define _ASM_CR(r) cr##r
@@ -85,6 +95,10 @@
asm volatile ("clearpcr %0,cr%2,%1" : "=r"(__tmp) : "i"(val), "i"(reg)); \
__tmp; })
+#define rdcycle() ({ unsigned long __tmp; \
+ asm volatile ("rdcycle %0" : "=r"(__tmp)); \
+ __tmp; })
+
#endif
#endif
diff --git a/pk/pk.h b/pk/pk.h
index 548c17e..52fc70d 100644
--- a/pk/pk.h
+++ b/pk/pk.h
@@ -3,13 +3,10 @@
#ifndef _PK_H
#define _PK_H
-#define USER_MAINVARS_SIZE 0x1000
-#define USER_START 0x10000
-
#ifndef __ASSEMBLER__
#include <stdint.h>
-#include <machine/syscall.h>
+#include <string.h>
typedef struct
{
@@ -21,8 +18,15 @@ typedef struct
long insn;
} trapframe_t;
-#define panic(s,...) do { printk(s"\n", ##__VA_ARGS__); sys_exit(-1); } while(0)
-#define kassert(cond) do { if(!(cond)) panic("assertion failed: "#cond); } while(0)
+#define panic(s,...) do { do_panic(s"\n", ##__VA_ARGS__); } while(0)
+#define kassert(cond) do { if(!(cond)) kassert_fail(""#cond); } while(0)
+void do_panic(const char* s, ...) __attribute__((noreturn));
+void kassert_fail(const char* s) __attribute__((noreturn));
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define CLAMP(a, lo, hi) MIN(MAX(a, lo), hi)
+#define ROUNDUP(a, b) ((((a)-1)/(b)+1)*(b))
+#define ROUNDDOWN(a, b) ((a)/(b)*(b))
#ifdef __cplusplus
extern "C" {
@@ -48,10 +52,25 @@ void handle_fault_load(trapframe_t*);
void handle_fault_store(trapframe_t*);
void boot();
-void sys_exit(int code) __attribute__((noreturn));
-sysret_t syscall(long a0, long a1, long a2, long a3, long n);
-
-long load_elf(const char* fn, int* user64);
+typedef struct {
+ int elf64;
+ int phent;
+ int phnum;
+ size_t user_min;
+ size_t entry;
+ size_t brk_min;
+ size_t brk;
+ size_t brk_max;
+ size_t mmap_max;
+ size_t stack_bottom;
+ size_t phdr;
+ size_t phdr_top;
+ size_t stack_top;
+} elf_info;
+
+extern elf_info current;
+
+void load_elf(const char* fn, elf_info* info);
static inline void advance_pc(trapframe_t* tf)
{
diff --git a/pk/pk.mk.in b/pk/pk.mk.in
index 5a30d16..14cc461 100644
--- a/pk/pk.mk.in
+++ b/pk/pk.mk.in
@@ -12,6 +12,7 @@ pk_hdrs = \
frontend.h \
riscv-opc.h \
elf.h \
+ vm.h \
pk_c_srcs = \
init.c \
@@ -22,6 +23,8 @@ pk_c_srcs = \
fp.c \
int.c \
elf.c \
+ console.c \
+ vm.c \
pk_asm_srcs = \
entry.S \
diff --git a/pk/syscall.c b/pk/syscall.c
index 711bb9e..1d3940c 100644
--- a/pk/syscall.c
+++ b/pk/syscall.c
@@ -1,44 +1,53 @@
// See LICENSE for license details.
-#include <machine/syscall.h>
-#include <string.h>
-#include <errno.h>
+#include "syscall.h"
#include "pk.h"
#include "pcr.h"
#include "file.h"
#include "frontend.h"
+#include "vm.h"
+#include <string.h>
+#include <errno.h>
-typedef sysret_t (*syscall_t)(long,long,long,long,long);
+typedef sysret_t (*syscall_t)(long, long, long, long, long, long, long);
void sys_exit(int code)
{
- frontend_syscall(SYS_exit,code,0,0,0);
- panic("exit didn't exit!");
+ frontend_syscall(SYS_exit, code, 0, 0, 0);
+ while (1);
}
sysret_t sys_read(int fd, char* buf, size_t n)
{
sysret_t r = {-1,EBADF};
file_t* f = file_get(fd);
- if(!f)
- return r;
- return file_read(f,buf,n);
+ if (f)
+ {
+ r = file_read(f, buf, n);
+ file_decref(f);
+ }
+
+ return r;
}
sysret_t sys_write(int fd, const char* buf, size_t n)
{
sysret_t r = {-1,EBADF};
file_t* f = file_get(fd);
- if(!f)
- return r;
- return file_write(f,buf,n);
+ if (f)
+ {
+ r = file_write(f, buf, n);
+ file_decref(f);
+ }
+
+ return r;
}
-sysret_t sys_open(const char* name, size_t len, int flags, int mode)
+sysret_t sys_open(const char* name, int flags, int mode)
{
- sysret_t ret = file_open(name, len, flags, mode);
+ sysret_t ret = file_open(name, flags, mode);
if(ret.result == -1)
return ret;
@@ -50,62 +59,98 @@ sysret_t sys_open(const char* name, size_t len, int flags, int mode)
sysret_t sys_close(int fd)
{
- return (sysret_t){fd_close(fd),EBADF};
+ int ret = fd_close(fd);
+ return (sysret_t){ret, ret & EBADF};
}
sysret_t sys_fstat(int fd, void* st)
{
sysret_t r = {-1,EBADF};
file_t* f = file_get(fd);
- if(!f)
- return r;
- return file_stat(f,st);
+ if (f)
+ {
+ r = file_stat(f, st);
+ file_decref(f);
+ }
+
+ return r;
}
sysret_t sys_lseek(int fd, size_t ptr, int dir)
{
sysret_t r = {-1,EBADF};
file_t* f = file_get(fd);
- if(!f)
- return r;
- return file_lseek(f,ptr,dir);
+ if (f)
+ {
+ r = file_lseek(f, ptr, dir);
+ file_decref(f);
+ }
+
+ return r;
}
-sysret_t sys_stat(const char* name, size_t len, void* st)
+sysret_t sys_stat(const char* name, void* st)
{
- return frontend_syscall(SYS_stat,(long)name,len,(long)st,0);
+ size_t name_size = strlen(name)+1;
+ populate_mapping(st, sizeof(struct stat), PROT_WRITE);
+ return frontend_syscall(SYS_stat, (uintptr_t)name, name_size, (uintptr_t)st, 0);
}
-sysret_t sys_lstat(const char* name, size_t len, void* st)
+sysret_t sys_lstat(const char* name, void* st)
{
- return frontend_syscall(SYS_lstat,(long)name,len,(long)st,0);
+ size_t name_size = strlen(name)+1;
+ populate_mapping(st, sizeof(struct stat), PROT_WRITE);
+ return frontend_syscall(SYS_lstat, (uintptr_t)name, name_size, (uintptr_t)st, 0);
}
-sysret_t sys_link(const char* old_name, size_t old_len,
- const char* new_name, size_t new_len)
+sysret_t sys_link(const char* old_name, const char* new_name)
{
- return frontend_syscall(SYS_link,(long)old_name,old_len,
- (long)new_name,new_len);
+ size_t old_size = strlen(old_name)+1;
+ size_t new_size = strlen(new_name)+1;
+ return frontend_syscall(SYS_link, (uintptr_t)old_name, old_size,
+ (uintptr_t)new_name, new_size);
}
sysret_t sys_unlink(const char* name, size_t len)
{
- return frontend_syscall(SYS_unlink,(long)name,len,0,0);
+ size_t name_size = strlen(name)+1;
+ return frontend_syscall(SYS_unlink, (uintptr_t)name, name_size, 0, 0);
}
sysret_t sys_brk(size_t pos)
{
- if(pos / (1024 * 1024) >= mem_mb)
- return (sysret_t){-1, ENOMEM};
+ return do_brk(pos);
+}
+
+sysret_t sys_uname(void* buf)
+{
+ const int sz = 65;
+ strcpy(buf + 0*sz, "Proxy Kernel");
+ strcpy(buf + 1*sz, "");
+ strcpy(buf + 2*sz, "3.4.5");
+ strcpy(buf + 3*sz, "");
+ strcpy(buf + 4*sz, "");
+ strcpy(buf + 5*sz, "");
return (sysret_t){0,0};
}
-sysret_t syscall(long a0, long a1, long a2, long a3, long n)
+sysret_t sys_getuid() // always returns result 0 with err 0; the syscall table also uses it for geteuid, getgid and getegid
+{
+ return (sysret_t){0,0};
+}
+
+sysret_t sys_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset) // mmap syscall entry point
+{
+ return do_mmap(addr, length, prot, flags, fd, offset); // thin wrapper: all arguments are forwarded unchanged to do_mmap
+}
+
+sysret_t syscall(long a0, long a1, long a2, long a3, long a4, long a5, long n)
{
const static void* syscall_table[] = {
[SYS_exit] = sys_exit,
+ [SYS_exit_group] = sys_exit,
[SYS_read] = sys_read,
[SYS_write] = sys_write,
[SYS_open] = sys_open,
@@ -117,10 +162,18 @@ sysret_t syscall(long a0, long a1, long a2, long a3, long n)
[SYS_link] = sys_link,
[SYS_unlink] = sys_unlink,
[SYS_brk] = sys_brk,
+ [SYS_uname] = sys_uname,
+ [SYS_getuid] = sys_getuid,
+ [SYS_geteuid] = sys_getuid,
+ [SYS_getgid] = sys_getuid,
+ [SYS_getegid] = sys_getuid,
+ [SYS_mmap] = sys_mmap,
};
if(n >= ARRAY_SIZE(syscall_table) || !syscall_table[n])
panic("bad syscall #%ld!",n);
- return ((syscall_t)syscall_table[n])(a0, a1, a2, a3, n);
+ sysret_t r = ((syscall_t)syscall_table[n])(a0, a1, a2, a3, a4, a5, n);
+ printk("syscall %d %x %x %x = %x\n", n, a0, a1, a2, r.result);
+ return r;
}
diff --git a/pk/syscall.h b/pk/syscall.h
index d39a6a3..f759e83 100644
--- a/pk/syscall.h
+++ b/pk/syscall.h
@@ -1,4 +1,6 @@
// See LICENSE for license details.
-
#include <machine/syscall.h>
+
+void sys_exit(int code) __attribute__((noreturn));
+sysret_t syscall(long a0, long a1, long a2, long a3, long a4, long a5, long n);
diff --git a/pk/vm.c b/pk/vm.c
new file mode 100644
index 0000000..bb95d09
--- /dev/null
+++ b/pk/vm.c
@@ -0,0 +1,392 @@
+#include "vm.h"
+#include "file.h"
+#include "atomic.h"
+#include "pcr.h"
+#include "pk.h"
+#include <stdint.h>
+#include <errno.h>
+
+// Descriptor for one mapped virtual memory region.  Until a page is first
+// touched, its page table entry holds a pointer to the owning vmr_t.
+typedef struct {
+ uintptr_t addr; // start address recorded when the region was created
+ size_t length; // region length in bytes
+ file_t* file; // backing file, or NULL for a zero-filled anonymous region
+ size_t offset; // file offset that corresponds to addr
+ size_t refcnt; // 0 marks a free slot; __do_mmap initialises it to the page count
+ int prot; // PROT_* bits applied to pages once they are populated
+} vmr_t;
+
+// Number of slots in the static region table below.
+#define MAX_VMR 32
+// Taken by handle_page_fault, do_mmap and do_brk around the __-prefixed
+// helpers.
+spinlock_t vm_lock = SPINLOCK_INIT;
+// Region table; a slot is free while its refcnt is 0.
+static vmr_t vmrs[MAX_VMR];
+
+typedef uintptr_t pte_t;
+// Top-level page table, allocated in vm_init.
+static pte_t* root_page_table;
+// Page pool used by __page_alloc: base address, index of the next unused
+// page, and total number of pages in the pool.
+static uintptr_t first_free_page;
+static size_t next_free_page;
+static size_t free_pages;
+// Nonzero when vm_init found that the SR_VM status bit stays set after being
+// written.
+static int have_vm;
+
+// Bump allocator over the page pool set up by vm_init.  Returns the address
+// of the next unused page, zero-filled, or 0 once the pool is exhausted.
+// Nothing in this file hands pages back to the pool.
+static uintptr_t __page_alloc()
+{
+  if (next_free_page != free_pages)
+  {
+    uintptr_t page = first_free_page + RISCV_PGSIZE * next_free_page;
+    next_free_page++;
+    memset((void*)page, 0, RISCV_PGSIZE);
+    return page;
+  }
+  return 0;
+}
+
+// Claims the first free slot of the region table (a slot whose refcnt is 0)
+// and fills it with the given description.  Returns the slot, or NULL when
+// all MAX_VMR slots are in use.
+static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
+                          size_t offset, size_t refcnt, int prot)
+{
+  for (size_t i = 0; i < MAX_VMR; i++)
+  {
+    vmr_t* slot = &vmrs[i];
+    if (slot->refcnt != 0)
+      continue;
+
+    *slot = (vmr_t){
+      .addr = addr,
+      .length = length,
+      .file = file,
+      .offset = offset,
+      .refcnt = refcnt,
+      .prot = prot,
+    };
+    return slot;
+  }
+  return NULL;
+}
+
+// Drops dec references from region v.  When the count reaches zero the slot
+// becomes free again and the reference on the backing file, if any, is
+// released.
+static void __vmr_decref(vmr_t* v, size_t dec)
+{
+  v->refcnt -= dec;
+  if (v->refcnt == 0 && v->file)
+    file_decref(v->file);
+}
+
+// Nonzero when the entry has bit value 2 set, the flag super_pte_create puts
+// on every leaf mapping it builds.
+static int pte_valid(pte_t pte)
+{
+ return pte & 2;
+}
+
+// Extracts the page number stored in the entry's bits above RISCV_PGSHIFT.
+static size_t pte_ppn(pte_t pte)
+{
+ return pte >> RISCV_PGSHIFT;
+}
+
+// Nonzero when the entry has bit value 1 set, the flag ptd_create puts on
+// entries that point to a next-level table.
+static int ptd_valid(pte_t pte)
+{
+ return pte & 1;
+}
+
+// Builds a directory entry that refers to the next-level table at page
+// number ppn, with the directory-valid flag (bit value 1) set.
+static pte_t ptd_create(uintptr_t ppn)
+{
+ return ppn << RISCV_PGSHIFT | 1;
+}
+
+// Converts an address to its page number by dropping the page-offset bits.
+static uintptr_t ppn(uintptr_t addr)
+{
+ return addr >> RISCV_PGSHIFT;
+}
+
+// Returns the index into the page table at the given level that addr selects.
+// Level 0 is the leaf table; each level consumes RISCV_PGLEVEL_BITS address
+// bits above the page offset.
+static size_t pt_idx(uintptr_t addr, int level)
+{
+  const size_t level_mask = (1 << RISCV_PGLEVEL_BITS) - 1;
+  return (addr >> (RISCV_PGLEVEL_BITS*level + RISCV_PGSHIFT)) & level_mask;
+}
+
+// Translates a PROT_* bit combination (index, 0..7) into the 3-bit permission
+// field used in page table entries.  The bit order is reversed between the
+// two encodings: PROT_READ (1) maps to 4, PROT_WRITE (2) to 2 and
+// PROT_EXEC (4) to 1.
+static int prot2perm[] = {
+ [0] = 0,
+ [PROT_READ] = 4,
+ [PROT_WRITE] = 2,
+ [PROT_WRITE|PROT_READ] = 6,
+ [PROT_EXEC] = 1,
+ [PROT_EXEC|PROT_READ] = 5,
+ [PROT_EXEC|PROT_WRITE] = 3,
+ [PROT_EXEC|PROT_WRITE|PROT_READ] = 7
+};
+
+// Builds a valid leaf entry for a mapping at the given page table level.
+// Only the low three PROT_* bits of kprot and uprot are used: the translated
+// kernel permissions land in bits 7-9, the user permissions in bits 4-6, and
+// bit value 2 marks the entry valid.  ppn is shifted up by the page offset
+// plus RISCV_PGLEVEL_BITS for every level above 0.
+static pte_t super_pte_create(uintptr_t ppn, int kprot, int uprot, int level)
+{
+ int perm = prot2perm[kprot&7] << 7 | prot2perm[uprot&7] << 4 | 2;
+ return (ppn << (RISCV_PGLEVEL_BITS*level + RISCV_PGSHIFT)) | perm;
+}
+
+// Builds a valid level-0 (single page) entry for page number ppn with the
+// given kernel and user PROT_* permissions.
+static pte_t pte_create(uintptr_t ppn, int kprot, int uprot)
+{
+ return super_pte_create(ppn, kprot, uprot, 0);
+}
+
+// Walks the page tables from the root down to level 0 and returns a pointer
+// to the leaf entry slot for addr.
+//
+// With create == 0 the walk stops and returns 0 as soon as an intermediate
+// table is missing.  With create != 0 missing intermediate tables are
+// allocated from the page pool; 0 is returned only if that pool is exhausted.
+// Meeting a valid leaf entry at an intermediate level trips a kassert.
+static __attribute__((always_inline)) pte_t* __walk_internal(uintptr_t addr, int create)
+{
+  pte_t* t = root_page_table;
+
+  for (unsigned i = RISCV_PGLEVELS-1; i > 0; i--)
+  {
+    size_t idx = pt_idx(addr, i);
+    kassert(!pte_valid(t[idx]));
+    if (!ptd_valid(t[idx]))
+    {
+      if (!create)
+        return 0;
+      uintptr_t page = __page_alloc();
+      if (page == 0)
+        return 0;
+      t[idx] = ptd_create(ppn(page));
+    }
+    // Tables are addressed directly by the page number stored in the entry.
+    t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT);
+  }
+  return &t[pt_idx(addr, 0)];
+}
+
+// Looks up the leaf entry slot for addr without allocating anything; returns
+// 0 when an intermediate table is missing.
+static pte_t* __walk(uintptr_t addr)
+{
+ return __walk_internal(addr, 0);
+}
+
+// Looks up the leaf entry slot for addr, allocating missing intermediate
+// tables on the way; returns 0 only when the page pool is exhausted.
+static pte_t* __walk_create(uintptr_t addr)
+{
+ return __walk_internal(addr, 1);
+}
+
+// Reports whether the page containing vaddr is unused: either no leaf table
+// covers it yet, or its leaf entry is still zero.
+static int __va_avail(uintptr_t vaddr)
+{
+  pte_t* entry = __walk(vaddr);
+  if (!entry)
+    return 1;
+  return *entry == 0;
+}
+
+// Finds npage consecutive unused pages in [current.brk, current.mmap_max)
+// and returns the address of the first one, or 0 when no such run exists.
+// Nothing is reserved; the caller is expected to install entries for the
+// range.
+static uintptr_t __vm_alloc(size_t npage)
+{
+  uintptr_t start = current.brk;
+
+  // Refuse requests that cannot fit in the search window.  Without this
+  // check the subtraction below wraps around for oversized requests and the
+  // scan would run far beyond mmap_max.
+  if (npage == 0 || current.mmap_max < start ||
+      npage > (current.mmap_max - start) / RISCV_PGSIZE)
+    return 0;
+
+  uintptr_t end = current.mmap_max - npage*RISCV_PGSIZE;
+  for (uintptr_t a = start; a <= end; a += RISCV_PGSIZE)
+  {
+    if (!__va_avail(a))
+      continue;
+    uintptr_t first = a, last = a + (npage-1) * RISCV_PGSIZE;
+    // Check the candidate run from its last page backwards.  If a used page
+    // is found, `a` is left pointing at it so the outer loop resumes the
+    // search just past that page.
+    for (a = last; a > first && __va_avail(a); a -= RISCV_PGSIZE)
+      ;
+    if (a > first)
+      continue;
+    return a;
+  }
+  return 0;
+}
+
+// Reads PCR_PTBR and writes the same value back.
+// NOTE(review): presumably the write to the page-table base register is what
+// invalidates cached translations on this target -- confirm.
+static void flush_tlb()
+{
+ mtpcr(PCR_PTBR, mfpcr(PCR_PTBR));
+}
+
+// Resolves a fault on the page containing vaddr for an access that needs the
+// PROT_* rights in prot.  Returns 0 when the page is (now) mapped with
+// sufficient rights, -1 when the address is not mapped or the rights are
+// insufficient.  The public wrapper handle_page_fault calls this with
+// vm_lock held.
+static int __handle_page_fault(uintptr_t vaddr, int prot)
+{
+ uintptr_t vpn = vaddr >> RISCV_PGSHIFT;
+ // Round the faulting address down to its page boundary.
+ vaddr = vpn << RISCV_PGSHIFT;
+
+ pte_t* pte = __walk(vaddr);
+
+ // No leaf table or an all-zero entry: nothing was ever mapped here.
+ if (pte == 0 || *pte == 0)
+ return -1;
+ else if (!pte_valid(*pte))
+ {
+ // Nonzero but not valid: the entry still holds the vmr_t pointer stored
+ // by __do_mmap, i.e. the page has not been populated yet.
+ kassert(vaddr < current.stack_top && vaddr >= current.user_min);
+ // The page is mapped one-to-one: page number == virtual page number.
+ uintptr_t ppn = vpn;
+
+ vmr_t* v = (vmr_t*)*pte;
+ // Temporary mapping with kernel read/write and no user access so the
+ // page contents can be filled in below.
+ *pte = pte_create(ppn, PROT_READ|PROT_WRITE, 0);
+ if (v->file)
+ {
+ // File-backed: read at most one page, limited to what is left of the
+ // region, then zero the remainder of the page.
+ size_t flen = MIN(RISCV_PGSIZE, v->length - (vaddr - v->addr));
+ // NOTE(review): the read itself happens inside kassert(); confirm that
+ // kassert is never compiled out.
+ kassert(flen == file_pread(v->file, (void*)vaddr, flen, vaddr - v->addr + v->offset).result);
+ if (flen < RISCV_PGSIZE)
+ memset((void*)vaddr + flen, 0, RISCV_PGSIZE - flen);
+ }
+ else
+ memset((void*)vaddr, 0, RISCV_PGSIZE);
+ // Install the final entry with the region's permissions for both kernel
+ // and user.
+ *pte = pte_create(ppn, v->prot, v->prot);
+ }
+
+ // Every permission bit required by prot (plus the valid bit that
+ // pte_create always sets) must be present in the entry.
+ pte_t perms = pte_create(0, prot, prot);
+ if ((*pte & perms) != perms)
+ return -1;
+
+ flush_tlb();
+ return 0;
+}
+
+// Locked entry point for page fault handling: runs __handle_page_fault with
+// vm_lock held and passes its result (0 on success, -1 on failure) through.
+int handle_page_fault(uintptr_t vaddr, int prot)
+{
+  int status;
+
+  spinlock_lock(&vm_lock);
+  status = __handle_page_fault(vaddr, prot);
+  spinlock_unlock(&vm_lock);
+
+  return status;
+}
+
+// Creates a mapping of length bytes and returns its start address, or
+// (uintptr_t)-1 on failure.
+//
+// With MAP_FIXED the mapping is placed exactly at addr, which must be page
+// aligned and lie within [current.user_min, current.stack_top]; otherwise
+// addr is ignored and a free range is chosen by __vm_alloc.  f is the backing
+// file or NULL for anonymous memory; offset is the file offset of the first
+// byte.  Pages are populated lazily: each leaf entry initially holds a
+// pointer to the region descriptor and is filled in by __handle_page_fault,
+// unless virtual memory is unavailable or MAP_POPULATE is given, in which
+// case all pages are populated immediately.
+//
+// On success the region holds its own reference on f.  do_mmap and do_brk
+// call this with vm_lock held; vm_init calls it directly.
+uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* f, off_t offset)
+{
+  size_t npage = (length-1)/RISCV_PGSIZE+1;
+
+  // Settle the final address first, so that the region descriptor records
+  // where the mapping really lives.  The fault handler derives file offsets
+  // from the descriptor's addr field.
+  if (flags & MAP_FIXED)
+  {
+    if ((addr & (RISCV_PGSIZE-1)) || addr < current.user_min ||
+        addr + length > current.stack_top || addr + length < addr)
+      return (uintptr_t)-1;
+  }
+  else if ((addr = __vm_alloc(npage)) == 0)
+    return (uintptr_t)-1;
+
+  vmr_t* v = __vmr_alloc(addr, length, f, offset, npage, prot);
+  if (!v)
+    return (uintptr_t)-1;
+
+  // Take the region's file reference as soon as the descriptor exists, so
+  // the release in __vmr_decref is always balanced.
+  if (f) file_incref(f);
+
+  for (uintptr_t a = addr; a < addr + length; a += RISCV_PGSIZE)
+  {
+    pte_t* pte = __walk_create(a);
+    kassert(pte);
+    kassert(*pte == 0); // TODO __do_munmap
+
+    // Park the descriptor pointer in the entry until the page is touched.
+    *pte = (pte_t)v;
+  }
+
+  if (!have_vm || (flags & MAP_POPULATE))
+    for (uintptr_t a = addr; a < addr + length; a += RISCV_PGSIZE)
+      kassert(__handle_page_fault(a, prot) == 0);
+
+  return addr;
+}
+
+// mmap implementation behind the syscall.
+//
+// Fails with EINVAL unless MAP_PRIVATE is set, length is nonzero and offset
+// is page aligned.  Without MAP_ANONYMOUS, fd must name an open file,
+// otherwise EBADF is returned.  If the mapping itself cannot be created the
+// call fails with ENOMEM.  On success the result is the mapping's address,
+// and current.brk_max is lowered to that address if the mapping lies below
+// it.
+sysret_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+  if (!(flags & MAP_PRIVATE) || length == 0 || (offset & (RISCV_PGSIZE-1)))
+    return (sysret_t){-1, EINVAL};
+
+  file_t* f = NULL;
+  if (!(flags & MAP_ANONYMOUS) && (f = file_get(fd)) == NULL)
+    return (sysret_t){-1, EBADF};
+
+  spinlock_lock(&vm_lock);
+  addr = __do_mmap(addr, length, prot, flags, f, offset);
+  int failed = (addr == (uintptr_t)-1);
+  if (!failed && addr < current.brk_max)
+    current.brk_max = addr;
+  spinlock_unlock(&vm_lock);
+
+  // Drop the reference obtained from file_get.
+  if (f) file_decref(f);
+
+  // Report failure with an error code instead of a bare -1 result.
+  if (failed)
+    return (sysret_t){-1, ENOMEM};
+  return (sysret_t){addr, 0};
+}
+
+// Moves the program break towards addr and returns the resulting break.
+// The request is clamped to [current.brk_min, current.brk_max].  The mapped
+// heap always ends on a page boundary, so growing the break maps the pages
+// between the old and new page-rounded break as anonymous memory.  Shrinking
+// below an already mapped page is not implemented and trips a kassert.
+size_t __do_brk(size_t addr)
+{
+  size_t clamped = addr;
+  if (clamped < current.brk_min)
+    clamped = current.brk_min;
+  else if (clamped > current.brk_max)
+    clamped = current.brk_max;
+
+  // First call: start the heap at the first page boundary at or above
+  // brk_min.
+  if (current.brk == 0)
+    current.brk = ROUNDUP(current.brk_min, RISCV_PGSIZE);
+
+  size_t target_page = ROUNDUP(clamped, RISCV_PGSIZE);
+  if (current.brk > target_page)
+  {
+    kassert(0); // TODO __do_munmap
+  }
+  else if (current.brk < target_page)
+  {
+    kassert(__do_mmap(current.brk, target_page - current.brk, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) == current.brk);
+  }
+  current.brk = target_page;
+
+  return clamped;
+}
+
+// brk implementation behind the syscall: runs __do_brk with vm_lock held and
+// returns the resulting break with no error.
+sysret_t do_brk(size_t addr)
+{
+  spinlock_lock(&vm_lock);
+  size_t newbrk = __do_brk(addr);
+  spinlock_unlock(&vm_lock);
+
+  return (sysret_t){newbrk, 0};
+}
+
+// Identity-maps the range [paddr, paddr + len) page by page, with kernel
+// permissions prot and no user access.  Intermediate tables are created as
+// needed; running out of pool pages trips a kassert.
+static void __map_kernel_range(uintptr_t paddr, size_t len, int prot)
+{
+  const pte_t flags = pte_create(0, prot, 0);
+  const uintptr_t limit = paddr + len;
+
+  for (uintptr_t page = paddr; page < limit; page += RISCV_PGSIZE)
+  {
+    pte_t* slot = __walk_create(page);
+    kassert(slot);
+    *slot = page | flags;
+  }
+}
+
+// Touches every page that overlaps [start, start + size): performs one
+// atomic access per page, an add of 0 when prot includes PROT_WRITE and a
+// plain read otherwise.
+// NOTE(review): presumably used to force the pages to be faulted in before
+// the range is handed to code that must not fault -- confirm with callers.
+void populate_mapping(const void* start, size_t size, int prot)
+{
+ uintptr_t a0 = ROUNDDOWN((uintptr_t)start, RISCV_PGSIZE);
+ for (uintptr_t a = a0; a < (uintptr_t)start+size; a += RISCV_PGSIZE)
+ {
+ // Align the address down to a multiple of sizeof(atomic_t).
+ atomic_t* atom = (atomic_t*)(a & -sizeof(atomic_t));
+ if (prot & PROT_WRITE)
+ atomic_add(atom, 0);
+ else
+ atomic_read(atom);
+ }
+}
+
+// Establishes the memory layout limits in `current`, builds the initial page
+// tables and, when the hardware accepts it, turns on virtual memory.
+void vm_init()
+{
+ extern char _end;
+ // User memory and the heap start at the first page boundary at or above
+ // _end.
+ current.user_min = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
+ current.brk_min = current.user_min;
+ current.brk = 0;
+
+ // NOTE(review): the memory size in MB is read from address 0; presumably
+ // placed there by the host/boot environment -- confirm.
+ uint32_t mem_mb = *(volatile uint32_t*)0;
+
+ if (mem_mb == 0)
+ {
+ // No memory size available: leave every limit at zero.
+ current.stack_bottom = 0;
+ current.stack_top = 0;
+ current.brk_max = 0;
+ current.mmap_max = 0;
+ }
+ else
+ {
+ uintptr_t max_addr = (uintptr_t)mem_mb << 20;
+ size_t mem_pages = max_addr >> RISCV_PGSHIFT;
+ const size_t min_free_pages = 2*RISCV_PGLEVELS;
+ const size_t min_stack_pages = 8;
+ const size_t max_stack_pages = 128;
+ kassert(mem_pages > min_free_pages + min_stack_pages);
+ // Page pool for __page_alloc: a fixed fraction of memory, but never
+ // fewer than min_free_pages, placed at the very top of memory.
+ free_pages = MAX(mem_pages >> (RISCV_PGLEVEL_BITS-1), min_free_pages);
+ // Stack size: 1/32 of memory, clamped to [8, 128] pages.
+ size_t stack_pages = CLAMP(mem_pages/32, min_stack_pages, max_stack_pages);
+ first_free_page = max_addr - free_pages * RISCV_PGSIZE;
+
+ // The root page table is the first page taken from the pool.
+ uintptr_t root_page_table_paddr = __page_alloc();
+ kassert(root_page_table_paddr);
+ root_page_table = (pte_t*)root_page_table_paddr;
+
+ // Identity-map everything below user_min for the kernel (read/write/exec).
+ __map_kernel_range(0, current.user_min, PROT_READ|PROT_WRITE|PROT_EXEC);
+
+ // Probe for VM support: set SR_VM, check whether the bit reads back as
+ // set, then clear it again until the remaining mappings are in place.
+ mtpcr(PCR_PTBR, root_page_table_paddr);
+ setpcr(PCR_SR, SR_VM);
+ have_vm = mfpcr(PCR_SR) & SR_VM;
+ clearpcr(PCR_SR, SR_VM);
+
+ // The stack sits directly below the page pool.
+ size_t stack_size = RISCV_PGSIZE * stack_pages;
+ current.stack_top = first_free_page;
+ uintptr_t stack_bot = current.stack_top - stack_size;
+
+ if (have_vm)
+ {
+ // Map the pool itself for the kernel, create the stack mapping, and
+ // only then enable translation.
+ __map_kernel_range(first_free_page, free_pages * RISCV_PGSIZE, PROT_READ|PROT_WRITE);
+ kassert(__do_mmap(stack_bot, stack_size, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) == stack_bot);
+ setpcr(PCR_SR, SR_VM);
+ }
+
+ current.stack_bottom = stack_bot;
+ // The heap and mmap area end one page below the stack.
+ stack_bot -= RISCV_PGSIZE; // guard page
+ current.mmap_max = current.brk_max = stack_bot;
+ }
+}
diff --git a/pk/vm.h b/pk/vm.h
new file mode 100644
index 0000000..349d9ef
--- /dev/null
+++ b/pk/vm.h
@@ -0,0 +1,26 @@
+#ifndef _VM_H
+#define _VM_H
+
+#include "syscall.h"
+#include "file.h"
+#include <string.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define PROT_EXEC 4
+
+#define MAP_PRIVATE 0x2
+#define MAP_FIXED 0x10
+#define MAP_ANONYMOUS 0x20
+#define MAP_POPULATE 0x8000
+
+// Sets up the memory layout limits and page tables; enables virtual memory
+// when the hardware supports it.
+void vm_init();
+// Resolves a fault at vaddr for an access needing PROT_* rights prot.
+// Returns 0 on success, -1 if the access is not permitted.
+int handle_page_fault(uintptr_t vaddr, int prot);
+// Touches every page overlapping [start, start + size).
+void populate_mapping(const void* start, size_t size, int prot);
+// Core mmap; returns the mapped address or (uintptr_t)-1.  Does not take
+// vm_lock itself.
+uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* file, off_t offset);
+// Locked mmap entry point used by the syscall layer.
+sysret_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset);
+// Locked brk entry point used by the syscall layer.  The parameter type
+// matches the definition in vm.c.
+sysret_t do_brk(size_t addr);
+
+#endif