author | Andrew Waterman <waterman@cs.berkeley.edu> | 2015-03-12 17:38:04 -0700
---|---|---
committer | Andrew Waterman <waterman@cs.berkeley.edu> | 2015-03-12 17:38:04 -0700
commit | 6517fe26a2a0c89c3112f4a383c601572c71d64a (patch) |
tree | d37eea7ae6f3e15eee94afb5c9c749a4cd800577 /pk/vm.c |
parent | a4ae7da6ef0c09c2616a0b82f7f569e4e134f75c (diff) |
Update to new privileged spec
Diffstat (limited to 'pk/vm.c')
-rw-r--r-- | pk/vm.c | 226
1 file changed, 130 insertions(+), 96 deletions(-)
```diff
@@ -10,7 +10,7 @@ typedef struct
   size_t length;
   file_t* file;
   size_t offset;
-  size_t refcnt;
+  unsigned refcnt;
   int prot;
 } vmr_t;
 
@@ -26,20 +26,21 @@ static size_t free_pages;
 
 static uintptr_t __page_alloc()
 {
-  if (next_free_page == free_pages)
-    return 0;
+  kassert(next_free_page != free_pages);
   uintptr_t addr = first_free_page + RISCV_PGSIZE * next_free_page++;
   memset((void*)addr, 0, RISCV_PGSIZE);
   return addr;
 }
 
 static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
-                          size_t offset, size_t refcnt, int prot)
+                          size_t offset, unsigned refcnt, int prot)
 {
   for (vmr_t* v = vmrs; v < vmrs + MAX_VMR; v++)
   {
     if (v->refcnt == 0)
     {
+      if (file)
+        file_incref(file);
       v->addr = addr;
       v->length = length;
       v->file = file;
@@ -52,7 +53,7 @@ static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
   return NULL;
 }
 
-static void __vmr_decref(vmr_t* v, size_t dec)
+static void __vmr_decref(vmr_t* v, unsigned dec)
 {
   if ((v->refcnt -= dec) == 0)
   {
@@ -95,9 +96,18 @@ static pte_t pte_create(uintptr_t ppn, int kprot, int uprot)
   return super_pte_create(ppn, kprot, uprot, 0);
 }
 
-static __attribute__((always_inline)) pte_t* __walk_internal(uintptr_t addr, int create)
+static void __maybe_create_root_page_table()
+{
+  if (root_page_table)
+    return;
+  root_page_table = (void*)__page_alloc();
+  if (have_vm)
+    write_csr(sptbr, root_page_table);
+}
+static pte_t* __walk_internal(uintptr_t addr, int create)
 {
   const size_t pte_per_page = RISCV_PGSIZE/sizeof(void*);
+  __maybe_create_root_page_table();
   pte_t* t = root_page_table;
 
   for (unsigned i = RISCV_PGLEVELS-1; i > 0; i--)
@@ -108,8 +118,6 @@ static __attribute__((always_inline)) pte_t* __walk_internal(uintptr_t addr, int create)
       if (!create)
         return 0;
       uintptr_t page = __page_alloc();
-      if (page == 0)
-        return 0;
       t[idx] = ptd_create(ppn(page));
     }
     else
@@ -138,16 +146,15 @@ static int __va_avail(uintptr_t vaddr)
 static uintptr_t __vm_alloc(size_t npage)
 {
   uintptr_t start = current.brk, end = current.mmap_max - npage*RISCV_PGSIZE;
-  for (uintptr_t a = start; a <= end; a += RISCV_PGSIZE)
+  for (uintptr_t a = end; a >= start; a -= RISCV_PGSIZE)
   {
     if (!__va_avail(a))
      continue;
-    uintptr_t first = a, last = a + (npage-1) * RISCV_PGSIZE;
-    for (a = last; a > first && __va_avail(a); a -= RISCV_PGSIZE)
+    uintptr_t last = a, first = a - (npage-1) * RISCV_PGSIZE;
+    for (a = first; a < last && __va_avail(a); a += RISCV_PGSIZE)
       ;
-    if (a > first)
-      continue;
-    return a;
+    if (a >= last)
+      return a;
   }
   return 0;
 }
@@ -157,6 +164,13 @@ static void flush_tlb()
   asm volatile("sfence.vm");
 }
 
+int __valid_user_range(uintptr_t vaddr, size_t len)
+{
+  if (vaddr + len < vaddr)
+    return 0;
+  return vaddr >= current.first_free_paddr && vaddr + len <= current.mmap_max;
+}
+
 static int __handle_page_fault(uintptr_t vaddr, int prot)
 {
   uintptr_t vpn = vaddr >> RISCV_PGSHIFT;
@@ -168,7 +182,7 @@ static int __handle_page_fault(uintptr_t vaddr, int prot)
     return -1;
   else if (!(*pte & PTE_V))
   {
-    kassert(vaddr < current.stack_top && vaddr >= current.user_min);
+    kassert(__valid_user_range(vaddr, 1));
     uintptr_t ppn = vpn;
 
     vmr_t* v = (vmr_t*)*pte;
@@ -225,8 +239,7 @@ uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* f, off_t offset)
   size_t npage = (length-1)/RISCV_PGSIZE+1;
   if (flags & MAP_FIXED)
   {
-    if ((addr & (RISCV_PGSIZE-1)) || addr < current.user_min ||
-        addr + length > current.stack_top || addr + length < addr)
+    if ((addr & (RISCV_PGSIZE-1)) || !__valid_user_range(addr, length))
       return (uintptr_t)-1;
   }
   else if ((addr = __vm_alloc(npage)) == 0)
@@ -247,19 +260,19 @@ uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* f, off_t offset)
       *pte = (pte_t)v;
   }
 
-  if (f) file_incref(f);
-
   if (!have_vm || (flags & MAP_POPULATE))
     for (uintptr_t a = addr; a < addr + length; a += RISCV_PGSIZE)
      kassert(__handle_page_fault(a, prot) == 0);
 
+  if (current.brk_min != 0 && addr < current.brk_max)
+    current.brk_max = ROUNDUP(addr + length, RISCV_PGSIZE);
+
   return addr;
 }
 
 int do_munmap(uintptr_t addr, size_t length)
 {
-  if ((addr & (RISCV_PGSIZE-1)) || addr < current.user_min ||
-      addr + length > current.stack_top || addr + length < addr)
+  if ((addr & (RISCV_PGSIZE-1)) || !__valid_user_range(addr, length))
     return -EINVAL;
 
   spinlock_lock(&vm_lock);
@@ -280,8 +293,6 @@ uintptr_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset)
   spinlock_lock(&vm_lock);
   addr = __do_mmap(addr, length, prot, flags, f, offset);
-  if (addr < current.brk_max)
-    current.brk_max = addr;
   spinlock_unlock(&vm_lock);
 
   if (f)
     file_decref(f);
@@ -318,29 +329,34 @@ uintptr_t do_brk(size_t addr)
   return addr;
 }
 
+uintptr_t __do_mremap(uintptr_t addr, size_t old_size, size_t new_size, int flags)
+{
+  for (size_t i = 0; i < MAX_VMR; i++)
+  {
+    if (vmrs[i].refcnt && addr == vmrs[i].addr && old_size == vmrs[i].length)
+    {
+      size_t old_npage = (vmrs[i].length-1)/RISCV_PGSIZE+1;
+      size_t new_npage = (new_size-1)/RISCV_PGSIZE+1;
+      if (new_size < old_size)
+        __do_munmap(addr + new_size, old_size - new_size);
+      else if (new_size > old_size)
+        __do_mmap(addr + old_size, new_size - old_size, vmrs[i].prot, 0,
+                  vmrs[i].file, vmrs[i].offset + new_size - old_size);
+      __vmr_decref(&vmrs[i], old_npage - new_npage);
+      return addr;
+    }
+  }
+  return -1;
+}
+
 uintptr_t do_mremap(uintptr_t addr, size_t old_size, size_t new_size, int flags)
 {
-  uintptr_t res = -1;
   if (((addr | old_size | new_size) & (RISCV_PGSIZE-1)) || (flags & MREMAP_FIXED))
     return -EINVAL;
 
   spinlock_lock(&vm_lock);
-  for (size_t i = 0; i < MAX_VMR; i++)
-  {
-    if (vmrs[i].refcnt && addr == vmrs[i].addr && old_size == vmrs[i].length)
-    {
-      size_t old_npage = (vmrs[i].length-1)/RISCV_PGSIZE+1;
-      size_t new_npage = (new_size-1)/RISCV_PGSIZE+1;
-      if (new_size < old_size)
-        __do_munmap(addr + new_size, old_size - new_size);
-      else if (new_size > old_size)
-        __do_mmap(addr + old_size, new_size - old_size, vmrs[i].prot, 0,
-                  vmrs[i].file, vmrs[i].offset + new_size - old_size);
-      __vmr_decref(&vmrs[i], old_npage - new_npage);
-      res = addr;
-    }
-  }
+  uintptr_t res = __do_mremap(addr, old_size, new_size, flags);
  spinlock_unlock(&vm_lock);
 
   return res;
@@ -385,14 +401,15 @@ uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot)
   return res;
 }
 
-static void __map_kernel_range(uintptr_t paddr, size_t len, int prot)
+void __map_kernel_range(uintptr_t vaddr, uintptr_t paddr, size_t len, int prot)
 {
+  uintptr_t n = ROUNDUP(len, RISCV_PGSIZE) / RISCV_PGSIZE;
   pte_t perms = pte_create(0, prot, 0);
-  for (uintptr_t a = paddr; a < paddr + len; a += RISCV_PGSIZE)
+  for (uintptr_t a = vaddr, i = 0; i < n; i++, a += RISCV_PGSIZE)
   {
     pte_t* pte = __walk_create(a);
     kassert(pte);
-    *pte = a | perms;
+    *pte = (a - vaddr + paddr) | perms;
   }
 }
 
@@ -401,71 +418,88 @@ void populate_mapping(const void* start, size_t size, int prot)
   uintptr_t a0 = ROUNDDOWN((uintptr_t)start, RISCV_PGSIZE);
   for (uintptr_t a = a0; a < (uintptr_t)start+size; a += RISCV_PGSIZE)
   {
-    atomic_t* atom = (atomic_t*)(a & -sizeof(atomic_t));
     if (prot & PROT_WRITE)
-      atomic_add(atom, 0);
+      atomic_add((int*)a, 0);
     else
-      atomic_read(atom);
+      atomic_read((int*)a);
   }
 }
 
-void vm_init()
+static uintptr_t sbi_top_paddr()
 {
   extern char _end;
-  current.user_min = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
-  current.brk_min = current.user_min;
-  current.brk = 0;
+  return ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
+}
 
-  uint32_t mem_mb = *(volatile uint32_t*)0;
+#define first_free_paddr() (sbi_top_paddr() + RISCV_PGSIZE /* boot stack */)
 
-  if (mem_mb == 0)
-  {
-    current.stack_bottom = 0;
-    current.stack_top = 0;
-    current.brk_max = 0;
-    current.mmap_max = 0;
-  }
-  else
-  {
-    uintptr_t max_addr = (uintptr_t)mem_mb << 20;
-    size_t mem_pages = max_addr >> RISCV_PGSHIFT;
-    const size_t min_free_pages = 2*RISCV_PGLEVELS;
-    const size_t min_stack_pages = 8;
-    const size_t max_stack_pages = 1024;
-    kassert(mem_pages > min_free_pages + min_stack_pages);
-    free_pages = MAX(mem_pages >> (RISCV_PGLEVEL_BITS-1), min_free_pages);
-    size_t stack_pages = CLAMP(mem_pages/32, min_stack_pages, max_stack_pages);
-    first_free_page = max_addr - free_pages * RISCV_PGSIZE;
-
-    uintptr_t root_page_table_paddr = __page_alloc();
-    kassert(root_page_table_paddr);
-    root_page_table = (pte_t*)root_page_table_paddr;
-
-    __map_kernel_range(0, current.user_min, PROT_READ|PROT_WRITE|PROT_EXEC);
-
-    int vm_field = sizeof(long) == 4 ? VM_SV32 : VM_SV43;
-    if (have_vm)
-    {
-#if 0
-      write_csr(sptbr, root_page_table_paddr);
-      set_csr(mstatus, vm_field << __builtin_ctz(MSTATUS_VM));
-#endif
-      have_vm = (clear_csr(mstatus, MSTATUS_VM) & MSTATUS_VM) != VM_MBARE;
-    }
+void vm_init()
+{
+  current.first_free_paddr = first_free_paddr();
 
-    size_t stack_size = RISCV_PGSIZE * stack_pages;
-    current.stack_top = MIN(first_free_page, 0x80000000); // for RV32 sanity
-    uintptr_t stack_bot = current.stack_top - stack_size;
+  size_t mem_pages = mem_size >> RISCV_PGSHIFT;
+  free_pages = MAX(8, mem_pages >> (RISCV_PGLEVEL_BITS-1));
+  first_free_page = mem_size - free_pages * RISCV_PGSIZE;
+  current.mmap_max = current.brk_max = first_free_page;
+}
 
-    if (have_vm)
-    {
-      __map_kernel_range(first_free_page, free_pages * RISCV_PGSIZE, PROT_READ|PROT_WRITE);
-      kassert(__do_mmap(stack_bot, stack_size, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) == stack_bot);
-      set_csr(mstatus, vm_field);
-    }
+void supervisor_vm_init()
+{
+  uintptr_t highest_va = -current.first_free_paddr;
+  mem_size = MIN(mem_size, highest_va - current.first_user_vaddr) & -SUPERPAGE_SIZE;
+
+  pte_t* sbi_pt = (pte_t*)(current.first_vaddr_after_user + current.bias);
+  memset(sbi_pt, 0, RISCV_PGSIZE);
+  pte_t* middle_pt = (void*)sbi_pt + RISCV_PGSIZE;
+#if RISCV_PGLEVELS == 2
+  root_page_table = middle_pt;
+#elif RISCV_PGLEVELS == 3
+  kassert(current.first_user_vaddr >= -(SUPERPAGE_SIZE << RISCV_PGLEVEL_BITS));
+  root_page_table = (void*)middle_pt + RISCV_PGSIZE;
+  memset(root_page_table, 0, RISCV_PGSIZE);
+  root_page_table[(1<<RISCV_PGLEVEL_BITS)-1] = (uintptr_t)middle_pt | PTE_T | PTE_V;
+#else
+#error
+#endif
+  write_csr(sptbr, root_page_table);
 
-    current.stack_bottom = stack_bot;
-    stack_bot -= RISCV_PGSIZE; // guard page
-    current.mmap_max = current.brk_max = stack_bot;
+  for (uintptr_t vaddr = current.first_user_vaddr, paddr = vaddr + current.bias, end = current.first_vaddr_after_user;
+       paddr < mem_size; vaddr += SUPERPAGE_SIZE, paddr += SUPERPAGE_SIZE) {
+    int l2_shift = RISCV_PGLEVEL_BITS + RISCV_PGSHIFT;
+    int l2_idx = (vaddr >> l2_shift) & ((1 << RISCV_PGLEVEL_BITS)-1);
+    middle_pt[l2_idx] = paddr | PTE_V | PTE_G | PTE_SR | PTE_SW | PTE_SX;
   }
+  current.first_vaddr_after_user += (void*)root_page_table + RISCV_PGSIZE - (void*)sbi_pt;
+
+  // map SBI at top of vaddr space
+  uintptr_t num_sbi_pages = sbi_top_paddr() / RISCV_PGSIZE;
+  for (uintptr_t i = 0; i < num_sbi_pages; i++) {
+    uintptr_t idx = (1 << RISCV_PGLEVEL_BITS) - num_sbi_pages + i;
+    sbi_pt[idx] = (i * RISCV_PGSIZE) | PTE_V | PTE_G | PTE_SR | PTE_SX;
+  }
+  pte_t* sbi_pte = middle_pt + ((1 << RISCV_PGLEVEL_BITS)-1);
+  kassert(!*sbi_pte);
+  *sbi_pte = (uintptr_t)sbi_pt | PTE_T | PTE_V;
+
+  // disable our allocator
+  kassert(next_free_page == 0);
+  free_pages = 0;
+
+  flush_tlb();
+}
+
+void pk_vm_init()
+{
+  __map_kernel_range(0, 0, current.first_free_paddr, PROT_READ|PROT_WRITE|PROT_EXEC);
+  __map_kernel_range(first_free_page, first_free_page, free_pages * RISCV_PGSIZE, PROT_READ|PROT_WRITE);
+
+  extern char trap_entry;
+  write_csr(stvec, &trap_entry);
+  write_csr(sscratch, __page_alloc() + RISCV_PGSIZE);
+
+  size_t stack_size = RISCV_PGSIZE * CLAMP(mem_size/(RISCV_PGSIZE*32), 1, 256);
+  current.stack_bottom = __do_mmap(0, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+  kassert(current.stack_bottom != (uintptr_t)-1);
+  current.stack_top = current.stack_bottom + stack_size;
+  kassert(current.stack_top == current.mmap_max);
 }
```
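The rewritten `__vm_alloc` scans candidate addresses from the top of the mmap region downward rather than upward from the brk, so fresh mappings land high and leave the space just above the brk free to grow. A minimal standalone sketch of the same top-down first-fit idea, simplified to a hypothetical `va_avail` bitmap over page indices rather than pk's `__va_avail` page-table probe:

```c
#include <stdio.h>

#define NPAGES 16

/* Hypothetical availability map: 1 = page free, 0 = page in use. */
static int va_avail[NPAGES] = {1,1,0,1,1,1,1,1, 1,1,1,0,1,1,1,1};

/* Find npage contiguous free pages, searching from the top down.
   Returns the first page index of the run, or -1 if nothing fits. */
static int vm_alloc(int start, int end_excl, int npage)
{
  for (int a = end_excl - npage; a >= start; a--) {
    int ok = 1;
    for (int i = 0; i < npage && ok; i++)
      ok = va_avail[a + i];
    if (ok)
      return a;
  }
  return -1;
}

int main(void)
{
  printf("3 pages -> index %d\n", vm_alloc(0, NPAGES, 3)); /* 13: highest fit */
  return 0;
}
```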
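The new `__valid_user_range` helper centralizes the bounds check that was previously duplicated in `__do_mmap` and `do_munmap`, including the `vaddr + len < vaddr` test that rejects ranges whose end wraps past the top of the address space. A self-contained sketch of the same logic, with hypothetical constants standing in for `current.first_free_paddr` and `current.mmap_max`:

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-ins for current.first_free_paddr and current.mmap_max. */
static const uintptr_t user_lo = 0x10000;
static const uintptr_t user_hi = 0x80000000;

/* Returns nonzero iff [vaddr, vaddr+len) lies inside [user_lo, user_hi). */
static int valid_user_range(uintptr_t vaddr, size_t len)
{
  if (vaddr + len < vaddr)   /* end of range wrapped past UINTPTR_MAX */
    return 0;
  return vaddr >= user_lo && vaddr + len <= user_hi;
}

int main(void)
{
  assert(valid_user_range(0x10000, 0x1000));            /* in range */
  assert(!valid_user_range(0x1000, 0x1000));            /* below user_lo */
  assert(!valid_user_range((uintptr_t)-0x800, 0x1000)); /* end wraps around */
  return 0;
}
```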
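With the split into separate `vaddr`/`paddr` arguments, `__map_kernel_range` can now create non-identity mappings: each page's PTE receives `(a - vaddr + paddr)`, i.e. the page's offset within the virtual range added to the physical base. A toy version that prints the mappings instead of writing real PTEs, assuming a 4 KiB page size:

```c
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define PGSIZE 4096u

int main(void)
{
  /* Map 3 pages of physical memory at 0x80000000 to virtual 0xffff0000. */
  uintptr_t vaddr = 0xffff0000u, paddr = 0x80000000u;
  size_t len = 3 * PGSIZE;

  uintptr_t n = (len + PGSIZE - 1) / PGSIZE;  /* ROUNDUP(len, PGSIZE) / PGSIZE */
  for (uintptr_t a = vaddr, i = 0; i < n; i++, a += PGSIZE)
    printf("va %#lx -> pa %#lx\n",
           (unsigned long)a, (unsigned long)(a - vaddr + paddr));
  return 0;
}
```

The old identity-mapped behavior falls out as the special case `vaddr == paddr`, which is how `pk_vm_init` calls it.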
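In `supervisor_vm_init`, the superpage loop indexes the middle-level table with `(vaddr >> (RISCV_PGLEVEL_BITS + RISCV_PGSHIFT)) & ((1 << RISCV_PGLEVEL_BITS) - 1)`. A worked example of that arithmetic, assuming hypothetical Sv32-style parameters (4 KiB pages, 10 index bits per level, so 4 MiB superpages):

```c
#include <stdio.h>
#include <stdint.h>

/* Hypothetical Sv32-style parameters, not taken from the pk headers. */
#define PGSHIFT 12
#define PGLEVEL_BITS 10
#define SUPERPAGE_SIZE ((uintptr_t)1 << (PGSHIFT + PGLEVEL_BITS))  /* 4 MiB */

int main(void)
{
  uintptr_t vaddr = 5 * SUPERPAGE_SIZE;  /* start of the 6th superpage */
  int l2_shift = PGLEVEL_BITS + PGSHIFT;
  int l2_idx = (vaddr >> l2_shift) & ((1 << PGLEVEL_BITS) - 1);
  printf("vaddr %#lx -> level-2 index %d\n",
         (unsigned long)vaddr, l2_idx);   /* prints index 5 */
  return 0;
}
```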