From aedc6165cc28a4a74267eefd5da1862981f5b40f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Mar 2015 16:38:46 -0700 Subject: New virtual memory implementation (Sv39) --- pk/encoding.h | 55 ++++++++++++++++++++++++++------------ pk/mentry.S | 2 +- pk/vm.c | 85 ++++++++++++++++++++++++++++------------------------------- pk/vm.h | 3 +++ 4 files changed, 82 insertions(+), 63 deletions(-) (limited to 'pk') diff --git a/pk/encoding.h b/pk/encoding.h index cf8b2a2..132f81d 100644 --- a/pk/encoding.h +++ b/pk/encoding.h @@ -63,18 +63,39 @@ #define IMPL_ROCKET 2 // page table entry (PTE) fields -#define PTE_V 0x001 // Entry is a page Table descriptor -#define PTE_T 0x002 // Entry is a page Table, not a terminal node -#define PTE_G 0x004 // Global -#define PTE_UR 0x008 // User Write permission -#define PTE_UW 0x010 // User Read permission -#define PTE_UX 0x020 // User eXecute permission -#define PTE_SR 0x040 // Supervisor Read permission -#define PTE_SW 0x080 // Supervisor Write permission -#define PTE_SX 0x100 // Supervisor eXecute permission -#define PTE_R 0x200 // Referenced -#define PTE_D 0x400 // Dirty -#define PTE_PERM (PTE_SR | PTE_SW | PTE_SX | PTE_UR | PTE_UW | PTE_UX) +#define PTE_TYPE 0x007 +#define PTE_PERM 0x018 +#define PTE_G 0x020 // Global +#define PTE_R 0x040 // Referenced +#define PTE_D 0x080 // Dirty +#define PTE_SOFT 0x300 // Reserved for Software +#define PTE_PPN_SHIFT 10 +#define PTE_TYPE_INVALID 0 +#define PTE_TYPE_TABLE 1 +#define PTE_TYPE_U 2 +#define PTE_TYPE_S 3 +#define PTE_TYPE_US 4 +#define PTE_TYPE_US_SR 4 +#define PTE_TYPE_US_SRW 5 +#define PTE_TYPE_US_SRX 6 +#define PTE_TYPE_US_SRWX 7 + +#define PROT_TO_PERM(PROT) ((((PROT) & PROT_EXEC) ? 2 : 0) | (((PROT) & PROT_WRITE) ? 1 : 0)) +#define PTE_CREATE(PPN, PERM_U, PERM_S) \ + (((PPN) << PTE_PPN_SHIFT) | (PROT_TO_PERM(PERM_U) << 3) | \ + ((PERM_U) && (PERM_S) ? (PTE_TYPE_US | PROT_TO_PERM(PERM_S)) : \ + (PERM_S) ? (PTE_TYPE_S | (PROT_TO_PERM(PERM_S) << 3)) : \ + (PERM_U) ? PTE_TYPE_U : 0)) + +#define PTE_UR(PTE) ((0xF4F4F4F4U >> ((PTE) & 0x1f)) & 1) +#define PTE_UW(PTE) ((0xF400F400U >> ((PTE) & 0x1f)) & 1) +#define PTE_UX(PTE) ((0xF4F40000U >> ((PTE) & 0x1f)) & 1) +#define PTE_SR(PTE) ((0xF8F8F8F8U >> ((PTE) & 0x1f)) & 1) +#define PTE_SW(PTE) ((0xA8A0A8A0U >> ((PTE) & 0x1f)) & 1) +#define PTE_SX(PTE) ((0xC8C8C0C0U >> ((PTE) & 0x1f)) & 1) +#define PTE_CHECK_PERM(PTE, SUPERVISOR, WRITE, EXEC) \ + ((SUPERVISOR) ? ((WRITE) ? PTE_SW(PTE) : (EXEC) ? PTE_SX(PTE) : PTE_SR(PTE)) \ + : ((WRITE) ? PTE_UW(PTE) : (EXEC) ? PTE_UX(PTE) : PTE_UR(PTE))) #ifdef __riscv @@ -84,15 +105,15 @@ # define MSTATUS_HA MSTATUS64_HA # define MSTATUS_SD MSTATUS64_SD # define SSTATUS_SD SSTATUS64_SD -# define RISCV_PGLEVELS 3 -# define RISCV_PGSHIFT 13 +# define RISCV_PGLEVELS 3 /* Sv39 */ +# define RISCV_PGLEVEL_BITS 9 #else # define MSTATUS_SD MSTATUS32_SD # define SSTATUS_SD SSTATUS32_SD -# define RISCV_PGLEVELS 2 -# define RISCV_PGSHIFT 12 +# define RISCV_PGLEVELS 2 /* Sv32 */ +# define RISCV_PGLEVEL_BITS 10 #endif -#define RISCV_PGLEVEL_BITS 10 +#define RISCV_PGSHIFT 12 #define RISCV_PGSIZE (1 << RISCV_PGSHIFT) #ifndef __ASSEMBLER__ diff --git a/pk/mentry.S b/pk/mentry.S index 563da41..cfbb6a6 100644 --- a/pk/mentry.S +++ b/pk/mentry.S @@ -116,7 +116,7 @@ mentry: .align 6 # Entry point for power-on reset. # TODO per-hart stacks - la sp, _end + RISCV_PGSIZE + 1 + la sp, _end + 2*RISCV_PGSIZE - 1 li t0, -RISCV_PGSIZE and sp, sp, t0 j machine_init diff --git a/pk/vm.c b/pk/vm.c index c54417f..fd40fcb 100644 --- a/pk/vm.c +++ b/pk/vm.c @@ -16,7 +16,7 @@ typedef struct { #define MAX_VMR 32 spinlock_t vm_lock = SPINLOCK_INIT; -static vmr_t vmrs[MAX_VMR]; +static vmr_t vmrs[MAX_VMR] __attribute__((aligned(PTE_TYPE+1))); typedef uintptr_t pte_t; static pte_t* root_page_table; @@ -64,12 +64,17 @@ static void __vmr_decref(vmr_t* v, unsigned dec) static size_t pte_ppn(pte_t pte) { - return pte >> RISCV_PGSHIFT; + return pte >> PTE_PPN_SHIFT; } static pte_t ptd_create(uintptr_t ppn) { - return ppn << RISCV_PGSHIFT | PTE_T | PTE_V; + return (ppn << PTE_PPN_SHIFT) | PTE_TYPE_TABLE; +} + +static inline pte_t pte_create(uintptr_t ppn, int kprot, int uprot) +{ + return PTE_CREATE(ppn, uprot, kprot); } static uintptr_t ppn(uintptr_t addr) @@ -83,19 +88,6 @@ static size_t pt_idx(uintptr_t addr, int level) return idx & ((1 << RISCV_PGLEVEL_BITS) - 1); } -static pte_t super_pte_create(uintptr_t ppn, int kprot, int uprot, int level) -{ - kprot &= (PROT_READ | PROT_WRITE | PROT_EXEC); - uprot &= (PROT_READ | PROT_WRITE | PROT_EXEC); - int perm = (kprot * PTE_SR) | (uprot * PTE_UR) | PTE_V; - return (ppn << (RISCV_PGLEVEL_BITS*level + RISCV_PGSHIFT)) | perm; -} - -static pte_t pte_create(uintptr_t ppn, int kprot, int uprot) -{ - return super_pte_create(ppn, kprot, uprot, 0); -} - static void __maybe_create_root_page_table() { if (root_page_table) @@ -104,6 +96,7 @@ static void __maybe_create_root_page_table() if (have_vm) write_csr(sptbr, root_page_table); } + static pte_t* __walk_internal(uintptr_t addr, int create) { const size_t pte_per_page = RISCV_PGSIZE/sizeof(void*); @@ -113,7 +106,7 @@ static pte_t* __walk_internal(uintptr_t addr, int create) for (unsigned i = RISCV_PGLEVELS-1; i > 0; i--) { size_t idx = pt_idx(addr, i); - if (!(t[idx] & PTE_V)) + if ((t[idx] & PTE_TYPE) == PTE_TYPE_INVALID) { if (!create) return 0; @@ -121,7 +114,7 @@ static pte_t* __walk_internal(uintptr_t addr, int create) t[idx] = ptd_create(ppn(page)); } else - kassert(t[idx] & PTE_T); + kassert((t[idx] & PTE_TYPE) == PTE_TYPE_TABLE); t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT); } return &t[pt_idx(addr, 0)]; @@ -146,15 +139,16 @@ static int __va_avail(uintptr_t vaddr) static uintptr_t __vm_alloc(size_t npage) { uintptr_t start = current.brk, end = current.mmap_max - npage*RISCV_PGSIZE; - for (uintptr_t a = end; a >= start; a -= RISCV_PGSIZE) + for (uintptr_t a = start; a <= end; a += RISCV_PGSIZE) { if (!__va_avail(a)) continue; - uintptr_t last = a, first = a - (npage-1) * RISCV_PGSIZE; - for (a = first; a < last && __va_avail(a); a += RISCV_PGSIZE) + uintptr_t first = a, last = a + (npage-1) * RISCV_PGSIZE; + for (a = last; a > first && __va_avail(a); a -= RISCV_PGSIZE) ; - if (a >= last) - return a; + if (a > first) + continue; + return a; } return 0; } @@ -180,7 +174,7 @@ static int __handle_page_fault(uintptr_t vaddr, int prot) if (pte == 0 || *pte == 0) return -1; - else if (!(*pte & PTE_V)) + else if ((*pte & PTE_TYPE) == PTE_TYPE_INVALID) { kassert(__valid_user_range(vaddr, 1)); uintptr_t ppn = vpn; @@ -226,7 +220,7 @@ static void __do_munmap(uintptr_t addr, size_t len) if (pte == 0 || *pte == 0) continue; - if (!(*pte & PTE_V)) + if ((*pte & PTE_TYPE) == PTE_TYPE_INVALID) __vmr_decref((vmr_t*)*pte, 1); *pte = 0; @@ -377,7 +371,7 @@ uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot) break; } - if(!(*pte & PTE_V)){ + if ((*pte & PTE_TYPE) == PTE_TYPE_INVALID) { vmr_t* v = (vmr_t*)*pte; if((v->prot ^ prot) & ~v->prot){ //TODO:look at file to find perms @@ -385,15 +379,15 @@ uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot) break; } v->prot = prot; - }else{ - pte_t perms = pte_create(0, 0, prot); - if ((*pte & perms) != perms){ + } else { + if (((prot & PROT_READ) && !PTE_UR(*pte)) + || ((prot & PROT_WRITE) && !PTE_UW(*pte)) + || ((prot & PROT_EXEC) && !PTE_UX(*pte))) { //TODO:look at file to find perms res = -EACCES; break; } - pte_t permset = (*pte & ~(PTE_UR | PTE_UW | PTE_UX)) | perms; - *pte = permset; + *pte = pte_create(pte_ppn(*pte), prot, prot); } } spinlock_unlock(&vm_lock); @@ -404,12 +398,11 @@ uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot) void __map_kernel_range(uintptr_t vaddr, uintptr_t paddr, size_t len, int prot) { uintptr_t n = ROUNDUP(len, RISCV_PGSIZE) / RISCV_PGSIZE; - pte_t perms = pte_create(0, prot, 0); for (uintptr_t a = vaddr, i = 0; i < n; i++, a += RISCV_PGSIZE) { pte_t* pte = __walk_create(a); kassert(pte); - *pte = (a - vaddr + paddr) | perms; + *pte = pte_create((a - vaddr + paddr) >> RISCV_PGSHIFT, prot, 0); } } @@ -452,22 +445,25 @@ void supervisor_vm_init() memset(sbi_pt, 0, RISCV_PGSIZE); pte_t* middle_pt = (void*)sbi_pt + RISCV_PGSIZE; #if RISCV_PGLEVELS == 2 + size_t num_middle_pts = 1; root_page_table = middle_pt; #elif RISCV_PGLEVELS == 3 - kassert(current.first_user_vaddr >= -(SUPERPAGE_SIZE << RISCV_PGLEVEL_BITS)); - root_page_table = (void*)middle_pt + RISCV_PGSIZE; - memset(root_page_table, 0, RISCV_PGSIZE); - root_page_table[(1<> RISCV_PGSHIFT) + i); #else #error #endif + memset(middle_pt, 0, root_page_table - middle_pt + RISCV_PGSIZE); write_csr(sptbr, root_page_table); for (uintptr_t vaddr = current.first_user_vaddr, paddr = vaddr + current.bias, end = current.first_vaddr_after_user; paddr < mem_size; vaddr += SUPERPAGE_SIZE, paddr += SUPERPAGE_SIZE) { int l2_shift = RISCV_PGLEVEL_BITS + RISCV_PGSHIFT; - int l2_idx = (vaddr >> l2_shift) & ((1 << RISCV_PGLEVEL_BITS)-1); - middle_pt[l2_idx] = paddr | PTE_V | PTE_G | PTE_SR | PTE_SW | PTE_SX; + size_t l2_idx = (current.first_user_vaddr >> l2_shift) & ((1 << RISCV_PGLEVEL_BITS)-1); + l2_idx += ((vaddr - current.first_user_vaddr) >> l2_shift); + middle_pt[l2_idx] = pte_create(paddr >> RISCV_PGSHIFT, PROT_READ|PROT_WRITE|PROT_EXEC, 0); } current.first_vaddr_after_user += (void*)root_page_table + RISCV_PGSIZE - (void*)sbi_pt; @@ -475,11 +471,11 @@ void supervisor_vm_init() uintptr_t num_sbi_pages = sbi_top_paddr() / RISCV_PGSIZE; for (uintptr_t i = 0; i < num_sbi_pages; i++) { uintptr_t idx = (1 << RISCV_PGLEVEL_BITS) - num_sbi_pages + i; - sbi_pt[idx] = (i * RISCV_PGSIZE) | PTE_V | PTE_G | PTE_SR | PTE_SX; + sbi_pt[idx] = pte_create(i, PROT_READ|PROT_EXEC, 0); } - pte_t* sbi_pte = middle_pt + ((1 << RISCV_PGLEVEL_BITS)-1); + pte_t* sbi_pte = middle_pt + ((num_middle_pts << RISCV_PGLEVEL_BITS)-1); kassert(!*sbi_pte); - *sbi_pte = (uintptr_t)sbi_pt | PTE_T | PTE_V; + *sbi_pte = ptd_create((uintptr_t)sbi_pt >> RISCV_PGSHIFT); // disable our allocator kassert(next_free_page == 0); @@ -498,8 +494,7 @@ void pk_vm_init() write_csr(sscratch, __page_alloc() + RISCV_PGSIZE); size_t stack_size = RISCV_PGSIZE * CLAMP(mem_size/(RISCV_PGSIZE*32), 1, 256); - current.stack_bottom = __do_mmap(0, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); - kassert(current.stack_bottom != (uintptr_t)-1); + current.stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0); current.stack_top = current.stack_bottom + stack_size; - kassert(current.stack_top == current.mmap_max); + kassert(current.stack_bottom != (uintptr_t)-1); } diff --git a/pk/vm.h b/pk/vm.h index 273d71c..6959a38 100644 --- a/pk/vm.h +++ b/pk/vm.h @@ -8,6 +8,9 @@ #include #define SUPERPAGE_SIZE ((uintptr_t)(RISCV_PGSIZE << RISCV_PGLEVEL_BITS)) +#if RISCV_PGLEVELS > 2 +# define MEGAPAGE_SIZE (SUPERPAGE_SIZE << RISCV_PGLEVEL_BITS) +#endif #define PROT_READ 1 #define PROT_WRITE 2 -- cgit v1.1