Diffstat (limited to 'pk/vm.c')
-rw-r--r--  pk/vm.c | 226
1 file changed, 130 insertions(+), 96 deletions(-)
diff --git a/pk/vm.c b/pk/vm.c
index 290a12c..c54417f 100644
--- a/pk/vm.c
+++ b/pk/vm.c
@@ -10,7 +10,7 @@ typedef struct {
size_t length;
file_t* file;
size_t offset;
- size_t refcnt;
+ unsigned refcnt;
int prot;
} vmr_t;
@@ -26,20 +26,21 @@ static size_t free_pages;
static uintptr_t __page_alloc()
{
- if (next_free_page == free_pages)
- return 0;
+ kassert(next_free_page != free_pages);
uintptr_t addr = first_free_page + RISCV_PGSIZE * next_free_page++;
memset((void*)addr, 0, RISCV_PGSIZE);
return addr;
}
static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
- size_t offset, size_t refcnt, int prot)
+ size_t offset, unsigned refcnt, int prot)
{
for (vmr_t* v = vmrs; v < vmrs + MAX_VMR; v++)
{
if (v->refcnt == 0)
{
+ if (file)
+ file_incref(file);
v->addr = addr;
v->length = length;
v->file = file;
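
Moving file_incref() into __vmr_alloc ties the file reference to the VMR's lifetime: the reference taken here is released by file_decref() when __vmr_decref drops refcnt to zero. A minimal sketch of the pairing, with hypothetical locals f and npage:

/* Sketch: the VMR owns exactly one reference to its backing file. */
vmr_t* v = __vmr_alloc(addr, npage * RISCV_PGSIZE, f, offset, npage, prot);
/* ... later, as the mapped pages are released ... */
__vmr_decref(v, npage);  /* refcnt hits 0: the file reference is dropped too */
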
@@ -52,7 +53,7 @@ static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
return NULL;
}
-static void __vmr_decref(vmr_t* v, size_t dec)
+static void __vmr_decref(vmr_t* v, unsigned dec)
{
if ((v->refcnt -= dec) == 0)
{
@@ -95,9 +96,18 @@ static pte_t pte_create(uintptr_t ppn, int kprot, int uprot)
return super_pte_create(ppn, kprot, uprot, 0);
}
-static __attribute__((always_inline)) pte_t* __walk_internal(uintptr_t addr, int create)
+static void __maybe_create_root_page_table()
+{
+ if (root_page_table)
+ return;
+ root_page_table = (void*)__page_alloc();
+ if (have_vm)
+ write_csr(sptbr, root_page_table);
+}
+static pte_t* __walk_internal(uintptr_t addr, int create)
{
const size_t pte_per_page = RISCV_PGSIZE/sizeof(void*);
+ __maybe_create_root_page_table();
pte_t* t = root_page_table;
for (unsigned i = RISCV_PGLEVELS-1; i > 0; i--)
@@ -108,8 +118,6 @@ static __attribute__((always_inline)) pte_t* __walk_internal(uintptr_t addr, int
if (!create)
return 0;
uintptr_t page = __page_alloc();
- if (page == 0)
- return 0;
t[idx] = ptd_create(ppn(page));
}
else
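
Since __page_alloc now panics via kassert instead of returning 0 on exhaustion, the NULL check removed above was dead code; callers may assume success. The simplified pattern, as a sketch:

uintptr_t page = __page_alloc();   /* never 0: kassert fires if the pool is empty */
t[idx] = ptd_create(ppn(page));    /* no failure path left to handle */
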
@@ -138,16 +146,15 @@ static int __va_avail(uintptr_t vaddr)
static uintptr_t __vm_alloc(size_t npage)
{
uintptr_t start = current.brk, end = current.mmap_max - npage*RISCV_PGSIZE;
- for (uintptr_t a = start; a <= end; a += RISCV_PGSIZE)
+ for (uintptr_t a = end; a >= start; a -= RISCV_PGSIZE)
{
if (!__va_avail(a))
continue;
- uintptr_t first = a, last = a + (npage-1) * RISCV_PGSIZE;
- for (a = last; a > first && __va_avail(a); a -= RISCV_PGSIZE)
+ uintptr_t last = a, first = a - (npage-1) * RISCV_PGSIZE;
+ for (a = first; a < last && __va_avail(a); a += RISCV_PGSIZE)
;
- if (a > first)
- continue;
- return a;
+ if (a >= last)
+ return a;
}
return 0;
}
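
The scan direction is reversed: the outer loop proposes the top page of a candidate window, working down from the mmap region's ceiling, and the inner loop sweeps upward through the npage-1 pages below it, so the first fit found is the highest one. A worked trace under hypothetical values, assuming an empty region:

/* RISCV_PGSIZE = 0x1000, npage = 4, current.brk = 0x10000,
   current.mmap_max = 0x40000  =>  start = 0x10000, end = 0x3c000.
   First try: a = 0x3c000 is available; the inner loop finds 0x39000,
   0x3a000, 0x3b000 available too and exits with a == last, so 0x3c000
   is returned. The stack mapped in pk_vm_init below therefore ends
   flush against current.mmap_max, which that function asserts. */
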
@@ -157,6 +164,13 @@ static void flush_tlb()
asm volatile("sfence.vm");
}
+int __valid_user_range(uintptr_t vaddr, size_t len)
+{
+ if (vaddr + len < vaddr)
+ return 0;
+ return vaddr >= current.first_free_paddr && vaddr + len <= current.mmap_max;
+}
+
static int __handle_page_fault(uintptr_t vaddr, int prot)
{
uintptr_t vpn = vaddr >> RISCV_PGSHIFT;
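
__valid_user_range centralizes the bounds test previously duplicated in mmap, munmap, and the fault-handler assertion; its first comparison rejects ranges whose end wraps around the top of the address space. A minimal usage sketch, with hypothetical buf and n:

/* Hypothetical caller: validate a user buffer before dereferencing it. */
if (!__valid_user_range((uintptr_t)buf, n))
  return -EINVAL;
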
@@ -168,7 +182,7 @@ static int __handle_page_fault(uintptr_t vaddr, int prot)
return -1;
else if (!(*pte & PTE_V))
{
- kassert(vaddr < current.stack_top && vaddr >= current.user_min);
+ kassert(__valid_user_range(vaddr, 1));
uintptr_t ppn = vpn;
vmr_t* v = (vmr_t*)*pte;
@@ -225,8 +239,7 @@ uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t*
size_t npage = (length-1)/RISCV_PGSIZE+1;
if (flags & MAP_FIXED)
{
- if ((addr & (RISCV_PGSIZE-1)) || addr < current.user_min ||
- addr + length > current.stack_top || addr + length < addr)
+ if ((addr & (RISCV_PGSIZE-1)) || !__valid_user_range(addr, length))
return (uintptr_t)-1;
}
else if ((addr = __vm_alloc(npage)) == 0)
@@ -247,19 +260,19 @@ uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t*
*pte = (pte_t)v;
}
- if (f) file_incref(f);
-
if (!have_vm || (flags & MAP_POPULATE))
for (uintptr_t a = addr; a < addr + length; a += RISCV_PGSIZE)
kassert(__handle_page_fault(a, prot) == 0);
+ if (current.brk_min != 0 && addr < current.brk_max)
+ current.brk_max = ROUNDUP(addr + length, RISCV_PGSIZE);
+
return addr;
}
int do_munmap(uintptr_t addr, size_t length)
{
- if ((addr & (RISCV_PGSIZE-1)) || addr < current.user_min ||
- addr + length > current.stack_top || addr + length < addr)
+ if ((addr & (RISCV_PGSIZE-1)) || !__valid_user_range(addr, length))
return -EINVAL;
spinlock_lock(&vm_lock);
@@ -280,8 +293,6 @@ uintptr_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, of
spinlock_lock(&vm_lock);
addr = __do_mmap(addr, length, prot, flags, f, offset);
- if (addr < current.brk_max)
- current.brk_max = addr;
spinlock_unlock(&vm_lock);
if (f) file_decref(f);
@@ -318,29 +329,34 @@ uintptr_t do_brk(size_t addr)
return addr;
}
+uintptr_t __do_mremap(uintptr_t addr, size_t old_size, size_t new_size, int flags)
+{
+ for (size_t i = 0; i < MAX_VMR; i++)
+ {
+ if (vmrs[i].refcnt && addr == vmrs[i].addr && old_size == vmrs[i].length)
+ {
+ size_t old_npage = (vmrs[i].length-1)/RISCV_PGSIZE+1;
+ size_t new_npage = (new_size-1)/RISCV_PGSIZE+1;
+ if (new_size < old_size)
+ __do_munmap(addr + new_size, old_size - new_size);
+ else if (new_size > old_size)
+ __do_mmap(addr + old_size, new_size - old_size, vmrs[i].prot, 0,
+ vmrs[i].file, vmrs[i].offset + new_size - old_size);
+ __vmr_decref(&vmrs[i], old_npage - new_npage);
+ return addr;
+ }
+ }
+ return -1;
+}
+
uintptr_t do_mremap(uintptr_t addr, size_t old_size, size_t new_size, int flags)
{
- uintptr_t res = -1;
if (((addr | old_size | new_size) & (RISCV_PGSIZE-1)) ||
(flags & MREMAP_FIXED))
return -EINVAL;
spinlock_lock(&vm_lock);
- for (size_t i = 0; i < MAX_VMR; i++)
- {
- if (vmrs[i].refcnt && addr == vmrs[i].addr && old_size == vmrs[i].length)
- {
- size_t old_npage = (vmrs[i].length-1)/RISCV_PGSIZE+1;
- size_t new_npage = (new_size-1)/RISCV_PGSIZE+1;
- if (new_size < old_size)
- __do_munmap(addr + new_size, old_size - new_size);
- else if (new_size > old_size)
- __do_mmap(addr + old_size, new_size - old_size, vmrs[i].prot, 0,
- vmrs[i].file, vmrs[i].offset + new_size - old_size);
- __vmr_decref(&vmrs[i], old_npage - new_npage);
- res = addr;
- }
- }
+ uintptr_t res = __do_mremap(addr, old_size, new_size, flags);
spinlock_unlock(&vm_lock);
return res;
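
Splitting the body out as __do_mremap follows this file's convention that double-underscore helpers run with vm_lock already held while the public entry point takes and releases it; the helper's early return is safe because the wrapper's unlock runs on every path:

spinlock_lock(&vm_lock);
uintptr_t res = __do_mremap(addr, old_size, new_size, flags);  /* returns on first match */
spinlock_unlock(&vm_lock);  /* reached whether res is addr or -1 */
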
@@ -385,14 +401,15 @@ uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot)
return res;
}
-static void __map_kernel_range(uintptr_t paddr, size_t len, int prot)
+void __map_kernel_range(uintptr_t vaddr, uintptr_t paddr, size_t len, int prot)
{
+ uintptr_t n = ROUNDUP(len, RISCV_PGSIZE) / RISCV_PGSIZE;
pte_t perms = pte_create(0, prot, 0);
- for (uintptr_t a = paddr; a < paddr + len; a += RISCV_PGSIZE)
+ for (uintptr_t a = vaddr, i = 0; i < n; i++, a += RISCV_PGSIZE)
{
pte_t* pte = __walk_create(a);
kassert(pte);
- *pte = a | perms;
+ *pte = (a - vaddr + paddr) | perms;
}
}
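
Separating vaddr from paddr lets one routine build both identity and offset mappings. A sketch of each, where the first call mirrors pk_vm_init below and the second is hypothetical (KVA_BASE and len are made-up names; only identity mappings appear in this file):

/* Identity mapping, as used at boot: */
__map_kernel_range(0, 0, current.first_free_paddr, PROT_READ|PROT_WRITE|PROT_EXEC);
/* Hypothetical offset mapping of physical [0, len) at a high window: */
__map_kernel_range(KVA_BASE, 0, len, PROT_READ|PROT_WRITE);
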
@@ -401,71 +418,88 @@ void populate_mapping(const void* start, size_t size, int prot)
uintptr_t a0 = ROUNDDOWN((uintptr_t)start, RISCV_PGSIZE);
for (uintptr_t a = a0; a < (uintptr_t)start+size; a += RISCV_PGSIZE)
{
- atomic_t* atom = (atomic_t*)(a & -sizeof(atomic_t));
if (prot & PROT_WRITE)
- atomic_add(atom, 0);
+ atomic_add((int*)a, 0);
else
- atomic_read(atom);
+ atomic_read((int*)a);
}
}
-void vm_init()
+static uintptr_t sbi_top_paddr()
{
extern char _end;
- current.user_min = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
- current.brk_min = current.user_min;
- current.brk = 0;
+ return ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
+}
- uint32_t mem_mb = *(volatile uint32_t*)0;
+#define first_free_paddr() (sbi_top_paddr() + RISCV_PGSIZE /* boot stack */)
- if (mem_mb == 0)
- {
- current.stack_bottom = 0;
- current.stack_top = 0;
- current.brk_max = 0;
- current.mmap_max = 0;
- }
- else
- {
- uintptr_t max_addr = (uintptr_t)mem_mb << 20;
- size_t mem_pages = max_addr >> RISCV_PGSHIFT;
- const size_t min_free_pages = 2*RISCV_PGLEVELS;
- const size_t min_stack_pages = 8;
- const size_t max_stack_pages = 1024;
- kassert(mem_pages > min_free_pages + min_stack_pages);
- free_pages = MAX(mem_pages >> (RISCV_PGLEVEL_BITS-1), min_free_pages);
- size_t stack_pages = CLAMP(mem_pages/32, min_stack_pages, max_stack_pages);
- first_free_page = max_addr - free_pages * RISCV_PGSIZE;
-
- uintptr_t root_page_table_paddr = __page_alloc();
- kassert(root_page_table_paddr);
- root_page_table = (pte_t*)root_page_table_paddr;
-
- __map_kernel_range(0, current.user_min, PROT_READ|PROT_WRITE|PROT_EXEC);
-
- int vm_field = sizeof(long) == 4 ? VM_SV32 : VM_SV43;
- if (have_vm)
- {
-#if 0
- write_csr(sptbr, root_page_table_paddr);
- set_csr(mstatus, vm_field << __builtin_ctz(MSTATUS_VM));
-#endif
- have_vm = (clear_csr(mstatus, MSTATUS_VM) & MSTATUS_VM) != VM_MBARE;
- }
+void vm_init()
+{
+ current.first_free_paddr = first_free_paddr();
- size_t stack_size = RISCV_PGSIZE * stack_pages;
- current.stack_top = MIN(first_free_page, 0x80000000); // for RV32 sanity
- uintptr_t stack_bot = current.stack_top - stack_size;
+ size_t mem_pages = mem_size >> RISCV_PGSHIFT;
+ free_pages = MAX(8, mem_pages >> (RISCV_PGLEVEL_BITS-1));
+ first_free_page = mem_size - free_pages * RISCV_PGSIZE;
+ current.mmap_max = current.brk_max = first_free_page;
+}
- if (have_vm)
- {
- __map_kernel_range(first_free_page, free_pages * RISCV_PGSIZE, PROT_READ|PROT_WRITE);
- kassert(__do_mmap(stack_bot, stack_size, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) == stack_bot);
- set_csr(mstatus, vm_field);
- }
+void supervisor_vm_init()
+{
+ uintptr_t highest_va = -current.first_free_paddr;
+ mem_size = MIN(mem_size, highest_va - current.first_user_vaddr) & -SUPERPAGE_SIZE;
+
+ pte_t* sbi_pt = (pte_t*)(current.first_vaddr_after_user + current.bias);
+ memset(sbi_pt, 0, RISCV_PGSIZE);
+ pte_t* middle_pt = (void*)sbi_pt + RISCV_PGSIZE;
+#if RISCV_PGLEVELS == 2
+ root_page_table = middle_pt;
+#elif RISCV_PGLEVELS == 3
+ kassert(current.first_user_vaddr >= -(SUPERPAGE_SIZE << RISCV_PGLEVEL_BITS));
+ root_page_table = (void*)middle_pt + RISCV_PGSIZE;
+ memset(root_page_table, 0, RISCV_PGSIZE);
+ root_page_table[(1<<RISCV_PGLEVEL_BITS)-1] = (uintptr_t)middle_pt | PTE_T | PTE_V;
+#else
+#error
+#endif
+ write_csr(sptbr, root_page_table);
- current.stack_bottom = stack_bot;
- stack_bot -= RISCV_PGSIZE; // guard page
- current.mmap_max = current.brk_max = stack_bot;
+ for (uintptr_t vaddr = current.first_user_vaddr, paddr = vaddr + current.bias, end = current.first_vaddr_after_user;
+ paddr < mem_size; vaddr += SUPERPAGE_SIZE, paddr += SUPERPAGE_SIZE) {
+ int l2_shift = RISCV_PGLEVEL_BITS + RISCV_PGSHIFT;
+ int l2_idx = (vaddr >> l2_shift) & ((1 << RISCV_PGLEVEL_BITS)-1);
+ middle_pt[l2_idx] = paddr | PTE_V | PTE_G | PTE_SR | PTE_SW | PTE_SX;
}
+ current.first_vaddr_after_user += (void*)root_page_table + RISCV_PGSIZE - (void*)sbi_pt;
+
+ // map SBI at top of vaddr space
+ uintptr_t num_sbi_pages = sbi_top_paddr() / RISCV_PGSIZE;
+ for (uintptr_t i = 0; i < num_sbi_pages; i++) {
+ uintptr_t idx = (1 << RISCV_PGLEVEL_BITS) - num_sbi_pages + i;
+ sbi_pt[idx] = (i * RISCV_PGSIZE) | PTE_V | PTE_G | PTE_SR | PTE_SX;
+ }
+ pte_t* sbi_pte = middle_pt + ((1 << RISCV_PGLEVEL_BITS)-1);
+ kassert(!*sbi_pte);
+ *sbi_pte = (uintptr_t)sbi_pt | PTE_T | PTE_V;
+
+ // disable our allocator
+ kassert(next_free_page == 0);
+ free_pages = 0;
+
+ flush_tlb();
+}
+
+void pk_vm_init()
+{
+ __map_kernel_range(0, 0, current.first_free_paddr, PROT_READ|PROT_WRITE|PROT_EXEC);
+ __map_kernel_range(first_free_page, first_free_page, free_pages * RISCV_PGSIZE, PROT_READ|PROT_WRITE);
+
+ extern char trap_entry;
+ write_csr(stvec, &trap_entry);
+ write_csr(sscratch, __page_alloc() + RISCV_PGSIZE);
+
+ size_t stack_size = RISCV_PGSIZE * CLAMP(mem_size/(RISCV_PGSIZE*32), 1, 256);
+ current.stack_bottom = __do_mmap(0, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ kassert(current.stack_bottom != (uintptr_t)-1);
+ current.stack_top = current.stack_bottom + stack_size;
+ kassert(current.stack_top == current.mmap_max);
}
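
The stack sizing arithmetic allots one stack page per 32 pages of RAM, clamped between 1 and 256 pages. A worked example with hypothetical numbers, assuming 4 KiB pages:

/* mem_size = 64 MiB:  64 MiB / (4096 * 32) = 512  ->  CLAMP(512, 1, 256) = 256,
   so stack_size = 256 * 4096 = 1 MiB. */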