From 108c414a7730e9666a787d1bc55229d5cb07f7ce Mon Sep 17 00:00:00 2001
From: Andrew Waterman
Date: Wed, 10 Feb 2021 19:15:17 -0800
Subject: pk: remove linear VA mapping constraint

This will improve flexibility going forward.
---
 pk/mmap.c | 129 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 83 insertions(+), 46 deletions(-)

diff --git a/pk/mmap.c b/pk/mmap.c
index 8f52c0d..2e9be94 100644
--- a/pk/mmap.c
+++ b/pk/mmap.c
@@ -18,38 +18,69 @@ typedef struct {
   int prot;
 } vmr_t;
 
+#define RISCV_PGLEVELS ((VA_BITS - RISCV_PGSHIFT) / RISCV_PGLEVEL_BITS)
+
 #define MAX_VMR (RISCV_PGSIZE / sizeof(vmr_t))
 static spinlock_t vm_lock = SPINLOCK_INIT;
 static vmr_t* vmrs;
 
-uintptr_t first_free_paddr;
 static uintptr_t first_free_page;
 static size_t next_free_page;
 static size_t free_pages;
 
 int demand_paging = 1; // unless -p flag is given
 
+struct freelist_node_t {
+  struct freelist_node_t* next;
+  uintptr_t addr;
+};
+
+static struct freelist_node_t* freelist_head;
+static struct freelist_node_t* freelist_node_array;
+
+static void __augment_freelist()
+{
+  if (next_free_page == free_pages)
+    panic("Out of memory!");
+
+  struct freelist_node_t* node = &freelist_node_array[next_free_page];
+  node->addr = first_free_page + RISCV_PGSIZE * next_free_page;
+  node->next = freelist_head;
+  freelist_head = node;
+
+  next_free_page++;
+}
+
 static uintptr_t __page_alloc()
 {
-  kassert(next_free_page != free_pages);
-  uintptr_t addr = first_free_page + RISCV_PGSIZE * next_free_page++;
-  memset((void*)addr, 0, RISCV_PGSIZE);
-  return addr;
+  if (freelist_head == NULL)
+    __augment_freelist();
+
+  struct freelist_node_t* node = freelist_head;
+  uintptr_t addr = node->addr;
+  freelist_head = node->next;
+  node->next = NULL;
+
+  return (uintptr_t)memset((void*)addr, 0, RISCV_PGSIZE);
+}
+
+static void __page_free(uintptr_t addr)
+{
+  size_t idx = (addr - first_free_page) / RISCV_PGSIZE;
+  kassert(idx < free_pages);
+  struct freelist_node_t* node = &freelist_node_array[idx];
+  kassert(node->addr == addr);
+  kassert(node->next == NULL);
+
+  node->next = freelist_head;
+  freelist_head = node;
 }
 
 static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
                           size_t offset, unsigned refcnt, int prot)
 {
-  if (!vmrs) {
-    spinlock_lock(&vm_lock);
-    if (!vmrs) {
-      vmr_t* page = (vmr_t*)__page_alloc();
-      mb();
-      vmrs = page;
-    }
-    spinlock_unlock(&vm_lock);
-  }
-  mb();
+  if (!vmrs)
+    vmrs = (vmr_t*)__page_alloc();
 
   for (vmr_t* v = vmrs; v < vmrs + MAX_VMR; v++) {
     if (v->refcnt == 0) {
@@ -92,34 +123,30 @@ static size_t pt_idx(uintptr_t addr, int level)
   return idx & ((1 << RISCV_PGLEVEL_BITS) - 1);
 }
 
-static pte_t* __walk_create(uintptr_t addr);
-
-static pte_t* __attribute__((noinline)) __continue_walk_create(uintptr_t addr, pte_t* pte)
-{
-  *pte = ptd_create(ppn(__page_alloc()));
-  return __walk_create(addr);
-}
-
-static pte_t* __walk_internal(uintptr_t addr, int create)
+static pte_t* __walk_internal(uintptr_t addr, int create, int level)
 {
   pte_t* t = root_page_table;
-  for (int i = (VA_BITS - RISCV_PGSHIFT) / RISCV_PGLEVEL_BITS - 1; i > 0; i--) {
+  for (int i = RISCV_PGLEVELS - 1; i > level; i--) {
     size_t idx = pt_idx(addr, i);
-    if (unlikely(!(t[idx] & PTE_V)))
-      return create ? __continue_walk_create(addr, &t[idx]) : 0;
+    if (unlikely(!(t[idx] & PTE_V))) {
+      if (create)
+        t[idx] = ptd_create(ppn(__page_alloc()));
+      else
+        return 0;
+    }
     t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT);
   }
-  return &t[pt_idx(addr, 0)];
+  return &t[pt_idx(addr, level)];
 }
 
 static pte_t* __walk(uintptr_t addr)
 {
-  return __walk_internal(addr, 0);
+  return __walk_internal(addr, 0, 0);
 }
 
 static pte_t* __walk_create(uintptr_t addr)
 {
-  return __walk_internal(addr, 1);
+  return __walk_internal(addr, 1, 0);
 }
 
 static int __va_avail(uintptr_t vaddr)
@@ -174,8 +201,8 @@ static int __handle_page_fault(uintptr_t vaddr, int prot)
     return -1;
 
   else if (!(*pte & PTE_V)) {
-    uintptr_t ppn = vpn + (first_free_paddr / RISCV_PGSIZE);
-    uintptr_t kva = ppn * RISCV_PGSIZE;
+    uintptr_t kva = __page_alloc();
+    uintptr_t ppn = kva / RISCV_PGSIZE;
 
     vmr_t* v = (vmr_t*)*pte;
     *pte = pte_create(ppn, prot_to_type(PROT_READ|PROT_WRITE, 0));
@@ -218,7 +245,9 @@ static void __do_munmap(uintptr_t addr, size_t len)
     if (pte == 0 || *pte == 0)
      continue;
 
-    if (!(*pte & PTE_V))
+    if (*pte & PTE_V)
+      __page_free(pte_ppn(*pte) << RISCV_PGSHIFT);
+    else
       __vmr_decref((vmr_t*)*pte, 1);
 
     *pte = 0;
@@ -371,11 +400,19 @@ void __map_kernel_range(uintptr_t vaddr, uintptr_t paddr, size_t len, int prot)
 {
   uintptr_t n = ROUNDUP(len, RISCV_PGSIZE) / RISCV_PGSIZE;
   uintptr_t offset = paddr - vaddr;
-  for (uintptr_t a = vaddr, i = 0; i < n; i++, a += RISCV_PGSIZE)
-  {
-    pte_t* pte = __walk_create(a);
+
+  while (len > 0) {
+    size_t megapage_size = RISCV_PGSIZE << RISCV_PGLEVEL_BITS;
+    int level = (vaddr | paddr) % megapage_size == 0 && len >= megapage_size;
+    size_t pgsize = RISCV_PGSIZE << (level * RISCV_PGLEVEL_BITS);
+
+    pte_t* pte = __walk_internal(vaddr, 1, level);
     kassert(pte);
-    *pte = pte_create((a + offset) >> RISCV_PGSHIFT, prot_to_type(prot, 0));
+    *pte = pte_create((vaddr + offset) >> RISCV_PGSHIFT, prot_to_type(prot, 0));
+
+    len -= pgsize;
+    vaddr += pgsize;
+    paddr += pgsize;
   }
 }
 
@@ -393,21 +430,21 @@ void populate_mapping(const void* start, size_t size, int prot)
 uintptr_t pk_vm_init()
 {
-  // HTIF address signedness and va2pa macro both cap memory size to 2 GiB
-  mem_size = MIN(mem_size, 1U << 31);
-  size_t mem_pages = mem_size >> RISCV_PGSHIFT;
-  free_pages = MAX(8, mem_pages >> (RISCV_PGLEVEL_BITS-1));
-
   extern char _end;
   first_free_page = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
-  first_free_paddr = first_free_page + free_pages * RISCV_PGSIZE;
+  free_pages = (mem_size - (first_free_page - MEM_START)) / RISCV_PGSIZE;
+
+  size_t num_freelist_nodes = mem_size / RISCV_PGSIZE;
+  size_t freelist_node_array_size = ROUNDUP(num_freelist_nodes * sizeof(struct freelist_node_t), RISCV_PGSIZE);
+  freelist_node_array = (struct freelist_node_t*)first_free_page;
+  next_free_page = freelist_node_array_size / RISCV_PGSIZE;
 
   root_page_table = (void*)__page_alloc();
-  __map_kernel_range(MEM_START, MEM_START, first_free_paddr - MEM_START, PROT_READ|PROT_WRITE|PROT_EXEC);
+  __map_kernel_range(MEM_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC);
 
-  current.mmap_max = current.brk_max =
-    MIN(MEM_START, mem_size - (first_free_paddr - MEM_START));
+  current.mmap_max = current.brk_max = MEM_START;
 
+  size_t mem_pages = mem_size >> RISCV_PGSHIFT;
   size_t stack_size = MIN(mem_pages >> 5, 2048) * RISCV_PGSIZE;
   size_t stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
   kassert(stack_bottom != (uintptr_t)-1);
-- 
cgit v1.1
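
A note on the allocator this patch introduces, since the diff shows it piecemeal: the old __page_alloc was a bump allocator over a fixed linear region, with no way to return a page (the old __do_munmap had no physical-page free path). The new scheme keeps one freelist_node_t per physical page in a flat array, lazily moves never-used pages onto a free list (__augment_freelist), and finds a freed page's node in O(1), because the node index is just (addr - first_free_page) / RISCV_PGSIZE. The following is a minimal standalone sketch of the same technique, not pk code: the names (pool, nodes, page_alloc, page_free, NPAGES) are invented for illustration, and a malloc'd buffer stands in for physical memory.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PGSIZE 4096
#define NPAGES 16

typedef struct node {
  struct node* next;
  uintptr_t addr;
} node_t;

static uint8_t* pool;        /* stands in for physical memory */
static node_t nodes[NPAGES]; /* one node per page; index == page number */
static node_t* head;         /* free-list head */
static size_t next_unused;   /* count of pages never handed out yet */

/* Lazily move the next never-used page onto the free list. */
static void augment(void)
{
  assert(next_unused < NPAGES && "out of memory");
  node_t* n = &nodes[next_unused];
  n->addr = (uintptr_t)pool + (uintptr_t)PGSIZE * next_unused++;
  n->next = head;
  head = n;
}

static uintptr_t page_alloc(void)
{
  if (head == NULL)
    augment();
  node_t* n = head;
  head = n->next;
  n->next = NULL; /* mark as allocated; checked again in page_free */
  return (uintptr_t)memset((void*)n->addr, 0, PGSIZE);
}

static void page_free(uintptr_t addr)
{
  size_t idx = (addr - (uintptr_t)pool) / PGSIZE; /* O(1) node lookup */
  assert(idx < NPAGES);
  node_t* n = &nodes[idx];
  assert(n->addr == addr && n->next == NULL);
  n->next = head;
  head = n;
}

int main(void)
{
  pool = malloc((size_t)PGSIZE * NPAGES);
  uintptr_t a = page_alloc();
  uintptr_t b = page_alloc();
  page_free(a);
  uintptr_t c = page_alloc(); /* reuses the page just freed */
  printf("a=%#lx b=%#lx c=%#lx, a==c: %d\n",
         (unsigned long)a, (unsigned long)b, (unsigned long)c, a == c);
  free(pool);
  return 0;
}

The next == NULL convention doubles as a cheap double-free check, mirroring the kasserts in __page_free; it is not airtight, since the tail of the free list also carries a NULL link.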
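
Likewise, the reworked __map_kernel_range no longer maps one base page per iteration: it installs a level-1 megapage whenever both the virtual and physical cursors are megapage-aligned and at least a full megapage of length remains, falling back to a 4 KiB page otherwise; OR-ing the two addresses folds both alignment tests into a single modulo. Below is a runnable toy model of just that size-selection loop, under assumed Sv39-style constants (PGSHIFT = 12, PGLEVEL_BITS = 9, so megapages are 2 MiB); it prints the chosen mapping sizes rather than touching page tables, and like the patch it relies on len being a multiple of the base page size.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PGSHIFT 12
#define PGLEVEL_BITS 9 /* 512 PTEs per table */
#define PGSIZE ((uintptr_t)1 << PGSHIFT)

/* Pick the largest page size usable at the current position. */
static void map_range(uintptr_t vaddr, uintptr_t paddr, size_t len)
{
  while (len > 0) {
    uintptr_t megapage_size = PGSIZE << PGLEVEL_BITS;
    /* OR-ing vaddr and paddr checks both alignments with one test. */
    int level = (vaddr | paddr) % megapage_size == 0 && len >= megapage_size;
    uintptr_t pgsize = PGSIZE << (level * PGLEVEL_BITS);

    printf("map va %#lx -> pa %#lx at level %d (%lu KiB)\n",
           (unsigned long)vaddr, (unsigned long)paddr, level,
           (unsigned long)(pgsize >> 10));

    len -= pgsize;
    vaddr += pgsize;
    paddr += pgsize;
  }
}

int main(void)
{
  /* A range starting 8 KiB below a 2 MiB boundary, 4 MiB + 12 KiB long. */
  map_range(0x200000 - 2 * PGSIZE, 0x80000000u - 2 * PGSIZE,
            (size_t)(4 << 20) + 3 * PGSIZE);
  return 0;
}

This prints two 4 KiB mappings, then two 2 MiB megapages, then one trailing 4 KiB page: the same greedy promotion the new loop performs against real page tables.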