From 146391f7eb837e895a46c90ca52333d62448a07b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 17 Mar 2021 01:19:13 -0700 Subject: pk: refactor vm free list management --- pk/mmap.c | 158 +++++++++++++++++++++++++++++++++++++++----------------------- pk/mmap.h | 3 +- pk/pk.c | 9 +++- pk/pk.h | 6 +++ 4 files changed, 115 insertions(+), 61 deletions(-) (limited to 'pk') diff --git a/pk/mmap.c b/pk/mmap.c index 3351295..a19b9a6 100644 --- a/pk/mmap.c +++ b/pk/mmap.c @@ -34,45 +34,94 @@ static size_t free_pages; static size_t pages_promised; int demand_paging = 1; // unless -p flag is given +uint64_t randomize_mapping; // set by --randomize-mapping typedef struct freelist_node_t { - struct freelist_node_t* next; uintptr_t addr; } freelist_node_t; -static freelist_node_t* page_freelist_head; size_t page_freelist_depth; -static freelist_node_t* page_freelist_node_array; +static freelist_node_t* page_freelist_storage; -static bool __augment_freelist() +static uintptr_t free_page_addr(size_t idx) { - if (next_free_page == free_pages) - return false; + return first_free_page + idx * RISCV_PGSIZE; +} + +static uintptr_t __early_alloc(size_t size) +{ + size_t num_pages = ROUNDUP(size, RISCV_PGSIZE) / RISCV_PGSIZE; + if (num_pages + next_free_page < num_pages || num_pages + next_free_page > free_pages) + return 0; + + uintptr_t addr = free_page_addr(next_free_page); + next_free_page += num_pages; + return addr; +} + +static void __maybe_fuzz_page_freelist(); + +static void __page_freelist_insert(freelist_node_t node) +{ + __maybe_fuzz_page_freelist(); + + page_freelist_storage[page_freelist_depth++] = node; +} + +static freelist_node_t __page_freelist_remove() +{ + __maybe_fuzz_page_freelist(); - freelist_node_t* node = &page_freelist_node_array[next_free_page]; - node->addr = first_free_page + RISCV_PGSIZE * next_free_page; - node->next = page_freelist_head; - page_freelist_head = node; - page_freelist_depth++; - next_free_page++; + return page_freelist_storage[--page_freelist_depth]; +} + +static bool __augment_page_freelist() +{ + uintptr_t page = __early_alloc(RISCV_PGSIZE); + if (page != 0) { + freelist_node_t node = { .addr = page }; + __page_freelist_insert(node); + } + return page; +} + +static void __maybe_fuzz_page_freelist() +{ + if (randomize_mapping) { + randomize_mapping = lfsr63(randomize_mapping); + + if (randomize_mapping % 2 == 0 && page_freelist_depth) { + size_t swap_idx = randomize_mapping % page_freelist_depth; + freelist_node_t tmp = page_freelist_storage[swap_idx]; + page_freelist_storage[swap_idx] = page_freelist_storage[page_freelist_depth-1]; + page_freelist_storage[page_freelist_depth-1] = tmp; + } + + if (randomize_mapping % 16 == 0) + __augment_page_freelist(); + } +} + +static bool __page_freelist_empty() +{ + return page_freelist_depth == 0; +} - return true; +static size_t __num_free_pages() +{ + return page_freelist_depth + (free_pages - next_free_page); } static uintptr_t __page_alloc() { - if (page_freelist_head == NULL && !__augment_freelist()) + if (__page_freelist_empty() && !__augment_page_freelist()) return 0; - freelist_node_t* node = page_freelist_head; - uintptr_t addr = node->addr; - page_freelist_head = node->next; - node->next = NULL; - page_freelist_depth--; + freelist_node_t node = __page_freelist_remove(); - memset((void*)pa2kva(addr), 0, RISCV_PGSIZE); + memset((void*)pa2kva(node.addr), 0, RISCV_PGSIZE); - return addr; + return node.addr; } static uintptr_t __page_alloc_assert() @@ -87,20 +136,8 @@ static uintptr_t __page_alloc_assert() static void __page_free(uintptr_t addr) { - size_t idx = (addr - first_free_page) / RISCV_PGSIZE; - kassert(idx < free_pages); - freelist_node_t* node = &page_freelist_node_array[idx]; - kassert(node->addr == addr); - kassert(node->next == NULL); - - node->next = page_freelist_head; - page_freelist_head = node; - page_freelist_depth++; -} - -static size_t __num_free_pages() -{ - return page_freelist_depth + (free_pages - next_free_page); + freelist_node_t node = { .addr = addr }; + __page_freelist_insert(node); } static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file, @@ -163,7 +200,7 @@ static size_t pt_idx(uintptr_t addr, int level) return idx & ((1 << RISCV_PGLEVEL_BITS) - 1); } -static pte_t* __walk_internal(uintptr_t addr, int create, int level) +static inline pte_t* __walk_internal(uintptr_t addr, int create, int level) { pte_t* t = (pte_t*)pa2kva(root_page_table); for (int i = RISCV_PGLEVELS - 1; i > level; i--) { @@ -269,8 +306,6 @@ static int __handle_page_fault(uintptr_t vaddr, int prot) if (ret < RISCV_PGSIZE) memset((void*)vaddr + ret, 0, RISCV_PGSIZE - ret); } - else - memset((void*)vaddr, 0, RISCV_PGSIZE); __vmr_decref(v, 1); *pte = pte_create(ppn, prot_to_type(v->prot, 1)); } @@ -458,23 +493,27 @@ uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot) return res; } -void __map_kernel_range(uintptr_t vaddr, uintptr_t paddr, size_t len, int prot) +static inline void __map_kernel_page(uintptr_t vaddr, uintptr_t paddr, int level, int prot) { - uintptr_t n = ROUNDUP(len, RISCV_PGSIZE) / RISCV_PGSIZE; - uintptr_t offset = paddr - vaddr; + pte_t* pte = __walk_internal(vaddr, 1, level); + kassert(pte); + *pte = pte_create(paddr >> RISCV_PGSHIFT, prot_to_type(prot, 0)); +} - while (len > 0) { - size_t megapage_size = RISCV_PGSIZE << RISCV_PGLEVEL_BITS; - int level = (vaddr | paddr) % megapage_size == 0 && len >= megapage_size; - size_t pgsize = RISCV_PGSIZE << (level * RISCV_PGLEVEL_BITS); +static void __map_kernel_range(uintptr_t vaddr, uintptr_t paddr, size_t len, int prot) +{ + size_t megapage_size = RISCV_PGSIZE << RISCV_PGLEVEL_BITS; + bool megapage_coaligned = (vaddr ^ paddr) % megapage_size == 0; - pte_t* pte = __walk_internal(vaddr, 1, level); - kassert(pte); - *pte = pte_create((vaddr + offset) >> RISCV_PGSHIFT, prot_to_type(prot, 0)); + // could support misaligned mappings, but no need today + kassert((vaddr | paddr | len) % megapage_size == 0); + + while (len > 0) { + __map_kernel_page(vaddr, paddr, 1, prot); - len -= pgsize; - vaddr += pgsize; - paddr += pgsize; + len -= megapage_size; + vaddr += megapage_size; + paddr += megapage_size; } } @@ -490,26 +529,29 @@ void populate_mapping(const void* start, size_t size, int prot) } } -uintptr_t pk_vm_init() +static void init_early_alloc() { // PA space must fit within half of VA space uintptr_t user_size = -KVA_START; mem_size = MIN(mem_size, user_size); + current.mmap_max = current.brk_max = user_size; + extern char _end; first_free_page = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE); free_pages = (mem_size - (first_free_page - MEM_START)) / RISCV_PGSIZE; +} + +uintptr_t pk_vm_init() +{ + init_early_alloc(); size_t num_freelist_nodes = mem_size / RISCV_PGSIZE; - size_t freelist_node_array_size = ROUNDUP(num_freelist_nodes * sizeof(freelist_node_t), RISCV_PGSIZE); - page_freelist_node_array = (freelist_node_t*)first_free_page; - next_free_page = freelist_node_array_size / RISCV_PGSIZE; + page_freelist_storage = (freelist_node_t*)__early_alloc(num_freelist_nodes * sizeof(freelist_node_t)); root_page_table = (void*)__page_alloc_assert(); __map_kernel_range(KVA_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC); - current.mmap_max = current.brk_max = user_size; - flush_tlb(); write_csr(sptbr, ((uintptr_t)root_page_table >> RISCV_PGSHIFT) | SATP_MODE_CHOICE); @@ -517,7 +559,7 @@ uintptr_t pk_vm_init() // relocate kva2pa_offset = KVA_START - MEM_START; - page_freelist_node_array = (void*)pa2kva(page_freelist_node_array); + page_freelist_storage = (void*)pa2kva(page_freelist_storage); return kernel_stack_top; } diff --git a/pk/mmap.h b/pk/mmap.h index 7e68777..949b9a7 100644 --- a/pk/mmap.h +++ b/pk/mmap.h @@ -22,10 +22,11 @@ #define MREMAP_FIXED 0x2 extern int demand_paging; +extern uint64_t randomize_mapping; + uintptr_t pk_vm_init(); int handle_page_fault(uintptr_t vaddr, int prot); void populate_mapping(const void* start, size_t size, int prot); -void __map_kernel_range(uintptr_t va, uintptr_t pa, size_t len, int prot); int __valid_user_range(uintptr_t vaddr, size_t len); uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* file, off_t offset); uintptr_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset); diff --git a/pk/pk.c b/pk/pk.c index 012b697..e83c29b 100644 --- a/pk/pk.c +++ b/pk/pk.c @@ -48,6 +48,11 @@ static void handle_option(const char* arg) return; } + if (strcmp(arg, "--randomize-mapping") == 0) { + randomize_mapping = 1; + return; + } + panic("unrecognized option: `%s'", arg); suggest_help(); } @@ -184,8 +189,6 @@ rest_of_boot_loader:\n\ void rest_of_boot_loader_2(uintptr_t kstack_top) { - file_init(); - static arg_buf args; // avoid large stack allocation size_t argc = parse_args(&args); if (!argc) @@ -202,6 +205,8 @@ void rest_of_boot_loader_2(uintptr_t kstack_top) void boot_loader(uintptr_t dtb) { + file_init(); + uintptr_t kernel_stack_top = pk_vm_init(); extern char trap_entry; diff --git a/pk/pk.h b/pk/pk.h index a55ab6c..0ac2cdc 100644 --- a/pk/pk.h +++ b/pk/pk.h @@ -36,6 +36,12 @@ int snprintf(char* out, size_t n, const char* s, ...); void start_user(trapframe_t* tf) __attribute__((noreturn)); void dump_tf(trapframe_t*); +static uint64_t lfsr63(uint64_t x) +{ + uint64_t bit = (x ^ (x >> 1)) & 1; + return (x >> 1) | (bit << 62); +} + static inline int insn_len(long insn) { return (insn & 0x3) < 0x3 ? 2 : 4; -- cgit v1.1