Diffstat (limited to 'pk')
-rw-r--r--  pk/mmap.c | 129
1 file changed, 83 insertions(+), 46 deletions(-)
diff --git a/pk/mmap.c b/pk/mmap.c
index 8f52c0d..2e9be94 100644
--- a/pk/mmap.c
+++ b/pk/mmap.c
@@ -18,38 +18,69 @@ typedef struct {
int prot;
} vmr_t;
+#define RISCV_PGLEVELS ((VA_BITS - RISCV_PGSHIFT) / RISCV_PGLEVEL_BITS)
+
#define MAX_VMR (RISCV_PGSIZE / sizeof(vmr_t))
static spinlock_t vm_lock = SPINLOCK_INIT;
static vmr_t* vmrs;
-uintptr_t first_free_paddr;
static uintptr_t first_free_page;
static size_t next_free_page;
static size_t free_pages;
int demand_paging = 1; // unless -p flag is given
+struct freelist_node_t {
+ struct freelist_node_t* next;
+ uintptr_t addr;
+};
+
+static struct freelist_node_t* freelist_head;
+static struct freelist_node_t* freelist_node_array;
+
+static void __augment_freelist()
+{
+ if (next_free_page == free_pages)
+ panic("Out of memory!");
+
+ struct freelist_node_t* node = &freelist_node_array[next_free_page];
+ node->addr = first_free_page + RISCV_PGSIZE * next_free_page;
+ node->next = freelist_head;
+ freelist_head = node;
+
+ next_free_page++;
+}
+
static uintptr_t __page_alloc()
{
- kassert(next_free_page != free_pages);
- uintptr_t addr = first_free_page + RISCV_PGSIZE * next_free_page++;
- memset((void*)addr, 0, RISCV_PGSIZE);
- return addr;
+ if (freelist_head == NULL)
+ __augment_freelist();
+
+ struct freelist_node_t* node = freelist_head;
+ uintptr_t addr = node->addr;
+ freelist_head = node->next;
+ node->next = NULL;
+
+ return (uintptr_t)memset((void*)addr, 0, RISCV_PGSIZE);
+}
+
+static void __page_free(uintptr_t addr)
+{
+ size_t idx = (addr - first_free_page) / RISCV_PGSIZE;
+ kassert(idx < free_pages);
+ struct freelist_node_t* node = &freelist_node_array[idx];
+ kassert(node->addr == addr);
+ kassert(node->next == NULL);
+
+ node->next = freelist_head;
+ freelist_head = node;
}
static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
size_t offset, unsigned refcnt, int prot)
{
- if (!vmrs) {
- spinlock_lock(&vm_lock);
- if (!vmrs) {
- vmr_t* page = (vmr_t*)__page_alloc();
- mb();
- vmrs = page;
- }
- spinlock_unlock(&vm_lock);
- }
- mb();
+ if (!vmrs)
+ vmrs = (vmr_t*)__page_alloc();
for (vmr_t* v = vmrs; v < vmrs + MAX_VMR; v++) {
if (v->refcnt == 0) {
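
The hunk above replaces pk's one-way bump allocator with a LIFO freelist: never-used pages are added lazily by __augment_freelist, and the new __page_free can return frames for reuse. A node's next pointer doubles as an in-use flag (NULL while the page is allocated), which is exactly what the kasserts in __page_free verify. A minimal, self-contained sketch of the same pattern, with a small index array standing in for physical pages (the demo_* names are illustrative, not part of pk):

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    #define PAGE_COUNT 8

    struct demo_node { struct demo_node* next; size_t idx; };

    static struct demo_node nodes[PAGE_COUNT];
    static struct demo_node* head;
    static size_t next_unused;

    static size_t demo_alloc(void)
    {
      if (!head) {                       /* lazily add one never-used page */
        assert(next_unused < PAGE_COUNT);
        nodes[next_unused].idx = next_unused;
        nodes[next_unused].next = head;
        head = &nodes[next_unused++];
      }
      struct demo_node* n = head;        /* pop */
      head = n->next;
      n->next = NULL;                    /* NULL marks the page as in use */
      return n->idx;
    }

    static void demo_free(size_t idx)
    {
      assert(nodes[idx].next == NULL);   /* same sanity check as __page_free */
      nodes[idx].next = head;            /* push */
      head = &nodes[idx];
    }

    int main(void)
    {
      size_t a = demo_alloc();
      demo_free(a);
      printf("reused: %d\n", demo_alloc() == a);  /* LIFO: prints 1 */
      return 0;
    }
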
@@ -92,34 +123,30 @@ static size_t pt_idx(uintptr_t addr, int level)
return idx & ((1 << RISCV_PGLEVEL_BITS) - 1);
}
-static pte_t* __walk_create(uintptr_t addr);
-
-static pte_t* __attribute__((noinline)) __continue_walk_create(uintptr_t addr, pte_t* pte)
-{
- *pte = ptd_create(ppn(__page_alloc()));
- return __walk_create(addr);
-}
-
-static pte_t* __walk_internal(uintptr_t addr, int create)
+static pte_t* __walk_internal(uintptr_t addr, int create, int level)
{
pte_t* t = root_page_table;
- for (int i = (VA_BITS - RISCV_PGSHIFT) / RISCV_PGLEVEL_BITS - 1; i > 0; i--) {
+ for (int i = RISCV_PGLEVELS - 1; i > level; i--) {
size_t idx = pt_idx(addr, i);
- if (unlikely(!(t[idx] & PTE_V)))
- return create ? __continue_walk_create(addr, &t[idx]) : 0;
+ if (unlikely(!(t[idx] & PTE_V))) {
+ if (create)
+ t[idx] = ptd_create(ppn(__page_alloc()));
+ else
+ return 0;
+ }
t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT);
}
- return &t[pt_idx(addr, 0)];
+ return &t[pt_idx(addr, level)];
}
static pte_t* __walk(uintptr_t addr)
{
- return __walk_internal(addr, 0);
+ return __walk_internal(addr, 0, 0);
}
static pte_t* __walk_create(uintptr_t addr)
{
- return __walk_internal(addr, 1);
+ return __walk_internal(addr, 1, 0);
}
static int __va_avail(uintptr_t vaddr)
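
__walk_internal now takes the target level as a parameter instead of always descending to a leaf: the loop stops at the requested level, so a caller passing level 1 gets a pointer into the next-to-last table, where a single PTE covers an entire megapage. A sketch of the index arithmetic this relies on, assuming the usual RV64/Sv39 constants (RISCV_PGSHIFT = 12, RISCV_PGLEVEL_BITS = 9, VA_BITS = 39, hence RISCV_PGLEVELS = 3) and the standard form of pk's pt_idx:

    #include <stdint.h>
    #include <stdio.h>

    #define PGSHIFT      12
    #define PGLEVEL_BITS  9
    #define VA_BITS      39
    #define PGLEVELS     ((VA_BITS - PGSHIFT) / PGLEVEL_BITS)  /* == 3 */

    /* same computation as pk's pt_idx */
    static size_t pt_idx(uintptr_t addr, int level)
    {
      size_t idx = addr >> (PGLEVEL_BITS * level + PGSHIFT);
      return idx & ((1 << PGLEVEL_BITS) - 1);
    }

    int main(void)
    {
      uintptr_t va = 0x80201000;  /* arbitrary example address */
      for (int i = PGLEVELS - 1; i >= 0; i--)
        printf("level %d index: %zu\n", i, pt_idx(va, i));
      return 0;
    }
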
@@ -174,8 +201,8 @@ static int __handle_page_fault(uintptr_t vaddr, int prot)
return -1;
else if (!(*pte & PTE_V))
{
- uintptr_t ppn = vpn + (first_free_paddr / RISCV_PGSIZE);
- uintptr_t kva = ppn * RISCV_PGSIZE;
+ uintptr_t kva = __page_alloc();
+ uintptr_t ppn = kva / RISCV_PGSIZE;
vmr_t* v = (vmr_t*)*pte;
*pte = pte_create(ppn, prot_to_type(PROT_READ|PROT_WRITE, 0));
@@ -218,7 +245,9 @@ static void __do_munmap(uintptr_t addr, size_t len)
if (pte == 0 || *pte == 0)
continue;
- if (!(*pte & PTE_V))
+ if (*pte & PTE_V)
+ __page_free(pte_ppn(*pte) << RISCV_PGSHIFT);
+ else
__vmr_decref((vmr_t*)*pte, 1);
*pte = 0;
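
In __do_munmap the PTE_V test is inverted: a valid (already faulted-in) mapping now hands its physical frame back to __page_free, while an invalid entry still holds a vmr_t pointer and only drops a reference. Recovering the frame's address from the PTE works because pk identity-maps physical memory, so the physical address doubles as a kernel virtual address. A small check of that round trip, assuming RISCV_PGSHIFT = 12 and ignoring the PTE flag bits that pte_ppn strips:

    #include <assert.h>
    #include <stdint.h>

    #define PGSHIFT 12

    int main(void)
    {
      uintptr_t kva = 0x80042000;      /* example page-aligned address */
      uintptr_t ppn = kva >> PGSHIFT;  /* page number stored in the PTE */
      assert((ppn << PGSHIFT) == kva); /* shifting the ppn back recovers it */
      return 0;
    }
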
@@ -371,11 +400,19 @@ void __map_kernel_range(uintptr_t vaddr, uintptr_t paddr, size_t len, int prot)
{
uintptr_t n = ROUNDUP(len, RISCV_PGSIZE) / RISCV_PGSIZE;
uintptr_t offset = paddr - vaddr;
- for (uintptr_t a = vaddr, i = 0; i < n; i++, a += RISCV_PGSIZE)
- {
- pte_t* pte = __walk_create(a);
+
+ while (len > 0) {
+ size_t megapage_size = RISCV_PGSIZE << RISCV_PGLEVEL_BITS;
+ int level = (vaddr | paddr) % megapage_size == 0 && len >= megapage_size;
+ size_t pgsize = RISCV_PGSIZE << (level * RISCV_PGLEVEL_BITS);
+
+ pte_t* pte = __walk_internal(vaddr, 1, level);
kassert(pte);
- *pte = pte_create((a + offset) >> RISCV_PGSHIFT, prot_to_type(prot, 0));
+ *pte = pte_create((vaddr + offset) >> RISCV_PGSHIFT, prot_to_type(prot, 0));
+
+ len -= pgsize;
+ vaddr += pgsize;
+ paddr += pgsize;
}
}
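
__map_kernel_range now uses a megapage whenever both addresses are megapage-aligned and at least one full megapage of length remains; the resulting level flag feeds straight into __walk_internal. A worked instance of that arithmetic, assuming RV64/Sv39 constants (RISCV_PGSIZE = 4096, RISCV_PGLEVEL_BITS = 9, so a megapage is 2 MiB) and example addresses:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
      size_t pgsize = 4096;
      size_t megapage_size = pgsize << 9;  /* 2 MiB */
      uintptr_t vaddr = 0x80000000, paddr = 0x80000000;
      size_t len = 16u << 20;              /* 16 MiB left to map */

      /* same test as the diff: 1 selects a megapage mapping */
      int level = (vaddr | paddr) % megapage_size == 0 && len >= megapage_size;
      size_t pgsize_used = pgsize << (level * 9);
      printf("level=%d, %zu bytes per PTE\n", level, pgsize_used);
      return 0;
    }
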
@@ -393,21 +430,21 @@ void populate_mapping(const void* start, size_t size, int prot)
uintptr_t pk_vm_init()
{
- // HTIF address signedness and va2pa macro both cap memory size to 2 GiB
- mem_size = MIN(mem_size, 1U << 31);
- size_t mem_pages = mem_size >> RISCV_PGSHIFT;
- free_pages = MAX(8, mem_pages >> (RISCV_PGLEVEL_BITS-1));
-
extern char _end;
first_free_page = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
- first_free_paddr = first_free_page + free_pages * RISCV_PGSIZE;
+ free_pages = (mem_size - (first_free_page - MEM_START)) / RISCV_PGSIZE;
+
+ size_t num_freelist_nodes = mem_size / RISCV_PGSIZE;
+ size_t freelist_node_array_size = ROUNDUP(num_freelist_nodes * sizeof(struct freelist_node_t), RISCV_PGSIZE);
+ freelist_node_array = (struct freelist_node_t*)first_free_page;
+ next_free_page = freelist_node_array_size / RISCV_PGSIZE;
root_page_table = (void*)__page_alloc();
- __map_kernel_range(MEM_START, MEM_START, first_free_paddr - MEM_START, PROT_READ|PROT_WRITE|PROT_EXEC);
+ __map_kernel_range(MEM_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC);
- current.mmap_max = current.brk_max =
- MIN(MEM_START, mem_size - (first_free_paddr - MEM_START));
+ current.mmap_max = current.brk_max = MEM_START;
+ size_t mem_pages = mem_size >> RISCV_PGSHIFT;
size_t stack_size = MIN(mem_pages >> 5, 2048) * RISCV_PGSIZE;
size_t stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
kassert(stack_bottom != (uintptr_t)-1);
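
pk_vm_init no longer caps memory at 2 GiB or reserves a fixed page pool; it lays out one freelist node per physical page directly after the kernel image and starts next_free_page past the pages that array occupies, so they are never handed out. A back-of-envelope check of the bookkeeping overhead, assuming RV64 (16-byte nodes) and 4 KiB pages, with 2 GiB of memory as an example value:

    #include <stdio.h>

    int main(void)
    {
      unsigned long long mem_size = 2ULL << 30;      /* 2 GiB, example */
      unsigned long long pgsize = 4096;
      unsigned long long nodes = mem_size / pgsize;  /* one node per page */
      unsigned long long bytes = nodes * 16;         /* node size on RV64 */
      printf("freelist array: %llu MiB (%llu pages)\n",
             bytes >> 20, bytes / pgsize);           /* 8 MiB, 2048 pages */
      return 0;
    }
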