Diffstat (limited to 'nptl/allocatestack.c')
-rw-r--r-- | nptl/allocatestack.c | 290
1 file changed, 180 insertions(+), 110 deletions(-)
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 2cb562f..800ca89 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2024 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2025 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -132,6 +132,8 @@ get_cached_stack (size_t *sizep, void **memp)
   __libc_lock_init (result->exit_lock);
   memset (&result->tls_state, 0, sizeof result->tls_state);
 
+  result->getrandom_buf = NULL;
+
   /* Clear the DTV.  */
   dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
   for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
@@ -144,10 +146,37 @@ get_cached_stack (size_t *sizep, void **memp)
   return result;
 }
 
+/* Assume support for MADV_GUARD_INSTALL; setup_stack_prot will disable it
+   and fall back to ALLOCATE_GUARD_PROT_NONE if the madvise call fails.  */
+static int allocate_stack_mode = ALLOCATE_GUARD_MADV_GUARD;
+
+static inline int stack_prot (void)
+{
+  return (PROT_READ | PROT_WRITE
+          | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
+}
+
+static void *
+allocate_thread_stack (size_t size, size_t guardsize)
+{
+  /* MADV_GUARD_INSTALL does not require an additional PROT_NONE mapping.  */
+  int prot = stack_prot ();
+
+  if (atomic_load_relaxed (&allocate_stack_mode) == ALLOCATE_GUARD_PROT_NONE)
+    /* If a guard page is required, avoid committing memory by first
+       allocating with PROT_NONE and then reserving with the required
+       permissions, excluding the guard page.  */
+    prot = guardsize == 0 ? prot : PROT_NONE;
+
+  return __mmap (NULL, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK,
+                 -1, 0);
+}
+
+
 /* Return the guard page position on allocated stack.  */
 static inline char *
 __attribute ((always_inline))
-guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd,
+guard_position (void *mem, size_t size, size_t guardsize, const struct pthread *pd,
                 size_t pagesize_m1)
 {
 #if _STACK_GROWS_DOWN
@@ -157,27 +186,131 @@ guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd,
 #endif
 }
 
-/* Based on stack allocated with PROT_NONE, setup the required portions with
-   'prot' flags based on the guard page position.  */
-static inline int
-setup_stack_prot (char *mem, size_t size, char *guard, size_t guardsize,
-                  const int prot)
+/* Set up the MEM thread stack of SIZE bytes with the required protection
+   flags, along with a guard area of GUARDSIZE bytes.  It first tries
+   MADV_GUARD_INSTALL, and then falls back to setting up the guard area
+   with an extra PROT_NONE mapping.  Update PD with the type of guard
+   area setup.  */
+static inline bool
+setup_stack_prot (char *mem, size_t size, struct pthread *pd,
+                  size_t guardsize, size_t pagesize_m1)
 {
-  char *guardend = guard + guardsize;
+  if (__glibc_unlikely (guardsize == 0))
+    return true;
+
+  char *guard = guard_position (mem, size, guardsize, pd, pagesize_m1);
+  if (atomic_load_relaxed (&allocate_stack_mode) == ALLOCATE_GUARD_MADV_GUARD)
+    {
+      if (__madvise (guard, guardsize, MADV_GUARD_INSTALL) == 0)
+        {
+          pd->stack_mode = ALLOCATE_GUARD_MADV_GUARD;
+          return true;
+        }
+
+      /* If madvise fails it means the kernel does not support the guard
+         advice (we assume that the syscall is available, guard is
+         page-aligned, and length is non-negative).  The stack already has
+         the expected protection flags, so it just needs to PROT_NONE the
+         guard area.  */
+      atomic_store_relaxed (&allocate_stack_mode, ALLOCATE_GUARD_PROT_NONE);
+      if (__mprotect (guard, guardsize, PROT_NONE) != 0)
+        return false;
+    }
+  else
+    {
+      const int prot = stack_prot ();
+      char *guardend = guard + guardsize;
 #if _STACK_GROWS_DOWN
-  /* As defined at guard_position, for architectures with downward stack
-     the guard page is always at start of the allocated area.  */
-  if (__mprotect (guardend, size - guardsize, prot) != 0)
-    return errno;
+      /* As defined at guard_position, for architectures with downward stack
+         the guard page is always at the start of the allocated area.  */
+      if (__mprotect (guardend, size - guardsize, prot) != 0)
+        return false;
 #else
-  size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem;
-  if (__mprotect (mem, mprots1, prot) != 0)
-    return errno;
-  size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend;
-  if (__mprotect (guardend, mprots2, prot) != 0)
-    return errno;
+      size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem;
+      if (__mprotect (mem, mprots1, prot) != 0)
+        return false;
+      size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend;
+      if (__mprotect (guardend, mprots2, prot) != 0)
+        return false;
 #endif
-  return 0;
+    }
+
+  pd->stack_mode = ALLOCATE_GUARD_PROT_NONE;
+  return true;
+}
+
+/* Update the guard area of the thread stack MEM of size SIZE with the new
+   GUARDSIZE.  It uses the method defined by PD's stack_mode.  */
+static inline bool
+adjust_stack_prot (char *mem, size_t size, const struct pthread *pd,
+                   size_t guardsize, size_t pagesize_m1)
+{
+  /* The required guard area is larger than the current one.  For
+     _STACK_GROWS_DOWN it means the guard should increase as:
+
+       |guard|---------------------------------stack|
+       |new guard--|---------------------------stack|
+
+     while for _STACK_GROWS_UP:
+
+       |stack---------------------------|guard|-----|
+       |stack--------------------|new guard---|-----|
+
+     Both madvise and mprotect allow overlapping the required region,
+     so use the new guard placement with the new size.  */
+  if (guardsize > pd->guardsize)
+    {
+      char *guard = guard_position (mem, size, guardsize, pd, pagesize_m1);
+      if (pd->stack_mode == ALLOCATE_GUARD_MADV_GUARD)
+        return __madvise (guard, guardsize, MADV_GUARD_INSTALL) == 0;
+      else if (pd->stack_mode == ALLOCATE_GUARD_PROT_NONE)
+        return __mprotect (guard, guardsize, PROT_NONE) == 0;
+    }
+  /* The current guard area is larger than the required one.  For
+     _STACK_GROWS_DOWN it means changing the guard as:
+
+       |guard-------|-------------------------stack|
+       |new guard|----------------------------stack|
+
+     And for _STACK_GROWS_UP:
+
+       |stack---------------------|guard-------|---|
+       |stack------------------------|new guard|---|
+
+     For ALLOCATE_GUARD_MADV_GUARD it means removing the slack area
+     (the disjoint region of guard and new guard), while for
+     ALLOCATE_GUARD_PROT_NONE it requires mprotecting it with the stack
+     protection flags.  */
+  else if (pd->guardsize > guardsize)
+    {
+      size_t slacksize = pd->guardsize - guardsize;
+      if (pd->stack_mode == ALLOCATE_GUARD_MADV_GUARD)
+        {
+          void *slack =
+#if _STACK_GROWS_DOWN
+            mem + guardsize;
+#else
+            guard_position (mem, size, pd->guardsize, pd, pagesize_m1);
+#endif
+          return __madvise (slack, slacksize, MADV_GUARD_REMOVE) == 0;
+        }
+      else if (pd->stack_mode == ALLOCATE_GUARD_PROT_NONE)
+        {
+          const int prot = stack_prot ();
+#if _STACK_GROWS_DOWN
+          return __mprotect (mem + guardsize, slacksize, prot) == 0;
+#else
+          char *new_guard = (char *)(((uintptr_t) pd - guardsize)
+                                     & ~pagesize_m1);
+          char *old_guard = (char *)(((uintptr_t) pd - pd->guardsize)
+                                     & ~pagesize_m1);
+          /* The guard size difference might be > 0, but once rounded
+             to the nearest page the size difference might be zero.  */
+          if (new_guard > old_guard
+              && __mprotect (old_guard, new_guard - old_guard, prot) != 0)
+            return false;
+#endif
+        }
+    }
+  return true;
 }
 
 /* Mark the memory of the stack as usable to the kernel.  It frees everything
@@ -289,7 +422,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
       /* This is a user-provided stack.  It will not be queued in the
          stack cache nor will the memory (except the TLS memory) be freed.  */
-      pd->user_stack = true;
+      pd->stack_mode = ALLOCATE_GUARD_USER;
 
       /* This is at least the second thread.  */
       pd->header.multiple_threads = 1;
@@ -323,10 +456,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
       /* Allocate some anonymous memory.  If possible use the cache.  */
       size_t guardsize;
       size_t reported_guardsize;
-      size_t reqsize;
       void *mem;
-      const int prot = (PROT_READ | PROT_WRITE
-                        | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
 
       /* Adjust the stack size for alignment.  */
       size &= ~tls_static_align_m1;
@@ -356,16 +486,10 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
        return EINVAL;
 
       /* Try to get a stack from the cache.  */
-      reqsize = size;
       pd = get_cached_stack (&size, &mem);
       if (pd == NULL)
        {
-         /* If a guard page is required, avoid committing memory by first
-            allocate with PROT_NONE and then reserve with required permission
-            excluding the guard page.  */
-         mem = __mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE,
-                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
-
+         mem = allocate_thread_stack (size, guardsize);
          if (__glibc_unlikely (mem == MAP_FAILED))
            return errno;
 
@@ -392,15 +516,10 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 #endif
 
          /* Now mprotect the required region excluding the guard area.  */
-         if (__glibc_likely (guardsize > 0))
+         if (!setup_stack_prot (mem, size, pd, guardsize, pagesize_m1))
            {
-             char *guard = guard_position (mem, size, guardsize, pd,
-                                           pagesize_m1);
-             if (setup_stack_prot (mem, size, guard, guardsize, prot) != 0)
-               {
-                 __munmap (mem, size);
-                 return errno;
-               }
+             __munmap (mem, size);
+             return errno;
            }
 
          /* Remember the stack-related values.  */
@@ -446,25 +565,6 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
          lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-
-         /* There might have been a race.  Another thread might have
-            caused the stacks to get exec permission while this new
-            stack was prepared.  Detect if this was possible and
-            change the permission if necessary.  */
-         if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
-                               && (prot & PROT_EXEC) == 0, 0))
-           {
-             int err = __nptl_change_stack_perm (pd);
-             if (err != 0)
-               {
-                 /* Free the stack memory we just allocated.  */
-                 (void) __munmap (mem, size);
-
-                 return err;
-               }
-           }
-
-
          /* Note that all of the stack and the thread descriptor is
             zeroed.  This means we do not have to initialize fields
            with initial value zero.  This is specifically true for
@@ -473,59 +573,31 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
             which will be read next.  */
        }
 
-      /* Create or resize the guard area if necessary.  */
-      if (__glibc_unlikely (guardsize > pd->guardsize))
+      /* Create or resize the guard area if necessary on an already
+         allocated stack.  */
+      if (!adjust_stack_prot (mem, size, pd, guardsize, pagesize_m1))
        {
-         char *guard = guard_position (mem, size, guardsize, pd,
-                                       pagesize_m1);
-         if (__mprotect (guard, guardsize, PROT_NONE) != 0)
-           {
-           mprot_error:
-             lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+         lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-             /* Remove the thread from the list.  */
-             __nptl_stack_list_del (&pd->list);
+         /* Remove the thread from the list.  */
+         __nptl_stack_list_del (&pd->list);
 
-             lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+         lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-             /* Get rid of the TLS block we allocated.  */
-             _dl_deallocate_tls (TLS_TPADJ (pd), false);
+         /* Get rid of the TLS block we allocated.  */
+         _dl_deallocate_tls (TLS_TPADJ (pd), false);
 
-             /* Free the stack memory regardless of whether the size
-                of the cache is over the limit or not.  If this piece
-                of memory caused problems we better do not use it
-                anymore.  Uh, and we ignore possible errors.  There
-                is nothing we could do.  */
-             (void) __munmap (mem, size);
+         /* Free the stack memory regardless of whether the size
+            of the cache is over the limit or not.  If this piece
+            of memory caused problems we better do not use it
+            anymore.  Uh, and we ignore possible errors.  There
+            is nothing we could do.  */
+         (void) __munmap (mem, size);
 
-             return errno;
-           }
-
-         pd->guardsize = guardsize;
+         return errno;
        }
-      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
-                                 0))
-       {
-         /* The old guard area is too large.  */
-
-#if _STACK_GROWS_DOWN
-         if (__mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
-                         prot) != 0)
-           goto mprot_error;
-#elif _STACK_GROWS_UP
-         char *new_guard = (char *)(((uintptr_t) pd - guardsize)
-                                    & ~pagesize_m1);
-         char *old_guard = (char *)(((uintptr_t) pd - pd->guardsize)
-                                    & ~pagesize_m1);
-         /* The guard size difference might be > 0, but once rounded
-            to the nearest page the size difference might be zero.  */
-         if (new_guard > old_guard
-             && __mprotect (old_guard, new_guard - old_guard, prot) != 0)
-           goto mprot_error;
-#endif
-         pd->guardsize = guardsize;
-       }
+      pd->guardsize = guardsize;
 
       /* The pthread_getattr_np() calls need to get passed the size
          requested in the attribute, regardless of how large the
         actually used guardsize is.  */
@@ -566,10 +638,6 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
   return 0;
 }
 
-/* Maximum supported name from initial kernel support, not exported
-   by user API.  */
-#define ANON_VMA_NAME_MAX_LEN 80
-
 #define SET_STACK_NAME(__prefix, __stack, __stacksize, __tid)    \
   ({                                                             \
     char __stack_name[sizeof (__prefix) +                        \
@@ -585,19 +653,21 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 static void
 name_stack_maps (struct pthread *pd, bool set)
 {
+  size_t adjust = pd->stack_mode == ALLOCATE_GUARD_PROT_NONE ?
+                  pd->guardsize : 0;
 #if _STACK_GROWS_DOWN
-  void *stack = pd->stackblock + pd->guardsize;
+  void *stack = pd->stackblock + adjust;
 #else
   void *stack = pd->stackblock;
 #endif
-  size_t stacksize = pd->stackblock_size - pd->guardsize;
+  size_t stacksize = pd->stackblock_size - adjust;
 
   if (!set)
-    __set_vma_name (stack, stacksize, NULL);
+    __set_vma_name (stack, stacksize, " glibc: unused stack");
   else
     {
       unsigned int tid = pd->tid;
-      if (pd->user_stack)
+      if (pd->stack_mode == ALLOCATE_GUARD_USER)
        SET_STACK_NAME (" glibc: pthread user stack: ", stack, stacksize, tid);
       else
        SET_STACK_NAME (" glibc: pthread stack: ", stack, stacksize, tid);
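
To make the fallback logic concrete, here is a minimal standalone sketch (not glibc code) of the probe-and-fallback pattern that setup_stack_prot implements: try the madvise guard on a fully mapped read-write block first, and only downgrade to a PROT_NONE page when the kernel rejects the advice. The MADV_GUARD_INSTALL/MADV_GUARD_REMOVE values are assumed from the Linux 6.13 uapi headers in case the installed libc headers lack them.

    /* Sketch: install a stack guard with MADV_GUARD_INSTALL, falling
       back to an mprotect'ed PROT_NONE page on older kernels.  */
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #ifndef MADV_GUARD_INSTALL
    /* Assumed values from the Linux 6.13 uapi headers.  */
    # define MADV_GUARD_INSTALL 102
    # define MADV_GUARD_REMOVE  103
    #endif

    int
    main (void)
    {
      size_t pagesize = (size_t) sysconf (_SC_PAGESIZE);
      size_t size = 8 * pagesize;

      /* Map the whole block read-write up front; with madvise guards no
         separate PROT_NONE mapping is needed, so no extra VMA is created.  */
      char *mem = mmap (NULL, size, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (mem == MAP_FAILED)
        return 1;

      /* Guard the lowest page, as for a downward-growing stack.  */
      if (madvise (mem, pagesize, MADV_GUARD_INSTALL) == 0)
        puts ("guard installed with MADV_GUARD_INSTALL");
      /* Failure on a page-aligned range means the kernel predates
         madvise guards; fall back to a PROT_NONE guard page.  */
      else if (mprotect (mem, pagesize, PROT_NONE) == 0)
        puts ("fell back to a PROT_NONE guard page");
      else
        return 1;

      /* The usable area starts one page above the guard; touching MEM
         itself would deliver SIGSEGV in either mode.  */
      memset (mem + pagesize, 0, size - pagesize);

      munmap (mem, size);
      return 0;
    }

The same overlap property motivates adjust_stack_prot: because both madvise guards and mprotect can be reapplied over an existing region, growing or shrinking the guard only touches the affected pages and never remaps the stack.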