Diffstat (limited to 'nptl/allocatestack.c')
-rw-r--r--  nptl/allocatestack.c  290
1 file changed, 180 insertions, 110 deletions
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 2cb562f..800ca89 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2024 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -132,6 +132,8 @@ get_cached_stack (size_t *sizep, void **memp)
__libc_lock_init (result->exit_lock);
memset (&result->tls_state, 0, sizeof result->tls_state);
+ result->getrandom_buf = NULL;
+
/* Clear the DTV. */
dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
@@ -144,10 +146,37 @@ get_cached_stack (size_t *sizep, void **memp)
return result;
}
+/* Assume support for MADV_GUARD_INSTALL; setup_stack_prot will disable it
+ and fall back to ALLOCATE_GUARD_PROT_NONE if the madvise call fails. */
+static int allocate_stack_mode = ALLOCATE_GUARD_MADV_GUARD;
+
+static inline int stack_prot (void)
+{
+ return (PROT_READ | PROT_WRITE
+ | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
+}
+
+static void *
+allocate_thread_stack (size_t size, size_t guardsize)
+{
+ /* MADV_GUARD_INSTALL does not require an additional PROT_NONE mapping. */
+ int prot = stack_prot ();
+
+ if (atomic_load_relaxed (&allocate_stack_mode) == ALLOCATE_GUARD_PROT_NONE)
+ /* If a guard page is required, avoid committing memory by first
+ allocating with PROT_NONE and then enabling the required permissions
+ on everything except the guard page. */
+ prot = guardsize == 0 ? prot : PROT_NONE;
+
+ return __mmap (NULL, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1,
+ 0);
+}
+
+
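A minimal standalone sketch (not glibc code) of the reservation idea behind the
ALLOCATE_GUARD_PROT_NONE path above: map the whole stack PROT_NONE so nothing is
committed, then enable access on everything but the guard. The reserve_stack()
helper name and the downward-growing layout are illustrative assumptions only.

#define _GNU_SOURCE
#include <stddef.h>
#include <sys/mman.h>

/* Illustrative only: reserve SIZE bytes for a downward-growing thread stack
   with a GUARDSIZE guard at the low end.  Map PROT_NONE first to avoid
   committing memory, then mprotect only the usable part.  */
static void *
reserve_stack (size_t size, size_t guardsize)
{
  void *mem = mmap (NULL, size, PROT_NONE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
  if (mem == MAP_FAILED)
    return NULL;
  if (mprotect ((char *) mem + guardsize, size - guardsize,
                PROT_READ | PROT_WRITE) != 0)
    {
      (void) munmap (mem, size);
      return NULL;
    }
  return mem;
}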
/* Return the guard page position on allocated stack. */
static inline char *
__attribute ((always_inline))
-guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd,
+guard_position (void *mem, size_t size, size_t guardsize, const struct pthread *pd,
size_t pagesize_m1)
{
#if _STACK_GROWS_DOWN
@@ -157,27 +186,131 @@ guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd,
#endif
}
-/* Based on stack allocated with PROT_NONE, setup the required portions with
- 'prot' flags based on the guard page position. */
-static inline int
-setup_stack_prot (char *mem, size_t size, char *guard, size_t guardsize,
- const int prot)
+/* Set up the MEM thread stack of SIZE bytes with the required protection
+ flags, along with a guard area of GUARDSIZE bytes. It first tries
+ MADV_GUARD_INSTALL, and then falls back to setting up the guard area with
+ an extra PROT_NONE mapping. Update PD with the type of guard area used. */
+static inline bool
+setup_stack_prot (char *mem, size_t size, struct pthread *pd,
+ size_t guardsize, size_t pagesize_m1)
{
- char *guardend = guard + guardsize;
+ if (__glibc_unlikely (guardsize == 0))
+ return true;
+
+ char *guard = guard_position (mem, size, guardsize, pd, pagesize_m1);
+ if (atomic_load_relaxed (&allocate_stack_mode) == ALLOCATE_GUARD_MADV_GUARD)
+ {
+ if (__madvise (guard, guardsize, MADV_GUARD_INSTALL) == 0)
+ {
+ pd->stack_mode = ALLOCATE_GUARD_MADV_GUARD;
+ return true;
+ }
+
+ /* If madvise fails it means the kernel does not support the guard
+ advice (we assume that the syscall is available, guard is page-aligned
+ and the length is non-negative). The stack already has the expected
+ protection flags, so it only needs the guard area set to PROT_NONE. */
+ atomic_store_relaxed (&allocate_stack_mode, ALLOCATE_GUARD_PROT_NONE);
+ if (__mprotect (guard, guardsize, PROT_NONE) != 0)
+ return false;
+ }
+ else
+ {
+ const int prot = stack_prot ();
+ char *guardend = guard + guardsize;
#if _STACK_GROWS_DOWN
- /* As defined at guard_position, for architectures with downward stack
- the guard page is always at start of the allocated area. */
- if (__mprotect (guardend, size - guardsize, prot) != 0)
- return errno;
+ /* As defined at guard_position, for architectures with downward stack
+ the guard page is always at start of the allocated area. */
+ if (__mprotect (guardend, size - guardsize, prot) != 0)
+ return false;
#else
- size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem;
- if (__mprotect (mem, mprots1, prot) != 0)
- return errno;
- size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend;
- if (__mprotect (guardend, mprots2, prot) != 0)
- return errno;
+ size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem;
+ if (__mprotect (mem, mprots1, prot) != 0)
+ return false;
+ size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend;
+ if (__mprotect (guardend, mprots2, prot) != 0)
+ return false;
#endif
- return 0;
+ }
+
+ pd->stack_mode = ALLOCATE_GUARD_PROT_NONE;
+ return true;
+}
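The madvise-first-then-mprotect fallback above can be exercised outside glibc
with a sketch like the following. MADV_GUARD_INSTALL only exists since Linux
6.13, so the fallback constant (102, the asm-generic UAPI value) is defined
here as an assumption for older headers, and install_guard() is a hypothetical
helper, not the patch's API.

#define _GNU_SOURCE
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>

#ifndef MADV_GUARD_INSTALL
# define MADV_GUARD_INSTALL 102   /* asm-generic UAPI value, assumed.  */
#endif

/* Illustrative only: install a guard of GUARDSIZE bytes at GUARD inside an
   already mapped, already protected stack.  Prefer the lightweight madvise
   guard (no extra VMA); if the kernel rejects it, fall back to an explicit
   PROT_NONE protection, mirroring ALLOCATE_GUARD_MADV_GUARD vs.
   ALLOCATE_GUARD_PROT_NONE.  */
static bool
install_guard (char *guard, size_t guardsize)
{
  if (madvise (guard, guardsize, MADV_GUARD_INSTALL) == 0)
    return true;
  return mprotect (guard, guardsize, PROT_NONE) == 0;
}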
+
+/* Update the guard area of the thread stack MEM of size SIZE with the new
+ GUARDSIZE. It uses the method recorded in PD's stack_mode. */
+static inline bool
+adjust_stack_prot (char *mem, size_t size, const struct pthread *pd,
+ size_t guardsize, size_t pagesize_m1)
+{
+ /* The required guard area is larger than the current one. For
+ _STACK_GROWS_DOWN it means the guard should increase as:
+
+ |guard|---------------------------------stack|
+ |new guard--|---------------------------stack|
+
+ while for _STACK_GROWS_UP:
+
+ |stack---------------------------|guard|-----|
+ |stack--------------------|new guard---|-----|
+
+ Both madvise and mprotect allow the new region to overlap the existing
+ one, so use the new guard placement with the new size. */
+ if (guardsize > pd->guardsize)
+ {
+ char *guard = guard_position (mem, size, guardsize, pd, pagesize_m1);
+ if (pd->stack_mode == ALLOCATE_GUARD_MADV_GUARD)
+ return __madvise (guard, guardsize, MADV_GUARD_INSTALL) == 0;
+ else if (pd->stack_mode == ALLOCATE_GUARD_PROT_NONE)
+ return __mprotect (guard, guardsize, PROT_NONE) == 0;
+ }
+ /* The current guard area is larger than the required one. For
+ _STACK_GROWS_DOWN it means changing the guard as:
+
+ |guard-------|-------------------------stack|
+ |new guard|----------------------------stack|
+
+ And for _STACK_GROWS_UP:
+
+ |stack---------------------|guard-------|---|
+ |stack------------------------|new guard|---|
+
+ For ALLOCATE_GUARD_MADV_GUARD it means removing the slack area
+ (the disjoint region between the old and new guard), while for
+ ALLOCATE_GUARD_PROT_NONE it requires mprotecting it with the stack
+ protection flags. */
+ else if (pd->guardsize > guardsize)
+ {
+ size_t slacksize = pd->guardsize - guardsize;
+ if (pd->stack_mode == ALLOCATE_GUARD_MADV_GUARD)
+ {
+ void *slack =
+#if _STACK_GROWS_DOWN
+ mem + guardsize;
+#else
+ guard_position (mem, size, pd->guardsize, pd, pagesize_m1);
+#endif
+ return __madvise (slack, slacksize, MADV_GUARD_REMOVE) == 0;
+ }
+ else if (pd->stack_mode == ALLOCATE_GUARD_PROT_NONE)
+ {
+ const int prot = stack_prot ();
+#if _STACK_GROWS_DOWN
+ return __mprotect (mem + guardsize, slacksize, prot) == 0;
+#else
+ char *new_guard = (char *)(((uintptr_t) pd - guardsize)
+ & ~pagesize_m1);
+ char *old_guard = (char *)(((uintptr_t) pd - pd->guardsize)
+ & ~pagesize_m1);
+ /* The guard size difference might be > 0, but once rounded
+ to the nearest page the size difference might be zero. */
+ if (new_guard > old_guard
+ && __mprotect (old_guard, new_guard - old_guard, prot) != 0)
+ return false;
+#endif
+ }
+ }
+ return true;
}
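A worked example of the two branches above for _STACK_GROWS_DOWN, with made-up
addresses and sizes (4 KiB pages), purely for illustration:

/* Assume mem = 0x7f0000000000, size = 128 KiB, page size 4 KiB.

   Growing the guard (pd->guardsize = 4 KiB, guardsize = 16 KiB):
     guard_position () is mem for a downward-growing stack, so the whole
     new guard [mem, mem + 16 KiB) is installed with MADV_GUARD_INSTALL or
     re-protected PROT_NONE; overlapping the old 4 KiB guard is fine.

   Shrinking the guard (pd->guardsize = 16 KiB, guardsize = 4 KiB):
     slacksize = 12 KiB starting at mem + 4 KiB; that slack is handed back
     to the stack either with MADV_GUARD_REMOVE or by mprotecting it with
     the stack protection flags.  */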
/* Mark the memory of the stack as usable to the kernel. It frees everything
@@ -289,7 +422,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
/* This is a user-provided stack. It will not be queued in the
stack cache nor will the memory (except the TLS memory) be freed. */
- pd->user_stack = true;
+ pd->stack_mode = ALLOCATE_GUARD_USER;
/* This is at least the second thread. */
pd->header.multiple_threads = 1;
@@ -323,10 +456,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
/* Allocate some anonymous memory. If possible use the cache. */
size_t guardsize;
size_t reported_guardsize;
- size_t reqsize;
void *mem;
- const int prot = (PROT_READ | PROT_WRITE
- | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
/* Adjust the stack size for alignment. */
size &= ~tls_static_align_m1;
@@ -356,16 +486,10 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
return EINVAL;
/* Try to get a stack from the cache. */
- reqsize = size;
pd = get_cached_stack (&size, &mem);
if (pd == NULL)
{
- /* If a guard page is required, avoid committing memory by first
- allocate with PROT_NONE and then reserve with required permission
- excluding the guard page. */
- mem = __mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
-
+ mem = allocate_thread_stack (size, guardsize);
if (__glibc_unlikely (mem == MAP_FAILED))
return errno;
@@ -392,15 +516,10 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
#endif
/* Now mprotect the required region excluding the guard area. */
- if (__glibc_likely (guardsize > 0))
+ if (!setup_stack_prot (mem, size, pd, guardsize, pagesize_m1))
{
- char *guard = guard_position (mem, size, guardsize, pd,
- pagesize_m1);
- if (setup_stack_prot (mem, size, guard, guardsize, prot) != 0)
- {
- __munmap (mem, size);
- return errno;
- }
+ __munmap (mem, size);
+ return errno;
}
/* Remember the stack-related values. */
@@ -446,25 +565,6 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
- /* There might have been a race. Another thread might have
- caused the stacks to get exec permission while this new
- stack was prepared. Detect if this was possible and
- change the permission if necessary. */
- if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
- && (prot & PROT_EXEC) == 0, 0))
- {
- int err = __nptl_change_stack_perm (pd);
- if (err != 0)
- {
- /* Free the stack memory we just allocated. */
- (void) __munmap (mem, size);
-
- return err;
- }
- }
-
-
/* Note that all of the stack and the thread descriptor is
zeroed. This means we do not have to initialize fields
with initial value zero. This is specifically true for
@@ -473,59 +573,31 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
which will be read next. */
}
- /* Create or resize the guard area if necessary. */
- if (__glibc_unlikely (guardsize > pd->guardsize))
+ /* Create or resize the guard area if necessary on an already
+ allocated stack. */
+ if (!adjust_stack_prot (mem, size, pd, guardsize, pagesize_m1))
{
- char *guard = guard_position (mem, size, guardsize, pd,
- pagesize_m1);
- if (__mprotect (guard, guardsize, PROT_NONE) != 0)
- {
- mprot_error:
- lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+ lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
- /* Remove the thread from the list. */
- __nptl_stack_list_del (&pd->list);
+ /* Remove the thread from the list. */
+ __nptl_stack_list_del (&pd->list);
- lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+ lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
- /* Get rid of the TLS block we allocated. */
- _dl_deallocate_tls (TLS_TPADJ (pd), false);
+ /* Get rid of the TLS block we allocated. */
+ _dl_deallocate_tls (TLS_TPADJ (pd), false);
- /* Free the stack memory regardless of whether the size
- of the cache is over the limit or not. If this piece
- of memory caused problems we better do not use it
- anymore. Uh, and we ignore possible errors. There
- is nothing we could do. */
- (void) __munmap (mem, size);
+ /* Free the stack memory regardless of whether the size
+ of the cache is over the limit or not. If this piece
+ of memory caused problems we better do not use it
+ anymore. Uh, and we ignore possible errors. There
+ is nothing we could do. */
+ (void) __munmap (mem, size);
- return errno;
- }
-
- pd->guardsize = guardsize;
+ return errno;
}
- else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
- 0))
- {
- /* The old guard area is too large. */
-
-#if _STACK_GROWS_DOWN
- if (__mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
- prot) != 0)
- goto mprot_error;
-#elif _STACK_GROWS_UP
- char *new_guard = (char *)(((uintptr_t) pd - guardsize)
- & ~pagesize_m1);
- char *old_guard = (char *)(((uintptr_t) pd - pd->guardsize)
- & ~pagesize_m1);
- /* The guard size difference might be > 0, but once rounded
- to the nearest page the size difference might be zero. */
- if (new_guard > old_guard
- && __mprotect (old_guard, new_guard - old_guard, prot) != 0)
- goto mprot_error;
-#endif
- pd->guardsize = guardsize;
- }
+ pd->guardsize = guardsize;
/* The pthread_getattr_np() calls need to get passed the size
requested in the attribute, regardless of how large the
actually used guardsize is. */
@@ -566,10 +638,6 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
return 0;
}
-/* Maximum supported name from initial kernel support, not exported
- by user API. */
-#define ANON_VMA_NAME_MAX_LEN 80
-
#define SET_STACK_NAME(__prefix, __stack, __stacksize, __tid) \
({ \
char __stack_name[sizeof (__prefix) + \
@@ -585,19 +653,21 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
static void
name_stack_maps (struct pthread *pd, bool set)
{
+ size_t adjust = pd->stack_mode == ALLOCATE_GUARD_PROT_NONE ?
+ pd->guardsize : 0;
#if _STACK_GROWS_DOWN
- void *stack = pd->stackblock + pd->guardsize;
+ void *stack = pd->stackblock + adjust;
#else
void *stack = pd->stackblock;
#endif
- size_t stacksize = pd->stackblock_size - pd->guardsize;
+ size_t stacksize = pd->stackblock_size - adjust;
if (!set)
- __set_vma_name (stack, stacksize, NULL);
+ __set_vma_name (stack, stacksize, " glibc: unused stack");
else
{
unsigned int tid = pd->tid;
- if (pd->user_stack)
+ if (pd->stack_mode == ALLOCATE_GUARD_USER)
SET_STACK_NAME (" glibc: pthread user stack: ", stack, stacksize, tid);
else
SET_STACK_NAME (" glibc: pthread stack: ", stack, stacksize, tid);