aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/unix/sysv
diff options
context:
space:
mode:
authorMichael Jeanson <mjeanson@efficios.com>2024-07-10 15:48:49 -0400
committerMichael Jeanson <mjeanson@efficios.com>2025-01-10 20:20:27 +0000
commit93d0bfbe8ffa9c3dcbfc8e953216542f500dac07 (patch)
treee7e47bb8b3f6202e13f0a32f71c375ba1e4e9dcd /sysdeps/unix/sysv
parent494d65129ed5ae1154b75cc189bbdde5e9ecf1df (diff)
downloadglibc-93d0bfbe8ffa9c3dcbfc8e953216542f500dac07.zip
glibc-93d0bfbe8ffa9c3dcbfc8e953216542f500dac07.tar.gz
glibc-93d0bfbe8ffa9c3dcbfc8e953216542f500dac07.tar.bz2
nptl: Move the rseq area to the 'extra TLS' block
Move the rseq area to the newly added 'extra TLS' block. This is the last step in adding support for the rseq extended ABI. The size of the rseq area is now dynamic and depends on the rseq features reported by the kernel through the ELF auxiliary vector. This will allow applications to use rseq features past the 32 bytes of the original rseq ABI as they become available in future kernels. Signed-off-by: Michael Jeanson <mjeanson@efficios.com> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Reviewed-by: Florian Weimer <fweimer@redhat.com>
Diffstat (limited to 'sysdeps/unix/sysv')
-rw-r--r--sysdeps/unix/sysv/linux/Makefile10
-rw-r--r--sysdeps/unix/sysv/linux/dl-parse_auxv.h12
-rw-r--r--sysdeps/unix/sysv/linux/rseq-internal.h49
-rw-r--r--sysdeps/unix/sysv/linux/sched_getcpu.c3
-rw-r--r--sysdeps/unix/sysv/linux/tst-rseq-disable-static.c1
-rw-r--r--sysdeps/unix/sysv/linux/tst-rseq-disable.c77
-rw-r--r--sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c1
-rw-r--r--sysdeps/unix/sysv/linux/tst-rseq-static.c1
-rw-r--r--sysdeps/unix/sysv/linux/tst-rseq.c97
-rw-r--r--sysdeps/unix/sysv/linux/tst-rseq.h2
10 files changed, 205 insertions, 48 deletions
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index 8a75529..c8d30cc 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -268,6 +268,11 @@ tests-internal += \
tst-rseq-disable \
# tests-internal
+tests-static += \
+ tst-rseq-disable-static \
+ tst-rseq-static \
+ # tests-static
+
tests-time64 += \
tst-adjtimex-time64 \
tst-clock_adjtime-time64 \
@@ -411,6 +416,7 @@ $(objpfx)tst-sched-consts.out: ../sysdeps/unix/sysv/linux/tst-sched-consts.py
$(objpfx)tst-sched-consts.out: $(sysdeps-linux-python-deps)
tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
+tst-rseq-disable-static-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
endif # $(subdir) == misc
@@ -685,4 +691,8 @@ tests += \
tests-internal += \
tst-rseq-nptl \
# tests-internal
+
+tests-static += \
+ tst-rseq-nptl-static \
+ # tests-static
endif
diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
index 2d42437..41250c9 100644
--- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h
+++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
@@ -61,15 +61,9 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
#endif
/* Get the rseq feature size, with a minimum of RSEQ_AREA_SIZE_INITIAL_USED
- (20) for kernels that don't have AT_RSEQ_FEATURE_SIZE. Limit the feature
- size to RSEQ_AREA_SIZE_MAX_USED (28) which fits the rseq area in 'struct
- pthread' and represents the maximum feature size of currently released
- kernels. Since no kernels currently cross the 32 bytes of the original
- ABI, the semantics of a feature size of 32 or more are still undetermined.
- */
- _rseq_size = MIN (MAX (auxv_values[AT_RSEQ_FEATURE_SIZE],
- RSEQ_AREA_SIZE_INITIAL_USED),
- RSEQ_AREA_SIZE_MAX_USED);
+ (20) for kernels that don't have AT_RSEQ_FEATURE_SIZE. */
+ _rseq_size = MAX (auxv_values[AT_RSEQ_FEATURE_SIZE],
+ RSEQ_AREA_SIZE_INITIAL_USED);
_rseq_align = MAX (auxv_values[AT_RSEQ_ALIGN], RSEQ_MIN_ALIGN);
DL_PLATFORM_AUXV
diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
index 00be15c..f89e784 100644
--- a/sysdeps/unix/sysv/linux/rseq-internal.h
+++ b/sysdeps/unix/sysv/linux/rseq-internal.h
@@ -26,6 +26,30 @@
#include <sys/rseq.h>
#include <ldsodefs.h>
#include <thread_pointer.h>
+#include <rseq-access.h>
+
+/* rseq area registered with the kernel. Use a custom definition here to
+ isolate from the system provided header which could lack some fields of the
+ Extended ABI.
+
+ This is only used to get the field offsets and sizes; it should never be
+ used for direct object allocations.
+
+ Access to fields of the Extended ABI beyond the 20 bytes of the original ABI
+ (after 'flags') must be gated by a check of the feature size. */
+struct rseq_area
+{
+ /* Original ABI. */
+ uint32_t cpu_id_start;
+ uint32_t cpu_id;
+ uint64_t rseq_cs;
+ uint32_t flags;
+ /* Extended ABI. */
+ uint32_t node_id;
+ uint32_t mm_cid;
+ /* Flexible array member to discourage direct object allocations. */
+ char end[];
+};
/* Minimum size of the rseq area allocation required by the syscall. The
actually used rseq feature size may be less (20 bytes initially). */
@@ -47,10 +71,12 @@ extern size_t _rseq_align attribute_hidden;
/* Size of the active features in the rseq area.
Populated from the auxiliary vector with a minimum of '20'.
+ Set to '0' on registration failure of the main thread.
In .data.relro but not yet write-protected. */
extern unsigned int _rseq_size attribute_hidden;
-/* Offset from the thread pointer to the rseq area.
+/* Offset from the thread pointer to the rseq area, always set to allow
+ checking the registration status by reading the 'cpu_id' field.
In .data.relro but not yet write-protected. */
extern ptrdiff_t _rseq_offset attribute_hidden;
@@ -75,34 +101,35 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq)
{
unsigned int size = __rseq_size;
+ /* The feature size can be smaller than the minimum rseq area size of 32
+ bytes accepted by the syscall; if this is the case, bump the size of
+ the registration to the minimum. The 'extra TLS' block is always at
+ least 32 bytes. */
if (size < RSEQ_AREA_SIZE_INITIAL)
- /* The initial implementation used only 20 bytes out of 32,
- but still expected size 32. */
size = RSEQ_AREA_SIZE_INITIAL;
/* Initialize the rseq fields that are read by the kernel on
registration, there is no guarantee that struct pthread is
cleared on all architectures. */
- THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED);
- THREAD_SETMEM (self, rseq_area.cpu_id_start, 0);
- THREAD_SETMEM (self, rseq_area.rseq_cs, 0);
- THREAD_SETMEM (self, rseq_area.flags, 0);
+ RSEQ_SETMEM (cpu_id, RSEQ_CPU_ID_UNINITIALIZED);
+ RSEQ_SETMEM (cpu_id_start, 0);
+ RSEQ_SETMEM (rseq_cs, 0);
+ RSEQ_SETMEM (flags, 0);
- int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area,
- size, 0, RSEQ_SIG);
+ int ret = INTERNAL_SYSCALL_CALL (rseq, RSEQ_SELF (), size, 0, RSEQ_SIG);
if (!INTERNAL_SYSCALL_ERROR_P (ret))
return true;
}
/* When rseq is disabled by tunables or the registration fails, inform
userspace by setting 'cpu_id' to RSEQ_CPU_ID_REGISTRATION_FAILED. */
- THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+ RSEQ_SETMEM (cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
return false;
}
#else /* RSEQ_SIG */
static inline bool
rseq_register_current_thread (struct pthread *self, bool do_rseq)
{
- THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+ RSEQ_SETMEM (cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
return false;
}
#endif /* RSEQ_SIG */
diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c
index 9a2e2a5..828b651 100644
--- a/sysdeps/unix/sysv/linux/sched_getcpu.c
+++ b/sysdeps/unix/sysv/linux/sched_getcpu.c
@@ -19,6 +19,7 @@
#include <sched.h>
#include <sysdep.h>
#include <sysdep-vdso.h>
+#include <rseq-internal.h>
static int
vsyscall_sched_getcpu (void)
@@ -36,6 +37,6 @@ vsyscall_sched_getcpu (void)
int
sched_getcpu (void)
{
- int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
+ int cpu_id = RSEQ_GETMEM_ONCE (cpu_id);
return __glibc_likely (cpu_id >= 0) ? cpu_id : vsyscall_sched_getcpu ();
}
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c b/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c
new file mode 100644
index 0000000..2687d13
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq-disable-static.c
@@ -0,0 +1 @@
+#include "tst-rseq-disable.c"
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable.c b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
index 0e191ff..8e9c11f 100644
--- a/sysdeps/unix/sysv/linux/tst-rseq-disable.c
+++ b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
@@ -26,32 +26,82 @@
#include <unistd.h>
#ifdef RSEQ_SIG
+# include <sys/auxv.h>
+# include <dl-tls.h>
+# include "tst-rseq.h"
+
+/* Used to test private registration with the rseq system call because glibc
+ rseq is disabled. */
+static __thread struct rseq local_rseq = {
+ .cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED,
+};
+
+/* Used to check if the address of the rseq area comes before or after the tls
+ blocks depending on the TLS model. */
+static __thread char tls_var __attribute__ ((tls_model ("initial-exec")));
/* Check that rseq can be registered and has not been taken by glibc. */
static void
check_rseq_disabled (void)
{
- struct pthread *pd = THREAD_SELF;
+ struct rseq *rseq_abi = (struct rseq *) ((char *) __thread_pointer () +
+ __rseq_offset);
+
+#if TLS_TCB_AT_TP
+ /* The rseq area block should come before the thread pointer and be at least
+ 32 bytes. */
+ TEST_VERIFY (__rseq_offset <= -RSEQ_AREA_SIZE_INITIAL);
+
+ /* The rseq area block should come before TLS variables. */
+ TEST_VERIFY ((intptr_t) rseq_abi < (intptr_t) &tls_var);
+#elif TLS_DTV_AT_TP
+ /* The rseq area block should come after the TCB, add the TLS block offset to
+ the rseq offset to get a value relative to the TCB and test that it's
+ non-negative. */
+ TEST_VERIFY (__rseq_offset + TLS_TP_OFFSET >= 0);
+
+ /* The rseq area block should come after TLS variables. */
+ TEST_VERIFY ((intptr_t) rseq_abi > (intptr_t) &tls_var);
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
+ /* __rseq_flags is unused and should always be '0'. */
TEST_COMPARE (__rseq_flags, 0);
- TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
- == (char *) &pd->rseq_area);
+
+ /* When rseq is not registered, __rseq_size should always be '0'. */
TEST_COMPARE (__rseq_size, 0);
- TEST_COMPARE ((int) pd->rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
- int ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
- 0, RSEQ_SIG);
+ /* When rseq is not registered, the 'cpu_id' field should be set to
+ RSEQ_CPU_ID_REGISTRATION_FAILED. */
+ TEST_COMPARE ((int) rseq_abi->cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+
+ /* Test a rseq registration which should succeed since the internal
+ registration is disabled. */
+ int ret = syscall (__NR_rseq, &local_rseq, RSEQ_AREA_SIZE_INITIAL, 0, RSEQ_SIG);
if (ret == 0)
{
- ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
+ /* A successful registration should set the cpu id. */
+ TEST_VERIFY (local_rseq.cpu_id >= 0);
+
+ /* Test we can also unregister rseq. */
+ ret = syscall (__NR_rseq, &local_rseq, RSEQ_AREA_SIZE_INITIAL,
RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
TEST_COMPARE (ret, 0);
- pd->rseq_area.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
}
else
{
- TEST_VERIFY (errno != -EINVAL);
- TEST_VERIFY (errno != -EBUSY);
+ /* Check if we failed with EINVAL which would mean an invalid rseq flags,
+ a mis-aligned rseq area address or an incorrect rseq size. */
+ TEST_VERIFY (errno != EINVAL);
+
+ /* Check if we failed with EBUSY which means an existing rseq
+ registration. */
+ TEST_VERIFY (errno != EBUSY);
+
+ /* Check if we failed with EFAULT which means an invalid rseq area
+ address. */
+ TEST_VERIFY (errno != EFAULT);
}
}
@@ -71,6 +121,13 @@ proc_func (void *ignored)
static int
do_test (void)
{
+ printf ("info: __rseq_size: %u\n", __rseq_size);
+ printf ("info: __rseq_offset: %td\n", __rseq_offset);
+ printf ("info: __rseq_flags: %u\n", __rseq_flags);
+ printf ("info: getauxval (AT_RSEQ_FEATURE_SIZE): %ld\n",
+ getauxval (AT_RSEQ_FEATURE_SIZE));
+ printf ("info: getauxval (AT_RSEQ_ALIGN): %ld\n", getauxval (AT_RSEQ_ALIGN));
+
puts ("info: checking main thread");
check_rseq_disabled ();
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c b/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c
new file mode 100644
index 0000000..6e2c923
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq-nptl-static.c
@@ -0,0 +1 @@
+#include "tst-rseq-nptl.c"
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-static.c b/sysdeps/unix/sysv/linux/tst-rseq-static.c
new file mode 100644
index 0000000..1d97f3b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq-static.c
@@ -0,0 +1 @@
+#include "tst-rseq.c"
diff --git a/sysdeps/unix/sysv/linux/tst-rseq.c b/sysdeps/unix/sysv/linux/tst-rseq.c
index b152280..00181cf 100644
--- a/sysdeps/unix/sysv/linux/tst-rseq.c
+++ b/sysdeps/unix/sysv/linux/tst-rseq.c
@@ -19,6 +19,8 @@
not linked against libpthread. */
#include <support/check.h>
+#include <support/namespace.h>
+#include <support/xthread.h>
#include <stdio.h>
#include <sys/rseq.h>
#include <unistd.h>
@@ -32,25 +34,82 @@
# include <sys/auxv.h>
# include <thread_pointer.h>
# include <tls.h>
+# include <dl-tls.h>
+# include <sys/auxv.h>
# include "tst-rseq.h"
+/* Used to check if the address of the rseq area comes before or after the tls
+ blocks depending on the TLS model. */
+static __thread char tls_var __attribute__ ((tls_model ("initial-exec")));
+
static void
do_rseq_main_test (void)
{
- struct pthread *pd = THREAD_SELF;
- size_t rseq_feature_size = MIN (MAX (getauxval (AT_RSEQ_FEATURE_SIZE),
- RSEQ_AREA_SIZE_INITIAL_USED),
- RSEQ_AREA_SIZE_MAX_USED);
+ size_t rseq_align = MAX (getauxval (AT_RSEQ_ALIGN), RSEQ_MIN_ALIGN);
+ size_t rseq_feature_size = MAX (getauxval (AT_RSEQ_FEATURE_SIZE),
+ RSEQ_AREA_SIZE_INITIAL_USED);
+ size_t rseq_alloc_size = roundup (MAX (rseq_feature_size,
+ RSEQ_AREA_SIZE_INITIAL_USED), rseq_align);
+ struct rseq *rseq_abi = __thread_pointer () + __rseq_offset;
TEST_VERIFY_EXIT (rseq_thread_registered ());
+
+ /* __rseq_flags is unused and should always be '0'. */
TEST_COMPARE (__rseq_flags, 0);
- TEST_VERIFY ((char *) __thread_pointer () + __rseq_offset
- == (char *) &pd->rseq_area);
+
+ /* When rseq is registered, __rseq_size should report the feature size. */
TEST_COMPARE (__rseq_size, rseq_feature_size);
+
+ /* When rseq is registered, the 'cpu_id' field should be set to a valid cpu
+ number. */
+ TEST_VERIFY ((int32_t) rseq_abi->cpu_id >= 0);
+
+ /* The rseq area address must be aligned. */
+ TEST_VERIFY (((unsigned long) rseq_abi % rseq_align) == 0);
+
+#if TLS_TCB_AT_TP
+ /* The rseq area block should come before the thread pointer and be at least
+ 32 bytes. */
+ TEST_VERIFY (__rseq_offset <= -RSEQ_AREA_SIZE_INITIAL);
+
+ /* The rseq area block should come before TLS variables. */
+ TEST_VERIFY ((intptr_t) rseq_abi < (intptr_t) &tls_var);
+#elif TLS_DTV_AT_TP
+ /* The rseq area block should come after the TCB, add the TLS block offset to
+ the rseq offset to get a value relative to the TCB and test that it's
+ non-negative. */
+ TEST_VERIFY (__rseq_offset + TLS_TP_OFFSET >= 0);
+
+ /* The rseq area block should come after TLS variables. */
+ TEST_VERIFY ((intptr_t) rseq_abi > (intptr_t) &tls_var);
+#else
+# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+#endif
+
+ /* Test a rseq registration with the same arguments as the internal
+ registration which should fail with errno == EBUSY. */
+ TEST_VERIFY (((unsigned long) rseq_abi % rseq_align) == 0);
+ TEST_VERIFY (__rseq_size <= rseq_alloc_size);
+ int ret = syscall (__NR_rseq, rseq_abi, rseq_alloc_size, 0, RSEQ_SIG);
+ TEST_VERIFY (ret != 0);
+ TEST_COMPARE (errno, EBUSY);
+}
+
+static void *
+thread_func (void *ignored)
+{
+ do_rseq_main_test ();
+ return NULL;
}
static void
-do_rseq_test (void)
+proc_func (void *ignored)
+{
+ do_rseq_main_test ();
+}
+
+static int
+do_test (void)
{
if (!rseq_available ())
{
@@ -62,21 +121,27 @@ do_rseq_test (void)
printf ("info: getauxval (AT_RSEQ_FEATURE_SIZE): %ld\n",
getauxval (AT_RSEQ_FEATURE_SIZE));
printf ("info: getauxval (AT_RSEQ_ALIGN): %ld\n", getauxval (AT_RSEQ_ALIGN));
+
+ puts ("info: checking main thread");
+ do_rseq_main_test ();
+
+ puts ("info: checking main thread (2)");
do_rseq_main_test ();
+
+ puts ("info: checking new thread");
+ xpthread_join (xpthread_create (NULL, thread_func, NULL));
+
+ puts ("info: checking subprocess");
+ support_isolate_in_subprocess (proc_func, NULL);
+
+ return 0;
}
#else /* RSEQ_SIG */
-static void
-do_rseq_test (void)
-{
- FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
-}
-#endif /* RSEQ_SIG */
-
static int
do_test (void)
{
- do_rseq_test ();
- return 0;
+ FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
}
+#endif /* RSEQ_SIG */
#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/tst-rseq.h b/sysdeps/unix/sysv/linux/tst-rseq.h
index 15f512a..812accc 100644
--- a/sysdeps/unix/sysv/linux/tst-rseq.h
+++ b/sysdeps/unix/sysv/linux/tst-rseq.h
@@ -28,7 +28,7 @@
static inline bool
rseq_thread_registered (void)
{
- return THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id) >= 0;
+ return RSEQ_GETMEM_ONCE (cpu_id) >= 0;
}
static inline int