diff options
-rw-r--r-- | nptl/descr.h | 4 | ||||
-rw-r--r-- | nptl/pthread_create.c | 13 | ||||
-rw-r--r-- | sysdeps/nptl/dl-tls_init_tp.c | 8 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/Makefile | 9 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/aarch64/bits/rseq.h | 43 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/arm/bits/rseq.h | 83 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/bits/rseq.h | 29 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/mips/bits/rseq.h | 62 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/powerpc/bits/rseq.h | 37 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/rseq-internal.h | 45 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/s390/bits/rseq.h | 37 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/sys/rseq.h | 174 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/tst-rseq-nptl.c | 260 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/tst-rseq.c | 64 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/tst-rseq.h | 57 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/x86/bits/rseq.h | 30 |
16 files changed, 952 insertions, 3 deletions
diff --git a/nptl/descr.h b/nptl/descr.h index af2a6ab..92db305 100644 --- a/nptl/descr.h +++ b/nptl/descr.h @@ -34,6 +34,7 @@ #include <bits/types/res_state.h> #include <kernel-features.h> #include <tls-internal-struct.h> +#include <sys/rseq.h> #ifndef TCB_ALIGNMENT # define TCB_ALIGNMENT 32 @@ -406,6 +407,9 @@ struct pthread /* Used on strsignal. */ struct tls_internal_t tls_state; + /* rseq area registered with the kernel. */ + struct rseq rseq_area; + /* This member must be last. */ char end_padding[]; diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c index bad9eeb..ea0d793 100644 --- a/nptl/pthread_create.c +++ b/nptl/pthread_create.c @@ -32,6 +32,7 @@ #include <default-sched.h> #include <futex-internal.h> #include <tls-setup.h> +#include <rseq-internal.h> #include "libioP.h" #include <sys/single_threaded.h> #include <version.h> @@ -366,6 +367,9 @@ start_thread (void *arg) /* Initialize pointers to locale data. */ __ctype_init (); + /* Register rseq TLS to the kernel. */ + rseq_register_current_thread (pd); + #ifndef __ASSUME_SET_ROBUST_LIST if (__nptl_set_robust_list_avail) #endif @@ -571,6 +575,15 @@ out: process is really dead since 'clone' got passed the CLONE_CHILD_CLEARTID flag. The 'tid' field in the TCB will be set to zero. + rseq TLS is still registered at this point. Rely on implicit + unregistration performed by the kernel on thread teardown. This is not a + problem because the rseq TLS lives on the stack, and the stack outlives + the thread. If TCB allocation is ever changed, additional steps may be + required, such as performing explicit rseq unregistration before + reclaiming the rseq TLS area memory. It is NOT sufficient to block + signals because the kernel may write to the rseq area even without + signals. + The exit code is zero since in case all threads exit by calling 'pthread_exit' the exit status must be 0 (zero). */ while (1) diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c index ca494dd..fedb876 100644 --- a/sysdeps/nptl/dl-tls_init_tp.c +++ b/sysdeps/nptl/dl-tls_init_tp.c @@ -21,6 +21,7 @@ #include <list.h> #include <pthreadP.h> #include <tls.h> +#include <rseq-internal.h> #ifndef __ASSUME_SET_ROBUST_LIST bool __nptl_set_robust_list_avail; @@ -57,11 +58,12 @@ __tls_pre_init_tp (void) void __tls_init_tp (void) { + struct pthread *pd = THREAD_SELF; + /* Set up thread stack list management. */ - list_add (&THREAD_SELF->list, &GL (dl_stack_user)); + list_add (&pd->list, &GL (dl_stack_user)); /* Early initialization of the TCB. */ - struct pthread *pd = THREAD_SELF; pd->tid = INTERNAL_SYSCALL_CALL (set_tid_address, &pd->tid); THREAD_SETMEM (pd, specific[0], &pd->specific_1stblock[0]); THREAD_SETMEM (pd, user_stack, true); @@ -90,6 +92,8 @@ __tls_init_tp (void) } } + rseq_register_current_thread (pd); + /* Set initial thread's stack block from 0 up to __libc_stack_end. It will be bigger than it actually is, but for unwind.c/pt-longjmp.c purposes this is good enough. */ diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index 29c6c78..eb0f5fc 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -131,7 +131,10 @@ ifeq ($(have-GLIBC_2.27)$(build-shared),yesyes) tests += tst-ofdlocks-compat endif -tests-internal += tst-sigcontext-get_pc +tests-internal += \ + tst-rseq \ + tst-sigcontext-get_pc \ + # tests-internal tests-time64 += \ tst-adjtimex-time64 \ @@ -357,4 +360,8 @@ endif ifeq ($(subdir),nptl) tests += tst-align-clone tst-getpid1 + +# tst-rseq-nptl is an internal test because it requires a definition of +# __NR_rseq from the internal system call list. +tests-internal += tst-rseq-nptl endif diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h b/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h new file mode 100644 index 0000000..9ba9272 --- /dev/null +++ b/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h @@ -0,0 +1,43 @@ +/* Restartable Sequences Linux aarch64 architecture header. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. + + aarch64 -mbig-endian generates mixed endianness code vs data: + little-endian code and big-endian data. Ensure the RSEQ_SIG signature + matches code endianness. */ + +#define RSEQ_SIG_CODE 0xd428bc00 /* BRK #0x45E0. */ + +#ifdef __AARCH64EB__ +# define RSEQ_SIG_DATA 0x00bc28d4 /* BRK #0x45E0. */ +#else +# define RSEQ_SIG_DATA RSEQ_SIG_CODE +#endif + +#define RSEQ_SIG RSEQ_SIG_DATA diff --git a/sysdeps/unix/sysv/linux/arm/bits/rseq.h b/sysdeps/unix/sysv/linux/arm/bits/rseq.h new file mode 100644 index 0000000..0542b26 --- /dev/null +++ b/sysdeps/unix/sysv/linux/arm/bits/rseq.h @@ -0,0 +1,83 @@ +/* Restartable Sequences Linux arm architecture header. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* + RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. + + - ARM little endian + + RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand + value 0x5de3. This traps if user-space reaches this instruction by mistake, + and the uncommon operand ensures the kernel does not move the instruction + pointer to attacker-controlled code on rseq abort. + + The instruction pattern in the A32 instruction set is: + + e7f5def3 udf #24035 ; 0x5de3 + + This translates to the following instruction pattern in the T16 instruction + set: + + little endian: + def3 udf #243 ; 0xf3 + e7f5 b.n <7f5> + + - ARMv6+ big endian (BE8): + + ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian + code and big-endian data. The data value of the signature needs to have its + byte order reversed to generate the trap instruction: + + Data: 0xf3def5e7 + + Translates to this A32 instruction pattern: + + e7f5def3 udf #24035 ; 0x5de3 + + Translates to this T16 instruction pattern: + + def3 udf #243 ; 0xf3 + e7f5 b.n <7f5> + + - Prior to ARMv6 big endian (BE32): + + Prior to ARMv6, -mbig-endian generates big-endian code and data + (which match), so the endianness of the data representation of the + signature should not be reversed. However, the choice between BE32 + and BE8 is done by the linker, so we cannot know whether code and + data endianness will be mixed before the linker is invoked. So rather + than try to play tricks with the linker, the rseq signature is simply + data (not a trap instruction) prior to ARMv6 on big endian. This is + why the signature is expressed as data (.word) rather than as + instruction (.inst) in assembler. */ + +#ifdef __ARMEB__ +# define RSEQ_SIG 0xf3def5e7 /* udf #24035 ; 0x5de3 (ARMv6+) */ +#else +# define RSEQ_SIG 0xe7f5def3 /* udf #24035 ; 0x5de3 */ +#endif diff --git a/sysdeps/unix/sysv/linux/bits/rseq.h b/sysdeps/unix/sysv/linux/bits/rseq.h new file mode 100644 index 0000000..46cf5d1 --- /dev/null +++ b/sysdeps/unix/sysv/linux/bits/rseq.h @@ -0,0 +1,29 @@ +/* Restartable Sequences architecture header. Stub version. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. */ diff --git a/sysdeps/unix/sysv/linux/mips/bits/rseq.h b/sysdeps/unix/sysv/linux/mips/bits/rseq.h new file mode 100644 index 0000000..a9defee --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/bits/rseq.h @@ -0,0 +1,62 @@ +/* Restartable Sequences Linux mips architecture header. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. + + RSEQ_SIG uses the break instruction. The instruction pattern is: + + On MIPS: + 0350000d break 0x350 + + On nanoMIPS: + 00100350 break 0x350 + + On microMIPS: + 0000d407 break 0x350 + + For nanoMIPS32 and microMIPS, the instruction stream is encoded as + 16-bit halfwords, so the signature halfwords need to be swapped + accordingly for little-endian. */ + +#if defined (__nanomips__) +# ifdef __MIPSEL__ +# define RSEQ_SIG 0x03500010 +# else +# define RSEQ_SIG 0x00100350 +# endif +#elif defined (__mips_micromips) +# ifdef __MIPSEL__ +# define RSEQ_SIG 0xd4070000 +# else +# define RSEQ_SIG 0x0000d407 +# endif +#elif defined (__mips__) +# define RSEQ_SIG 0x0350000d +#else +/* Unknown MIPS architecture. */ +#endif diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h b/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h new file mode 100644 index 0000000..05b3cf7 --- /dev/null +++ b/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h @@ -0,0 +1,37 @@ +/* Restartable Sequences Linux powerpc architecture header. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. + + RSEQ_SIG uses the following trap instruction: + + powerpc-be: 0f e5 00 0b twui r5,11 + powerpc64-le: 0b 00 e5 0f twui r5,11 + powerpc64-be: 0f e5 00 0b twui r5,11 */ + +#define RSEQ_SIG 0x0fe5000b diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h new file mode 100644 index 0000000..909f547 --- /dev/null +++ b/sysdeps/unix/sysv/linux/rseq-internal.h @@ -0,0 +1,45 @@ +/* Restartable Sequences internal API. Linux implementation. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef RSEQ_INTERNAL_H +#define RSEQ_INTERNAL_H + +#include <sysdep.h> +#include <errno.h> +#include <kernel-features.h> +#include <stdio.h> +#include <sys/rseq.h> + +#ifdef RSEQ_SIG +static inline void +rseq_register_current_thread (struct pthread *self) +{ + int ret = INTERNAL_SYSCALL_CALL (rseq, + &self->rseq_area, sizeof (self->rseq_area), + 0, RSEQ_SIG); + if (INTERNAL_SYSCALL_ERROR_P (ret)) + THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED); +} +#else /* RSEQ_SIG */ +static inline void +rseq_register_current_thread (struct pthread *self) +{ + THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED); +} +#endif /* RSEQ_SIG */ + +#endif /* rseq-internal.h */ diff --git a/sysdeps/unix/sysv/linux/s390/bits/rseq.h b/sysdeps/unix/sysv/linux/s390/bits/rseq.h new file mode 100644 index 0000000..3030e38 --- /dev/null +++ b/sysdeps/unix/sysv/linux/s390/bits/rseq.h @@ -0,0 +1,37 @@ +/* Restartable Sequences Linux s390 architecture header. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + It is a 32-bit value that maps to actual architecture code compiled + into applications and libraries. It needs to be defined for each + architecture. When choosing this value, it needs to be taken into + account that generating invalid instructions may have ill effects on + tools like objdump, and may also have impact on the CPU speculative + execution efficiency in some cases. + + RSEQ_SIG uses the trap4 instruction. As Linux does not make use of the + access-register mode nor the linkage stack this instruction will always + cause a special-operation exception (the trap-enabled bit in the DUCT + is and will stay 0). The instruction pattern is + b2 ff 0f ff trap4 4095(%r0) */ + +#define RSEQ_SIG 0xB2FF0FFF diff --git a/sysdeps/unix/sysv/linux/sys/rseq.h b/sysdeps/unix/sysv/linux/sys/rseq.h new file mode 100644 index 0000000..c8edff5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sys/rseq.h @@ -0,0 +1,174 @@ +/* Restartable Sequences exported symbols. Linux header. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +#define _SYS_RSEQ_H 1 + +/* Architecture-specific rseq signature. */ +#include <bits/rseq.h> + +#include <stdint.h> +#include <sys/cdefs.h> +#include <bits/endian.h> + +#ifdef __has_include +# if __has_include ("linux/rseq.h") +# define __GLIBC_HAVE_KERNEL_RSEQ +# endif +#else +# include <linux/version.h> +# if LINUX_VERSION_CODE >= KERNEL_VERSION (4, 18, 0) +# define __GLIBC_HAVE_KERNEL_RSEQ +# endif +#endif + +#ifdef __GLIBC_HAVE_KERNEL_RSEQ +/* We use the structures declarations from the kernel headers. */ +# include <linux/rseq.h> +#else /* __GLIBC_HAVE_KERNEL_RSEQ */ +/* We use a copy of the include/uapi/linux/rseq.h kernel header. */ + +enum rseq_cpu_id_state + { + RSEQ_CPU_ID_UNINITIALIZED = -1, + RSEQ_CPU_ID_REGISTRATION_FAILED = -2, + }; + +enum rseq_flags + { + RSEQ_FLAG_UNREGISTER = (1 << 0), + }; + +enum rseq_cs_flags_bit + { + RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, + }; + +enum rseq_cs_flags + { + RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = + (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = + (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = + (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), + }; + +/* struct rseq_cs is aligned on 32 bytes to ensure it is always + contained within a single cache-line. It is usually declared as + link-time constant data. */ +struct rseq_cs + { + /* Version of this structure. */ + uint32_t version; + /* enum rseq_cs_flags. */ + uint32_t flags; + uint64_t start_ip; + /* Offset from start_ip. */ + uint64_t post_commit_offset; + uint64_t abort_ip; + } __attribute__ ((__aligned__ (32))); + +/* struct rseq is aligned on 32 bytes to ensure it is always + contained within a single cache-line. + + A single struct rseq per thread is allowed. */ +struct rseq + { + /* Restartable sequences cpu_id_start field. Updated by the + kernel. Read by user-space with single-copy atomicity + semantics. This field should only be read by the thread which + registered this data structure. Aligned on 32-bit. Always + contains a value in the range of possible CPUs, although the + value may not be the actual current CPU (e.g. if rseq is not + initialized). This CPU number value should always be compared + against the value of the cpu_id field before performing a rseq + commit or returning a value read from a data structure indexed + using the cpu_id_start value. */ + uint32_t cpu_id_start; + /* Restartable sequences cpu_id field. Updated by the kernel. + Read by user-space with single-copy atomicity semantics. This + field should only be read by the thread which registered this + data structure. Aligned on 32-bit. Values + RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED + have a special semantic: the former means "rseq uninitialized", + and latter means "rseq initialization failed". This value is + meant to be read within rseq critical sections and compared + with the cpu_id_start value previously read, before performing + the commit instruction, or read and compared with the + cpu_id_start value before returning a value loaded from a data + structure indexed using the cpu_id_start value. */ + uint32_t cpu_id; + /* Restartable sequences rseq_cs field. + + Contains NULL when no critical section is active for the current + thread, or holds a pointer to the currently active struct rseq_cs. + + Updated by user-space, which sets the address of the currently + active rseq_cs at the beginning of assembly instruction sequence + block, and set to NULL by the kernel when it restarts an assembly + instruction sequence block, as well as when the kernel detects that + it is preempting or delivering a signal outside of the range + targeted by the rseq_cs. Also needs to be set to NULL by user-space + before reclaiming memory that contains the targeted struct rseq_cs. + + Read and set by the kernel. Set by user-space with single-copy + atomicity semantics. This field should only be updated by the + thread which registered this data structure. Aligned on 64-bit. */ + union + { + uint64_t ptr64; +# ifdef __LP64__ + uint64_t ptr; +# else /* __LP64__ */ + struct + { +#if __BYTE_ORDER == __BIG_ENDIAN + uint32_t padding; /* Initialized to zero. */ + uint32_t ptr32; +# else /* LITTLE */ + uint32_t ptr32; + uint32_t padding; /* Initialized to zero. */ +# endif /* ENDIAN */ + } ptr; +# endif /* __LP64__ */ + } rseq_cs; + + /* Restartable sequences flags field. + + This field should only be updated by the thread which + registered this data structure. Read by the kernel. + Mainly used for single-stepping through rseq critical sections + with debuggers. + + - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT + Inhibit instruction sequence block restart on preemption + for this thread. + - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL + Inhibit instruction sequence block restart on signal + delivery for this thread. + - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE + Inhibit instruction sequence block restart on migration for + this thread. */ + uint32_t flags; + } __attribute__ ((__aligned__ (32))); + +#endif /* __GLIBC_HAVE_KERNEL_RSEQ */ + +#endif /* sys/rseq.h */ diff --git a/sysdeps/unix/sysv/linux/tst-rseq-nptl.c b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c new file mode 100644 index 0000000..d31d944 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c @@ -0,0 +1,260 @@ +/* Restartable Sequences NPTL test. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* These tests validate that rseq is registered from various execution + contexts (main thread, destructor, other threads, other threads created + from destructor, forked process (without exec), pthread_atfork handlers, + pthread setspecific destructors, signal handlers, atexit handlers). + + See the Linux kernel selftests for extensive rseq stress-tests. */ + +#include <stdio.h> +#include <support/check.h> +#include <support/xthread.h> +#include <sys/rseq.h> +#include <unistd.h> + +#ifdef RSEQ_SIG +# include <array_length.h> +# include <errno.h> +# include <error.h> +# include <pthread.h> +# include <signal.h> +# include <stdlib.h> +# include <string.h> +# include <support/namespace.h> +# include <support/xsignal.h> +# include <syscall.h> +# include <sys/types.h> +# include <sys/wait.h> +# include "tst-rseq.h" + +static pthread_key_t rseq_test_key; + +static void +atfork_prepare (void) +{ + if (!rseq_thread_registered ()) + { + printf ("error: rseq not registered in pthread atfork prepare\n"); + support_record_failure (); + } +} + +static void +atfork_parent (void) +{ + if (!rseq_thread_registered ()) + { + printf ("error: rseq not registered in pthread atfork parent\n"); + support_record_failure (); + } +} + +static void +atfork_child (void) +{ + if (!rseq_thread_registered ()) + { + printf ("error: rseq not registered in pthread atfork child\n"); + support_record_failure (); + } +} + +static void +rseq_key_destructor (void *arg) +{ + /* Cannot use deferred failure reporting after main returns. */ + if (!rseq_thread_registered ()) + FAIL_EXIT1 ("rseq not registered in pthread key destructor"); +} + +static void +atexit_handler (void) +{ + /* Cannot use deferred failure reporting after main returns. */ + if (!rseq_thread_registered ()) + FAIL_EXIT1 ("rseq not registered in atexit handler"); +} + +/* Used to avoid -Werror=stringop-overread warning with + pthread_setspecific and GCC 11. */ +static char one = 1; + +static void +do_rseq_main_test (void) +{ + TEST_COMPARE (atexit (atexit_handler), 0); + rseq_test_key = xpthread_key_create (rseq_key_destructor); + TEST_COMPARE (pthread_atfork (atfork_prepare, atfork_parent, atfork_child), 0); + xraise (SIGUSR1); + TEST_COMPARE (pthread_setspecific (rseq_test_key, &one), 0); + TEST_VERIFY_EXIT (rseq_thread_registered ()); +} + +static void +cancel_routine (void *arg) +{ + if (!rseq_thread_registered ()) + { + printf ("error: rseq not registered in cancel routine\n"); + support_record_failure (); + } +} + +static pthread_barrier_t cancel_thread_barrier; +static pthread_cond_t cancel_thread_cond = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t cancel_thread_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void +test_cancel_thread (void) +{ + pthread_cleanup_push (cancel_routine, NULL); + (void) xpthread_barrier_wait (&cancel_thread_barrier); + /* Wait forever until cancellation. */ + xpthread_cond_wait (&cancel_thread_cond, &cancel_thread_mutex); + pthread_cleanup_pop (0); +} + +static void * +thread_function (void * arg) +{ + int i = (int) (intptr_t) arg; + + xraise (SIGUSR1); + if (i == 0) + test_cancel_thread (); + TEST_COMPARE (pthread_setspecific (rseq_test_key, &one), 0); + return rseq_thread_registered () ? NULL : (void *) 1l; +} + +static void +sighandler (int sig) +{ + if (!rseq_thread_registered ()) + { + printf ("error: rseq not registered in signal handler\n"); + support_record_failure (); + } +} + +static void +setup_signals (void) +{ + struct sigaction sa; + + sigemptyset (&sa.sa_mask); + sigaddset (&sa.sa_mask, SIGUSR1); + sa.sa_flags = 0; + sa.sa_handler = sighandler; + xsigaction (SIGUSR1, &sa, NULL); +} + +static int +do_rseq_threads_test (int nr_threads) +{ + pthread_t th[nr_threads]; + int i; + int result = 0; + + xpthread_barrier_init (&cancel_thread_barrier, NULL, 2); + + for (i = 0; i < nr_threads; ++i) + th[i] = xpthread_create (NULL, thread_function, + (void *) (intptr_t) i); + + (void) xpthread_barrier_wait (&cancel_thread_barrier); + + xpthread_cancel (th[0]); + + for (i = 0; i < nr_threads; ++i) + { + void *v; + + v = xpthread_join (th[i]); + if (i != 0 && v != NULL) + { + printf ("error: join %d successful, but child failed\n", i); + result = 1; + } + else if (i == 0 && v == NULL) + { + printf ("error: join %d successful, child did not fail as expected\n", i); + result = 1; + } + } + + xpthread_barrier_destroy (&cancel_thread_barrier); + + return result; +} + +static void +subprocess_callback (void *closure) +{ + do_rseq_main_test (); +} + +static void +do_rseq_fork_test (void) +{ + support_isolate_in_subprocess (subprocess_callback, NULL); +} + +static int +do_rseq_test (void) +{ + int t[] = { 1, 2, 6, 5, 4, 3, 50 }; + int i, result = 0; + + if (!rseq_available ()) + FAIL_UNSUPPORTED ("kernel does not support rseq, skipping test"); + setup_signals (); + xraise (SIGUSR1); + do_rseq_main_test (); + for (i = 0; i < array_length (t); i++) + if (do_rseq_threads_test (t[i])) + result = 1; + do_rseq_fork_test (); + return result; +} + +static void __attribute__ ((destructor)) +do_rseq_destructor_test (void) +{ + /* Cannot use deferred failure reporting after main returns. */ + if (do_rseq_test ()) + FAIL_EXIT1 ("rseq not registered within destructor"); + xpthread_key_delete (rseq_test_key); +} + +#else /* RSEQ_SIG */ +static int +do_rseq_test (void) +{ + FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test"); + return 0; +} +#endif /* RSEQ_SIG */ + +static int +do_test (void) +{ + return do_rseq_test (); +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/tst-rseq.c b/sysdeps/unix/sysv/linux/tst-rseq.c new file mode 100644 index 0000000..926376b --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-rseq.c @@ -0,0 +1,64 @@ +/* Restartable Sequences single-threaded tests. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +/* These tests validate that rseq is registered from main in an executable + not linked against libpthread. */ + +#include <support/check.h> +#include <stdio.h> +#include <sys/rseq.h> +#include <unistd.h> + +#ifdef RSEQ_SIG +# include <errno.h> +# include <error.h> +# include <stdlib.h> +# include <string.h> +# include <syscall.h> +# include "tst-rseq.h" + +static void +do_rseq_main_test (void) +{ + TEST_VERIFY_EXIT (rseq_thread_registered ()); +} + +static void +do_rseq_test (void) +{ + if (!rseq_available ()) + { + FAIL_UNSUPPORTED ("kernel does not support rseq, skipping test"); + } + do_rseq_main_test (); +} +#else /* RSEQ_SIG */ +static void +do_rseq_test (void) +{ + FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test"); +} +#endif /* RSEQ_SIG */ + +static int +do_test (void) +{ + do_rseq_test (); + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/unix/sysv/linux/tst-rseq.h b/sysdeps/unix/sysv/linux/tst-rseq.h new file mode 100644 index 0000000..a476c31 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-rseq.h @@ -0,0 +1,57 @@ +/* Restartable Sequences tests header. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <error.h> +#include <stdbool.h> +#include <stdint.h> +#include <support/check.h> +#include <syscall.h> +#include <sys/rseq.h> +#include <tls.h> + +static inline bool +rseq_thread_registered (void) +{ + return THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id) >= 0; +} + +static inline int +sys_rseq (struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) +{ + return syscall (__NR_rseq, rseq_abi, rseq_len, flags, sig); +} + +static inline bool +rseq_available (void) +{ + int rc; + + rc = sys_rseq (NULL, 0, 0, 0); + if (rc != -1) + FAIL_EXIT1 ("Unexpected rseq return value %d", rc); + switch (errno) + { + case ENOSYS: + return false; + case EINVAL: + /* rseq is implemented, but detected an invalid rseq_len parameter. */ + return true; + default: + FAIL_EXIT1 ("Unexpected rseq error %s", strerror (errno)); + } +} diff --git a/sysdeps/unix/sysv/linux/x86/bits/rseq.h b/sysdeps/unix/sysv/linux/x86/bits/rseq.h new file mode 100644 index 0000000..9fc909e --- /dev/null +++ b/sysdeps/unix/sysv/linux/x86/bits/rseq.h @@ -0,0 +1,30 @@ +/* Restartable Sequences Linux x86 architecture header. + Copyright (C) 2021 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _SYS_RSEQ_H +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead." +#endif + +/* RSEQ_SIG is a signature required before each abort handler code. + + RSEQ_SIG is used with the following reserved undefined instructions, which + trap in user-space: + + x86-32: 0f b9 3d 53 30 05 53 ud1 0x53053053,%edi + x86-64: 0f b9 3d 53 30 05 53 ud1 0x53053053(%rip),%edi */ + +#define RSEQ_SIG 0x53053053 |