aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/unix
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/unix')
-rw-r--r--sysdeps/unix/sysv/linux/aarch64/sysdep.h1
-rw-r--r--sysdeps/unix/sysv/linux/dl-vdso-setup.c5
-rw-r--r--sysdeps/unix/sysv/linux/dl-vdso-setup.h3
-rw-r--r--sysdeps/unix/sysv/linux/getrandom-internal.h29
-rw-r--r--sysdeps/unix/sysv/linux/getrandom.c304
-rw-r--r--sysdeps/unix/sysv/linux/loongarch/sysdep.h1
-rw-r--r--sysdeps/unix/sysv/linux/not-cancel.h7
-rw-r--r--sysdeps/unix/sysv/linux/powerpc/sysdep.h1
-rw-r--r--sysdeps/unix/sysv/linux/s390/sysdep.h1
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/sysdep.h1
10 files changed, 350 insertions, 3 deletions
diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
index bbbe357..974b503 100644
--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
@@ -164,6 +164,7 @@
# define HAVE_CLOCK_GETRES64_VSYSCALL "__kernel_clock_getres"
# define HAVE_CLOCK_GETTIME64_VSYSCALL "__kernel_clock_gettime"
# define HAVE_GETTIMEOFDAY_VSYSCALL "__kernel_gettimeofday"
+# define HAVE_GETRANDOM_VSYSCALL "__kernel_getrandom"
# define HAVE_CLONE3_WRAPPER 1
diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.c b/sysdeps/unix/sysv/linux/dl-vdso-setup.c
index 3a44944..476c6db 100644
--- a/sysdeps/unix/sysv/linux/dl-vdso-setup.c
+++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.c
@@ -66,6 +66,11 @@ PROCINFO_CLASS int (*_dl_vdso_clock_getres) (clockid_t,
PROCINFO_CLASS int (*_dl_vdso_clock_getres_time64) (clockid_t,
struct __timespec64 *) RELRO;
# endif
+# ifdef HAVE_GETRANDOM_VSYSCALL
+PROCINFO_CLASS ssize_t (*_dl_vdso_getrandom) (void *buffer, size_t len,
+ unsigned int flags, void *state,
+ size_t state_len) RELRO;
+# endif
/* PowerPC specific ones. */
# ifdef HAVE_GET_TBFREQ
diff --git a/sysdeps/unix/sysv/linux/dl-vdso-setup.h b/sysdeps/unix/sysv/linux/dl-vdso-setup.h
index 8aee5a8..cde99f6 100644
--- a/sysdeps/unix/sysv/linux/dl-vdso-setup.h
+++ b/sysdeps/unix/sysv/linux/dl-vdso-setup.h
@@ -50,6 +50,9 @@ setup_vdso_pointers (void)
#ifdef HAVE_RISCV_HWPROBE
GLRO(dl_vdso_riscv_hwprobe) = dl_vdso_vsym (HAVE_RISCV_HWPROBE);
#endif
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ GLRO(dl_vdso_getrandom) = dl_vdso_vsym (HAVE_GETRANDOM_VSYSCALL);
+#endif
}
#endif
diff --git a/sysdeps/unix/sysv/linux/getrandom-internal.h b/sysdeps/unix/sysv/linux/getrandom-internal.h
new file mode 100644
index 0000000..37e6c9b
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/getrandom-internal.h
@@ -0,0 +1,29 @@
+/* Internal definitions for Linux getrandom implementation.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _GETRANDOM_INTERNAL_H
+#define _GETRANDOM_INTERNAL_H
+
+#include <pthreadP.h>
+
+extern void __getrandom_early_init (_Bool) attribute_hidden;
+
+extern void __getrandom_fork_subprocess (void) attribute_hidden;
+extern void __getrandom_vdso_release (struct pthread *curp) attribute_hidden;
+extern void __getrandom_reset_state (struct pthread *curp) attribute_hidden;
+#endif
diff --git a/sysdeps/unix/sysv/linux/getrandom.c b/sysdeps/unix/sysv/linux/getrandom.c
index 777d1de..c8c5782 100644
--- a/sysdeps/unix/sysv/linux/getrandom.c
+++ b/sysdeps/unix/sysv/linux/getrandom.c
@@ -21,12 +21,314 @@
#include <unistd.h>
#include <sysdep-cancel.h>
+static inline ssize_t
+getrandom_syscall (void *buffer, size_t length, unsigned int flags,
+ bool cancel)
+{
+ return cancel
+ ? SYSCALL_CANCEL (getrandom, buffer, length, flags)
+ : INLINE_SYSCALL_CALL (getrandom, buffer, length, flags);
+}
+
+#ifdef HAVE_GETRANDOM_VSYSCALL
+# include <assert.h>
+# include <ldsodefs.h>
+# include <libc-lock.h>
+# include <list.h>
+# include <setvmaname.h>
+# include <sys/mman.h>
+# include <sys/sysinfo.h>
+# include <tls-internal.h>
+
+/* These values will be initialized at loading time by calling the
+ _dl_vdso_getrandom with a special value. The 'state_size' is the opaque
+ state size per-thread allocated with a mmap using 'mmap_prot' and
+ 'mmap_flags' argument. */
+static uint32_t state_size;
+static uint32_t state_size_cache_aligned;
+static uint32_t mmap_prot;
+static uint32_t mmap_flags;
+
+/* The function below are used on reentracy handling with (i.e. SA_NODEFER).
+ Before allocating a new state or issue the vDSO, atomically read the
+ current thread buffer, and if this is already reserved (is_reserved_ptr)
+ fallback to the syscall. Otherwise, reserve the buffer by atomically
+ setting the LSB of the opaque state pointer. The bit is cleared after the
+ vDSO is called, or before issuing the fallback syscall. */
+
+static inline void *reserve_ptr (void *p)
+{
+ return (void *) ((uintptr_t) (p) | 1UL);
+}
+
+static inline void *release_ptr (void *p)
+{
+ return (void *) ((uintptr_t) (p) & ~1UL);
+}
+
+static inline bool is_reserved_ptr (void *p)
+{
+ return (uintptr_t) (p) & 1UL;
+}
+
+static struct
+{
+ __libc_lock_define (, lock);
+
+ void **states; /* Queue of opaque states allocated with the kernel
+ provided flags and used on getrandom vDSO call. */
+ size_t len; /* Number of available free states in the queue. */
+ size_t total; /* Number of states allocated from the kernel. */
+ size_t cap; /* Total number of states that 'states' can hold before
+ needed to be resized. */
+} grnd_alloc = {
+ .lock = LLL_LOCK_INITIALIZER
+};
+
+static bool
+vgetrandom_get_state_alloc (void)
+{
+ /* Start by allocating one page for the opaque states. */
+ size_t block_size = ALIGN_UP (state_size_cache_aligned, GLRO(dl_pagesize));
+ size_t states_per_page = GLRO (dl_pagesize) / state_size_cache_aligned;
+ void *block = __mmap (NULL, GLRO(dl_pagesize), mmap_prot, mmap_flags, -1, 0);
+ if (block == MAP_FAILED)
+ return false;
+ __set_vma_name (block, block_size, " glibc: getrandom");
+
+ if (grnd_alloc.total + states_per_page > grnd_alloc.cap)
+ {
+ /* Use a new mmap instead of trying to mremap. It avoids a
+ potential multithread fork issue where fork is called just after
+ mremap returns but before assigning to the grnd_alloc.states,
+ thus making the its value invalid in the child. */
+ void *old_states = grnd_alloc.states;
+ size_t new_states_size = ALIGN_UP ((grnd_alloc.total + states_per_page)
+ * sizeof (*grnd_alloc.states),
+ GLRO(dl_pagesize));
+
+ /* There is no need to memcpy any opaque state information because
+ all the allocated opaque states are assigned to running threads
+ (meaning that if we iterate over them we can reconstruct the state
+ list). */
+ void **states = __mmap (NULL, new_states_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (states == MAP_FAILED)
+ {
+ __munmap (block, block_size);
+ return false;
+ }
+
+ /* Atomically replace the old state, so if a fork happens the child
+ process will see a consistent free state buffer. The size might
+ not be updated, but it does not really matter since the buffer is
+ always increased. */
+ grnd_alloc.states = states;
+ atomic_thread_fence_seq_cst ();
+ if (old_states != NULL)
+ __munmap (old_states, grnd_alloc.cap * sizeof (*grnd_alloc.states));
+
+ __set_vma_name (states, new_states_size, " glibc: getrandom states");
+ grnd_alloc.cap = new_states_size / sizeof (*grnd_alloc.states);
+ atomic_thread_fence_seq_cst ();
+ }
+
+ for (size_t i = 0; i < states_per_page; ++i)
+ {
+ /* There is no need to handle states that straddle a page because
+ we allocate only one page. */
+ grnd_alloc.states[i] = block;
+ block += state_size_cache_aligned;
+ }
+ /* Concurrent fork should not observe the previous pointer value. */
+ grnd_alloc.len = states_per_page;
+ grnd_alloc.total += states_per_page;
+ atomic_thread_fence_seq_cst ();
+
+ return true;
+}
+
+/* Allocate an opaque state for vgetrandom. If the grnd_alloc does not have
+ any, mmap() another page of them using the vgetrandom parameters. */
+static void *
+vgetrandom_get_state (void)
+{
+ void *state = NULL;
+
+ /* The signal blocking avoid the potential issue where _Fork() (which is
+ async-signal-safe) is called with the lock taken. The function is
+ called only once during thread lifetime, so the overhead should be
+ minimal. */
+ internal_sigset_t set;
+ internal_signal_block_all (&set);
+ __libc_lock_lock (grnd_alloc.lock);
+
+ if (grnd_alloc.len > 0 || vgetrandom_get_state_alloc ())
+ state = grnd_alloc.states[--grnd_alloc.len];
+
+ __libc_lock_unlock (grnd_alloc.lock);
+ internal_signal_restore_set (&set);
+
+ return state;
+}
+
+/* Returns true when vgetrandom is used successfully. Returns false if the
+ syscall fallback should be issued in the case the vDSO is not present, in
+ the case of reentrancy, or if any memory allocation fails. */
+static ssize_t
+getrandom_vdso (void *buffer, size_t length, unsigned int flags, bool cancel)
+{
+ if (__glibc_unlikely (state_size == 0))
+ return getrandom_syscall (buffer, length, flags, cancel);
+
+ struct pthread *self = THREAD_SELF;
+
+ void *state = atomic_load_relaxed (&self->getrandom_buf);
+ if (is_reserved_ptr (state))
+ return getrandom_syscall (buffer, length, flags, cancel);
+ atomic_store_relaxed (&self->getrandom_buf, reserve_ptr (state));
+ __atomic_signal_fence (__ATOMIC_ACQ_REL);
+
+ bool r = false;
+ if (state == NULL)
+ {
+ state = vgetrandom_get_state ();
+ if (state == NULL)
+ goto out;
+ }
+
+ /* Since the vDSO implementation does not issue the syscall with the
+ cancellation bridge (__syscall_cancel_arch), use GRND_NONBLOCK so there
+ is no potential unbounded blocking in the kernel. It should be a rare
+ situation, only at system startup when RNG is not initialized. */
+ ssize_t ret = GLRO (dl_vdso_getrandom) (buffer,
+ length,
+ flags | GRND_NONBLOCK,
+ state,
+ state_size);
+ if (INTERNAL_SYSCALL_ERROR_P (ret))
+ {
+ /* Fallback to the syscall if the kernel would block. */
+ int err = INTERNAL_SYSCALL_ERRNO (ret);
+ if (err == EAGAIN && !(flags & GRND_NONBLOCK))
+ goto out;
+
+ __set_errno (err);
+ ret = -1;
+ }
+ r = true;
+
+out:
+ __atomic_signal_fence (__ATOMIC_ACQ_REL);
+ atomic_store_relaxed (&self->getrandom_buf, state);
+ return r ? ret : getrandom_syscall (buffer, length, flags, cancel);
+}
+#endif
+
+void
+__getrandom_early_init (_Bool initial)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ /* libcs loaded for audit modules, dlmopen, etc. fallback to syscall. */
+ if (initial && (GLRO (dl_vdso_getrandom) != NULL))
+ {
+ /* Used to query the vDSO for the required mmap flags and the opaque
+ per-thread state size. Defined by linux/random.h. */
+ struct vgetrandom_opaque_params
+ {
+ uint32_t size_of_opaque_state;
+ uint32_t mmap_prot;
+ uint32_t mmap_flags;
+ uint32_t reserved[13];
+ } params;
+ if (GLRO(dl_vdso_getrandom) (NULL, 0, 0, &params, ~0UL) == 0)
+ {
+ /* Align each opaque state to L1 data cache size to avoid false
+ sharing. If the size can not be obtained, use the kernel
+ provided one. */
+ state_size = params.size_of_opaque_state;
+
+ long int ld1sz = __sysconf (_SC_LEVEL1_DCACHE_LINESIZE);
+ if (ld1sz <= 0)
+ ld1sz = 1;
+ state_size_cache_aligned = ALIGN_UP (state_size, ld1sz);
+ /* Do not enable vDSO if the required opaque state size is larger
+ than a page because we only allocate one page per time to hold
+ the states. */
+ if (state_size_cache_aligned > GLRO(dl_pagesize))
+ {
+ state_size = 0;
+ return;
+ }
+ mmap_prot = params.mmap_prot;
+ mmap_flags = params.mmap_flags;
+ }
+ }
+#endif
+}
+
+/* Re-add the state state from CURP on the free list. This function is
+ called after fork returns in the child, so no locking is required. */
+void
+__getrandom_reset_state (struct pthread *curp)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ if (grnd_alloc.states == NULL || curp->getrandom_buf == NULL)
+ return;
+ assert (grnd_alloc.len < grnd_alloc.cap);
+ grnd_alloc.states[grnd_alloc.len++] = release_ptr (curp->getrandom_buf);
+ curp->getrandom_buf = NULL;
+#endif
+}
+
+/* Called when a thread terminates, and adds its random buffer back into the
+ allocator pool for use in a future thread. This is called by
+ pthread_create during thread termination, and after signal has been
+ blocked. */
+void
+__getrandom_vdso_release (struct pthread *curp)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ if (curp->getrandom_buf == NULL)
+ return;
+
+ __libc_lock_lock (grnd_alloc.lock);
+ grnd_alloc.states[grnd_alloc.len++] = curp->getrandom_buf;
+ __libc_lock_unlock (grnd_alloc.lock);
+#endif
+}
+
+/* Reset the internal lock state in case another thread has locked while
+ this thread calls fork. The stale thread states will be handled by
+ reclaim_stacks which calls __getrandom_reset_state on each thread. */
+void
+__getrandom_fork_subprocess (void)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ grnd_alloc.lock = LLL_LOCK_INITIALIZER;
+#endif
+}
+
+ssize_t
+__getrandom_nocancel (void *buffer, size_t length, unsigned int flags)
+{
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ return getrandom_vdso (buffer, length, flags, false);
+#else
+ return getrandom_syscall (buffer, length, flags, false);
+#endif
+}
+
/* Write up to LENGTH bytes of randomness starting at BUFFER.
Return the number of bytes written, or -1 on error. */
ssize_t
__getrandom (void *buffer, size_t length, unsigned int flags)
{
- return SYSCALL_CANCEL (getrandom, buffer, length, flags);
+#ifdef HAVE_GETRANDOM_VSYSCALL
+ return getrandom_vdso (buffer, length, flags, true);
+#else
+ return getrandom_syscall (buffer, length, flags, true);
+#endif
}
libc_hidden_def (__getrandom)
weak_alias (__getrandom, getrandom)
diff --git a/sysdeps/unix/sysv/linux/loongarch/sysdep.h b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
index eb0ba79..e2d853a 100644
--- a/sysdeps/unix/sysv/linux/loongarch/sysdep.h
+++ b/sysdeps/unix/sysv/linux/loongarch/sysdep.h
@@ -119,6 +119,7 @@
#define HAVE_CLOCK_GETTIME64_VSYSCALL "__vdso_clock_gettime"
#define HAVE_GETTIMEOFDAY_VSYSCALL "__vdso_gettimeofday"
#define HAVE_GETCPU_VSYSCALL "__vdso_getcpu"
+#define HAVE_GETRANDOM_VSYSCALL "__vdso_getrandom"
#define HAVE_CLONE3_WRAPPER 1
diff --git a/sysdeps/unix/sysv/linux/not-cancel.h b/sysdeps/unix/sysv/linux/not-cancel.h
index 2a7585b..12f2691 100644
--- a/sysdeps/unix/sysv/linux/not-cancel.h
+++ b/sysdeps/unix/sysv/linux/not-cancel.h
@@ -27,6 +27,7 @@
#include <sys/syscall.h>
#include <sys/wait.h>
#include <time.h>
+#include <sys/random.h>
/* Non cancellable open syscall. */
__typeof (open) __open_nocancel;
@@ -84,15 +85,17 @@ __writev_nocancel_nostatus (int fd, const struct iovec *iov, int iovcnt)
}
static inline ssize_t
-__getrandom_nocancel (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_direct (void *buf, size_t buflen, unsigned int flags)
{
return INLINE_SYSCALL_CALL (getrandom, buf, buflen, flags);
}
+__typeof (getrandom) __getrandom_nocancel attribute_hidden;
+
/* Non cancellable getrandom syscall that does not also set errno in case of
failure. */
static inline ssize_t
-__getrandom_nocancel_nostatus (void *buf, size_t buflen, unsigned int flags)
+__getrandom_nocancel_nostatus_direct (void *buf, size_t buflen, unsigned int flags)
{
return INTERNAL_SYSCALL_CALL (getrandom, buf, buflen, flags);
}
diff --git a/sysdeps/unix/sysv/linux/powerpc/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/sysdep.h
index a69b7db..48f3d0d 100644
--- a/sysdeps/unix/sysv/linux/powerpc/sysdep.h
+++ b/sysdeps/unix/sysv/linux/powerpc/sysdep.h
@@ -223,5 +223,6 @@
#define HAVE_TIME_VSYSCALL "__kernel_time"
#define HAVE_GETTIMEOFDAY_VSYSCALL "__kernel_gettimeofday"
#define HAVE_GET_TBFREQ "__kernel_get_tbfreq"
+#define HAVE_GETRANDOM_VSYSCALL "__kernel_getrandom"
#endif /* _LINUX_POWERPC_SYSDEP_H */
diff --git a/sysdeps/unix/sysv/linux/s390/sysdep.h b/sysdeps/unix/sysv/linux/s390/sysdep.h
index 9b3000c..9698c57 100644
--- a/sysdeps/unix/sysv/linux/s390/sysdep.h
+++ b/sysdeps/unix/sysv/linux/s390/sysdep.h
@@ -72,6 +72,7 @@
#ifdef __s390x__
#define HAVE_CLOCK_GETRES64_VSYSCALL "__kernel_clock_getres"
#define HAVE_CLOCK_GETTIME64_VSYSCALL "__kernel_clock_gettime"
+#define HAVE_GETRANDOM_VSYSCALL "__kernel_getrandom"
#else
#define HAVE_CLOCK_GETRES_VSYSCALL "__kernel_clock_getres"
#define HAVE_CLOCK_GETTIME_VSYSCALL "__kernel_clock_gettime"
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index a2b021b..7dc072a 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -376,6 +376,7 @@
# define HAVE_TIME_VSYSCALL "__vdso_time"
# define HAVE_GETCPU_VSYSCALL "__vdso_getcpu"
# define HAVE_CLOCK_GETRES64_VSYSCALL "__vdso_clock_getres"
+# define HAVE_GETRANDOM_VSYSCALL "__vdso_getrandom"
# define HAVE_CLONE3_WRAPPER 1