diff options
25 files changed, 839 insertions, 228 deletions
@@ -837,12 +837,15 @@ typedef struct #define NT_ARM_ZT 0x40d /* ARM SME ZT registers. */ #define NT_ARM_FPMR 0x40e /* ARM floating point mode register. */ #define NT_ARM_POE 0x40f /* ARM POE registers. */ +#define NT_ARM_GCS 0x410 /* ARM GCS state. */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note. */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers. */ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode. */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers. */ #define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ #define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ +#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged + address control */ #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers. */ #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers. */ diff --git a/hurd/Makefile b/hurd/Makefile index cf70b8c..cbc3c23 100644 --- a/hurd/Makefile +++ b/hurd/Makefile @@ -19,6 +19,11 @@ subdir := hurd include ../Makeconfig +tests := test-sig-xstate \ + test-sig-rpc-interrupted +$(objpfx)test-sig-xstate: $(shared-thread-library) +$(objpfx)test-sig-rpc-interrupted: $(shared-thread-library) $(objdir)/hurd/libhurduser.so + headers = \ $(interface-headers) \ hurd.h \ diff --git a/hurd/test-sig-rpc-interrupted.c b/hurd/test-sig-rpc-interrupted.c new file mode 100644 index 0000000..a89d70e --- /dev/null +++ b/hurd/test-sig-rpc-interrupted.c @@ -0,0 +1,185 @@ +/* Test the state save/restore procedures during signal handling when an + interruptible RPC is restarted. + + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + + +#include <assert.h> +#include <pthread.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <mach/message.h> +#include <mach/gnumach.h> +#include <mach/mach_traps.h> +#include <mach/mig_errors.h> +#include <mach-shortcuts.h> +#include <mach_init.h> +#include <hurd/io.h> +#include <hurd/io_reply.h> + +#include <support/check.h> +#include <support/xthread.h> + +#include "test-xstate.h" + +void handler (int signum, siginfo_t *info, void *context) +{ + printf ("signal %d setting a different CPU state\n", signum); + char buf3[XSTATE_BUFFER_SIZE]; + memset (buf3, 0x77, XSTATE_BUFFER_SIZE); + SET_XSTATE (buf3); +} + +static const mach_msg_type_t RetCodeCheck = { + .msgt_name = (unsigned char) MACH_MSG_TYPE_INTEGER_32, + .msgt_size = 32, + .msgt_number = 1, + .msgt_inline = TRUE, + .msgt_longform = FALSE, + .msgt_deallocate = FALSE, + .msgt_unused = 0 +}; + + +/* Helper thread to simulate a proper RPC interruption during dignal handling */ +void* fake_interruptor (void *arg) +{ + int err; + sigset_t ss; + TEST_COMPARE (sigemptyset (&ss), 0); + TEST_COMPARE (sigaddset (&ss, SIGUSR1), 0); + TEST_COMPARE (sigprocmask (SIG_BLOCK, &ss, NULL), 0); + + struct { + mach_msg_header_t Head; + } request; + mach_port_t rxport = *((mach_port_t*)arg); + err = mach_msg (&request.Head, MACH_RCV_MSG, 0, sizeof (request), rxport, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + TEST_COMPARE (err, MACH_MSG_SUCCESS); + TEST_COMPARE (request.Head.msgh_bits, 0x1112); + TEST_COMPARE (request.Head.msgh_size, sizeof (request.Head)); + TEST_COMPARE (request.Head.msgh_id, 33000); + + mig_reply_header_t reply; + reply.Head = request.Head; + reply.Head.msgh_id += 100; + reply.RetCodeType = RetCodeCheck; + reply.RetCode = KERN_SUCCESS; + err = mach_msg (&reply.Head, MACH_SEND_MSG, sizeof (reply), 0, MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + TEST_COMPARE (err, MACH_MSG_SUCCESS); + + return NULL; +} + + +/* Helper thread to send a signal to the main thread in the middle of + * an interruptible rpc */ +void* signal_sender (void *arg) +{ + int err; + sigset_t ss; + TEST_COMPARE (sigemptyset (&ss), 0); + TEST_COMPARE (sigaddset (&ss, SIGUSR1), 0); + TEST_COMPARE (sigprocmask (SIG_BLOCK, &ss, NULL), 0); + + /* Receive the first request, we won't answer to this. */ + struct { + mach_msg_header_t head; + char data[64]; + } m1, m2; + mach_port_t rxport = *((mach_port_t*)arg); + memset (&m1, 0, sizeof (m1)); + memset (&m2, 0, sizeof (m2)); + err = mach_msg (&m1.head, MACH_RCV_MSG, 0, sizeof (m1), rxport, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + TEST_COMPARE (err, MACH_MSG_SUCCESS); + + /* interrupt the ongoing rpc with a signal, using the + * interruptible rpc protocol */ + pthread_t thintr = xpthread_create (NULL, fake_interruptor, arg); + TEST_COMPARE (kill (getpid (), SIGUSR1), 0); + xpthread_join (thintr); + + /* Complete the interruption by sending EINTR */ + mig_reply_header_t reply; + reply.Head = m1.head; + reply.Head.msgh_id += 100; + reply.RetCodeType = RetCodeCheck; + reply.RetCode = EINTR; + err = mach_msg (&reply.Head, MACH_SEND_MSG, sizeof (reply), 0, MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + TEST_COMPARE (err, MACH_MSG_SUCCESS); + + /* Receive the retried rpc, and check that it has the same payload + * as the first one. Port names might still be different. */ + err = mach_msg (&m2.head, MACH_RCV_MSG, 0, sizeof (m2), rxport, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + TEST_COMPARE (m1.head.msgh_bits, m2.head.msgh_bits); + TEST_COMPARE (m1.head.msgh_size, m2.head.msgh_size); + TEST_COMPARE (m1.head.msgh_id, m2.head.msgh_id); + TEST_COMPARE_BLOB (m1.data, sizeof (m1.data), m2.data, sizeof (m2.data)); + + /* And finally make the rpc succeed by sending a valid reply */ + err = io_read_reply (m2.head.msgh_remote_port, MACH_MSG_TYPE_MOVE_SEND_ONCE, + KERN_SUCCESS, NULL, 0); + TEST_COMPARE (err, MACH_MSG_SUCCESS); + + return NULL; +} + + +static int do_test (void) +{ +#if ! XSTATE_HELPERS_SUPPORTED + FAIL_UNSUPPORTED ("Test not supported on this arch."); +#endif + + /* Setup signal handling; we need to handle the signal in the main + * thread, the other ones will explicitely block SIGUSR1. */ + struct sigaction act = { 0 }; + act.sa_flags = SA_RESTART; + act.sa_sigaction = &handler; + TEST_COMPARE (sigaction (SIGUSR1, &act, NULL), 0); + + mach_port_t fakeio; + int err; + err = mach_port_allocate (mach_task_self (), MACH_PORT_RIGHT_RECEIVE, &fakeio); + TEST_COMPARE (err, MACH_MSG_SUCCESS); + + err = mach_port_insert_right (mach_task_self (), fakeio, fakeio, + MACH_MSG_TYPE_MAKE_SEND); + TEST_COMPARE (err, MACH_MSG_SUCCESS); + + pthread_t thsender = xpthread_create (NULL, signal_sender, &fakeio); + + char *buf; + mach_msg_type_number_t n; + TEST_COMPARE (io_read (fakeio, &buf, &n, 1, 2), 0); + + xpthread_join (thsender); + return EXIT_SUCCESS; +} + +#include <support/test-driver.c> diff --git a/hurd/test-sig-xstate.c b/hurd/test-sig-xstate.c new file mode 100644 index 0000000..0a68a44 --- /dev/null +++ b/hurd/test-sig-xstate.c @@ -0,0 +1,94 @@ +/* Test the state save/restore procedures during signal handling. + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + + +#include <assert.h> +#include <pthread.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <mach/message.h> +#include <mach/gnumach.h> +#include <mach/mach_traps.h> +#include <mach-shortcuts.h> +#include <mach_init.h> +#include <hurd/io.h> +#include <hurd/io_reply.h> + +#include <support/check.h> +#include <support/xthread.h> + +#include "test-xstate.h" + +static volatile bool loopflag = true; + +void handler (int signum, siginfo_t *info, void *context) +{ + char buf3[XSTATE_BUFFER_SIZE]; + memset (buf3, 0x77, XSTATE_BUFFER_SIZE); + SET_XSTATE (buf3); + printf ("signal %d setting a different CPU state\n", signum); + loopflag = false; +} + +/* Helper thread to send a signal to the main thread */ +void* signal_sender (void *arg) +{ + sigset_t ss; + assert (! sigemptyset (&ss)); + assert (! sigaddset (&ss, SIGUSR1)); + assert (! sigprocmask (SIG_BLOCK, &ss, NULL)); + + TEST_COMPARE (kill (getpid (), SIGUSR1), 0); + + return NULL; +} + +static int do_test (void) +{ +#if ! XSTATE_HELPERS_SUPPORTED + FAIL_UNSUPPORTED ("Test not supported on this arch."); +#endif + + struct sigaction act = { 0 }; + act.sa_sigaction = &handler; + TEST_COMPARE (sigaction (SIGUSR1, &act, NULL), 0); + + pthread_t thsender = xpthread_create (NULL, signal_sender, NULL); + + char buf1[XSTATE_BUFFER_SIZE], buf2[XSTATE_BUFFER_SIZE]; + memset (buf1, 0x33, XSTATE_BUFFER_SIZE); + + SET_XSTATE (buf1); + + while (loopflag) + ; + + GET_XSTATE (buf2); + TEST_COMPARE_BLOB (buf1, sizeof (buf1), buf2, sizeof (buf2)); + + xpthread_join (thsender); + return EXIT_SUCCESS; +} + +#include <support/test-driver.c> diff --git a/hurd/test-xstate.h b/hurd/test-xstate.h new file mode 100644 index 0000000..a8185dc --- /dev/null +++ b/hurd/test-xstate.h @@ -0,0 +1,40 @@ +/* Helpers to test XSTATE during signal handling + + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _TEST_XSTATE_H +#define _TEST_XSTATE_H + +#if defined __x86_64__ || defined __i386__ +#define XSTATE_HELPERS_SUPPORTED 1 +#define XSTATE_BUFFER_SIZE 16 +#define SET_XSTATE(b) do { \ + asm volatile ("movups (%0),%%xmm0" :: "r" (b)); \ + } while (0) + +#define GET_XSTATE(b) do { \ + asm volatile ("movups %%xmm0,(%0)" :: "r" (b)); \ + } while (0) + +#else +#define XSTATE_HELPERS_SUPPORTED 0 +#define XSTATE_BUFFER_SIZE 1 +#define SET_XSTATE(b) +#endif + +#endif /* _TEST_XSTATE_H */ diff --git a/libio/stdio.h b/libio/stdio.h index 4436525..d042b36 100644 --- a/libio/stdio.h +++ b/libio/stdio.h @@ -168,8 +168,11 @@ extern int renameat (int __oldfd, const char *__old, int __newfd, #ifdef __USE_GNU /* Flags for renameat2. */ # define RENAME_NOREPLACE (1 << 0) +# define AT_RENAME_NOREPLACE RENAME_NOREPLACE # define RENAME_EXCHANGE (1 << 1) +# define AT_RENAME_EXCHANGE RENAME_EXCHANGE # define RENAME_WHITEOUT (1 << 2) +# define AT_RENAME_WHITEOUT RENAME_WHITEOUT /* Rename file OLD relative to OLDFD to NEW relative to NEWFD, with additional flags. */ diff --git a/malloc/malloc-check.c b/malloc/malloc-check.c index 814a916..c5265ec 100644 --- a/malloc/malloc-check.c +++ b/malloc/malloc-check.c @@ -235,7 +235,7 @@ free_check (void *mem) { /* Mark the chunk as belonging to the library again. */ (void)tag_region (chunk2mem (p), memsize (p)); - _int_free (&main_arena, p, 1); + _int_free_chunk (&main_arena, p, chunksize (p), 1); __libc_lock_unlock (main_arena.mutex); } __set_errno (err); diff --git a/malloc/malloc.c b/malloc/malloc.c index a0bc733..23b9306 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -1086,8 +1086,6 @@ typedef struct malloc_chunk* mchunkptr; /* Internal routines. */ static void* _int_malloc(mstate, size_t); -static void _int_free (mstate, mchunkptr, int); -static void _int_free_check (mstate, mchunkptr, INTERNAL_SIZE_T); static void _int_free_chunk (mstate, mchunkptr, INTERNAL_SIZE_T, int); static void _int_free_merge_chunk (mstate, mchunkptr, INTERNAL_SIZE_T); static INTERNAL_SIZE_T _int_free_create_chunk (mstate, @@ -1101,6 +1099,9 @@ static void* _int_memalign(mstate, size_t, size_t); static void* _mid_memalign(size_t, size_t, void *); #endif +#if USE_TCACHE +static void malloc_printerr_tail(const char *str); +#endif static void malloc_printerr(const char *str) __attribute__ ((noreturn)); static void munmap_chunk(mchunkptr p); @@ -1273,7 +1274,6 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ sysmalloc: Returns untagged memory. _int_malloc: Returns untagged memory. - _int_free: Takes untagged memory. _int_memalign: Returns untagged memory. _int_memalign: Returns untagged memory. _mid_memalign: Returns tagged memory. @@ -3163,7 +3163,7 @@ tcache_put (mchunkptr chunk, size_t tc_idx) { tcache_entry *e = (tcache_entry *) chunk2mem (chunk); - /* Mark this chunk as "in the tcache" so the test in _int_free will + /* Mark this chunk as "in the tcache" so the test in __libc_free will detect a double free. */ e->key = tcache_key; @@ -3241,37 +3241,12 @@ tcache_double_free_verify (tcache_entry *e, size_t tc_idx) malloc_printerr ("free(): unaligned chunk detected in tcache 2"); if (tmp == e) malloc_printerr ("free(): double free detected in tcache 2"); - /* If we get here, it was a coincidence. We've wasted a - few cycles, but don't abort. */ } -} - -/* Try to free chunk to the tcache, if success return true. - Caller must ensure that chunk and size are valid. */ -static __always_inline bool -tcache_free (mchunkptr p, INTERNAL_SIZE_T size) -{ - bool done = false; - size_t tc_idx = csize2tidx (size); - if (tcache != NULL && tc_idx < mp_.tcache_bins) - { - /* Check to see if it's already in the tcache. */ - tcache_entry *e = (tcache_entry *) chunk2mem (p); - - /* This test succeeds on double free. However, we don't 100% - trust it (it also matches random payload data at a 1 in - 2^<size_t> chance), so verify it's not an unlikely - coincidence before aborting. */ - if (__glibc_unlikely (e->key == tcache_key)) - tcache_double_free_verify (e, tc_idx); - - if (tcache->counts[tc_idx] < mp_.tcache_count) - { - tcache_put (p, tc_idx); - done = true; - } - } - return done; + /* No double free detected - it might be in a tcache of another thread, + or user data that happens to match the key. Since we are not sure, + clear the key and retry freeing it. */ + e->key = 0; + __libc_free (e); } static void @@ -3316,6 +3291,11 @@ tcache_init(void) if (tcache_shutting_down) return; + /* Check minimum mmap chunk is larger than max tcache size. This means + mmap chunks with their different layout are never added to tcache. */ + if (MAX_TCACHE_SIZE >= GLRO (dl_pagesize) / 2) + malloc_printerr ("max tcache size too large"); + arena_get (ar_ptr, bytes); victim = _int_malloc (ar_ptr, bytes); if (!victim && ar_ptr != NULL) @@ -3361,13 +3341,15 @@ tcache_try_malloc (size_t bytes, void **memptr) size_t tc_idx = csize2tidx (tbytes); - MAYBE_INIT_TCACHE (); - if (tcache_available (tc_idx)) - *memptr = tcache_get (tc_idx); + { + *memptr = tcache_get (tc_idx); + return false; + } else *memptr = NULL; + MAYBE_INIT_TCACHE (); return false; } @@ -3442,7 +3424,6 @@ libc_hidden_def (__libc_malloc) void __libc_free (void *mem) { - mstate ar_ptr; mchunkptr p; /* chunk corresponding to mem */ if (mem == NULL) /* free(0) has no effect */ @@ -3453,37 +3434,41 @@ __libc_free (void *mem) if (__glibc_unlikely (mtag_enabled)) *(volatile char *)mem; - int err = errno; - p = mem2chunk (mem); - if (chunk_is_mmapped (p)) /* release mmapped memory. */ - { - /* See if the dynamic brk/mmap threshold needs adjusting. - Dumped fake mmapped chunks do not affect the threshold. */ - if (!mp_.no_dyn_threshold - && chunksize_nomask (p) > mp_.mmap_threshold - && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX) - { - mp_.mmap_threshold = chunksize (p); - mp_.trim_threshold = 2 * mp_.mmap_threshold; - LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2, - mp_.mmap_threshold, mp_.trim_threshold); - } - munmap_chunk (p); - } - else + /* Mark the chunk as belonging to the library again. */ + tag_region (chunk2mem (p), memsize (p)); + + INTERNAL_SIZE_T size = chunksize (p); + + if (__glibc_unlikely (misaligned_chunk (p))) + return malloc_printerr_tail ("free(): invalid pointer"); + + check_inuse_chunk (arena_for_chunk (p), p); + +#if USE_TCACHE + size_t tc_idx = csize2tidx (size); + + if (__glibc_likely (tcache != NULL && tc_idx < mp_.tcache_bins)) { - MAYBE_INIT_TCACHE (); + /* Check to see if it's already in the tcache. */ + tcache_entry *e = (tcache_entry *) chunk2mem (p); - /* Mark the chunk as belonging to the library again. */ - (void)tag_region (chunk2mem (p), memsize (p)); + /* Check for double free - verify if the key matches. */ + if (__glibc_unlikely (e->key == tcache_key)) + return tcache_double_free_verify (e, tc_idx); - ar_ptr = arena_for_chunk (p); - _int_free (ar_ptr, p, 0); + if (__glibc_likely (tcache->counts[tc_idx] < mp_.tcache_count)) + return tcache_put (p, tc_idx); } +#endif - __set_errno (err); + /* Check size >= MINSIZE and p + size does not overflow. */ + if (__glibc_unlikely (__builtin_add_overflow_p ((uintptr_t) p, size - MINSIZE, + (uintptr_t) 0))) + return malloc_printerr_tail ("free(): invalid size"); + + _int_free_chunk (arena_for_chunk (p), p, size, 0); } libc_hidden_def (__libc_free) @@ -3696,8 +3681,6 @@ _mid_memalign (size_t alignment, size_t bytes, void *address) } size_t tc_idx = csize2tidx (tbytes); - MAYBE_INIT_TCACHE (); - if (tcache_available (tc_idx)) { /* The tcache itself isn't encoded, but the chain is. */ @@ -3714,6 +3697,7 @@ _mid_memalign (size_t alignment, size_t bytes, void *address) return tag_new_usable (victim); } } + MAYBE_INIT_TCACHE (); } #endif @@ -4560,24 +4544,6 @@ _int_malloc (mstate av, size_t bytes) ------------------------------ free ------------------------------ */ -static __always_inline void -_int_free_check (mstate av, mchunkptr p, INTERNAL_SIZE_T size) -{ - /* Little security check which won't hurt performance: the - allocator never wraps around at the end of the address space. - Therefore we can exclude some size values which might appear - here by accident or by "design" from some intruder. */ - if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0) - || __builtin_expect (misaligned_chunk (p), 0)) - malloc_printerr ("free(): invalid pointer"); - /* We know that each chunk is at least MINSIZE bytes in size or a - multiple of MALLOC_ALIGNMENT. */ - if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size))) - malloc_printerr ("free(): invalid size"); - - check_inuse_chunk (av, p); -} - /* Free chunk P of SIZE bytes to the arena. HAVE_LOCK indicates where the arena for P has already been locked. Caller must ensure chunk and size are valid. */ @@ -4669,6 +4635,9 @@ _int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, int have_lock) else if (!chunk_is_mmapped(p)) { + /* Preserve errno in case block merging results in munmap. */ + int err = errno; + /* If we're single-threaded, don't lock the arena. */ if (SINGLE_THREAD_P) have_lock = true; @@ -4680,35 +4649,34 @@ _int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, int have_lock) if (!have_lock) __libc_lock_unlock (av->mutex); + + __set_errno (err); } /* If the chunk was allocated via mmap, release via munmap(). */ else { - munmap_chunk (p); - } -} -/* Free chunk P to its arena AV. HAVE_LOCK indicates where the arena for - P has already been locked. It will perform sanity check, then try the - fast path to free into tcache. If the attempt not success, free the - chunk to arena. */ -static __always_inline void -_int_free (mstate av, mchunkptr p, int have_lock) -{ - INTERNAL_SIZE_T size; /* its size */ - - size = chunksize (p); + /* Preserve errno in case munmap sets it. */ + int err = errno; - _int_free_check (av, p, size); + /* See if the dynamic brk/mmap threshold needs adjusting. + Dumped fake mmapped chunks do not affect the threshold. */ + if (!mp_.no_dyn_threshold + && chunksize_nomask (p) > mp_.mmap_threshold + && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX) + { + mp_.mmap_threshold = chunksize (p); + mp_.trim_threshold = 2 * mp_.mmap_threshold; + LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2, + mp_.mmap_threshold, mp_.trim_threshold); + } -#if USE_TCACHE - if (tcache_free (p, size)) - return; -#endif + munmap_chunk (p); - _int_free_chunk (av, p, size, have_lock); + __set_errno (err); + } } /* Try to merge chunk P of SIZE bytes with its neighbors. Put the @@ -5845,6 +5813,17 @@ malloc_printerr (const char *str) __builtin_unreachable (); } +#if USE_TCACHE +static __attribute_noinline__ void +malloc_printerr_tail (const char *str) +{ + /* Ensure this cannot be a no-return function. */ + if (!__malloc_initialized) + return; + malloc_printerr (str); +} +#endif + #if IS_IN (libc) /* We need a wrapper function for one of the additions of POSIX. */ int diff --git a/manual/tunables.texi b/manual/tunables.texi index 67064f5..d11ca7e 100644 --- a/manual/tunables.texi +++ b/manual/tunables.texi @@ -367,7 +367,7 @@ stack is allowed from the main program. Setting the value to @code{0} disables the ABI auto-negotiation (meaning no executable stacks even if the ABI or ELF header requires it), @code{1} enables auto-negotiation (although the program might not need an executable stack), while @code{2} forces an executable -stack at process start. Tthis is provided for compatibility reasons, when +stack at process start. This is provided for compatibility reasons, when the program dynamically loads modules with @code{dlopen} which require an executable stack. diff --git a/sysdeps/aarch64/multiarch/memcpy_oryon1.S b/sysdeps/aarch64/multiarch/memcpy_oryon1.S index e86d8b0..cc267db 100644 --- a/sysdeps/aarch64/multiarch/memcpy_oryon1.S +++ b/sysdeps/aarch64/multiarch/memcpy_oryon1.S @@ -152,6 +152,46 @@ L(copy96): .p2align 6 L(copy_long): + /* On oryon1 cores, large memcpy's are helped by using ldnp/stnp. + This loop is identical to the one below it but using ldnp/stnp + instructions. For loops that are less than 32768 bytes, + the ldnp/stnp instructions will not help and will cause a slow + down so only use the ldnp/stnp loop for the largest sizes. */ + + cmp count, #32768 + b.lo L(copy_long_without_nontemp) + and tmp1, dstin, 15 + bic dst, dstin, 15 + ldnp D_l, D_h, [src] + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldnp A_l, A_h, [src, 16] + stnp D_l, D_h, [dstin] + ldnp B_l, B_h, [src, 32] + ldnp C_l, C_h, [src, 48] + ldnp D_l, D_h, [src, 64] + add src, src, #64 + subs count, count, 128 + 16 /* Test and readjust count. */ + +L(nontemp_loop64): + tbz src, #6, 1f +1: + stnp A_l, A_h, [dst, 16] + ldnp A_l, A_h, [src, 16] + stnp B_l, B_h, [dst, 32] + ldnp B_l, B_h, [src, 32] + stnp C_l, C_h, [dst, 48] + ldnp C_l, C_h, [src, 48] + stnp D_l, D_h, [dst, 64] + ldnp D_l, D_h, [src, 64] + add src, src, #64 + add dst, dst, #64 + subs count, count, 64 + b.hi L(nontemp_loop64) + b L(last64) + +L(copy_long_without_nontemp): + and tmp1, dstin, 15 bic dst, dstin, 15 ldp D_l, D_h, [src] diff --git a/sysdeps/aarch64/multiarch/memset_oryon1.S b/sysdeps/aarch64/multiarch/memset_oryon1.S index 0f9b718..88f4ef4 100644 --- a/sysdeps/aarch64/multiarch/memset_oryon1.S +++ b/sysdeps/aarch64/multiarch/memset_oryon1.S @@ -90,6 +90,8 @@ L(set_long): cmp count, 256 ccmp valw, 0, 0, cs b.eq L(try_zva) + cmp count, #32768 + b.hi L(set_long_with_nontemp) /* Small-size or non-zero memset does not use DC ZVA. */ sub count, dstend, dst @@ -112,6 +114,30 @@ L(set_long): stp val, val, [dstend, -16] ret +L(set_long_with_nontemp): + /* Small-size or non-zero memset does not use DC ZVA. */ + sub count, dstend, dst + + /* Adjust count and bias for loop. By subtracting extra 1 from count, + it is easy to use tbz instruction to check whether loop tailing + count is less than 33 bytes, so as to bypass 2 unnecessary stps. */ + sub count, count, 64+16+1 + +1: stnp val, val, [dst, 16] + stnp val, val, [dst, 32] + stnp val, val, [dst, 48] + stnp val, val, [dst, 64] + add dst, dst, #64 + subs count, count, 64 + b.hs 1b + + tbz count, 5, 1f /* Remaining count is less than 33 bytes? */ + stnp val, val, [dst, 16] + stnp val, val, [dst, 32] +1: stnp val, val, [dstend, -32] + stnp val, val, [dstend, -16] + ret + L(try_zva): /* Write the first and last 64 byte aligned block using stp rather than using DC ZVA as it is faster. */ diff --git a/sysdeps/mach/hurd/dup3.c b/sysdeps/mach/hurd/dup3.c index 22af45b..49545ae 100644 --- a/sysdeps/mach/hurd/dup3.c +++ b/sysdeps/mach/hurd/dup3.c @@ -69,6 +69,7 @@ __dup3 (int fd, int fd2, int flags) { /* Get a hold of the destination descriptor. */ struct hurd_fd *d2; + error_t err; __mutex_lock (&_hurd_dtable_lock); @@ -107,22 +108,51 @@ __dup3 (int fd, int fd2, int flags) } else { - /* Give the ports each a user ref for the new descriptor. */ - __mach_port_mod_refs (__mach_task_self (), port, - MACH_PORT_RIGHT_SEND, 1); - if (ctty != MACH_PORT_NULL) - __mach_port_mod_refs (__mach_task_self (), ctty, - MACH_PORT_RIGHT_SEND, 1); - - /* Install the ports and flags in the new descriptor slot. */ - __spin_lock (&d2->port.lock); - if (flags & O_CLOEXEC) - d2->flags = d_flags | FD_CLOEXEC; - else - /* dup clears FD_CLOEXEC. */ - d2->flags = d_flags & ~FD_CLOEXEC; - _hurd_port_set (&d2->ctty, ctty); - _hurd_port_locked_set (&d2->port, port); /* Unlocks D2. */ + /* Give the io server port a user ref for the new descriptor. */ + err = __mach_port_mod_refs (__mach_task_self (), port, + MACH_PORT_RIGHT_SEND, 1); + + if (err == KERN_UREFS_OVERFLOW) + fd2 = __hurd_fail (EMFILE); + else if (err) + fd2 = __hurd_fail (EINVAL); + else if (ctty != MACH_PORT_NULL) + { + /* We have confirmed the io server port has got a user ref + count, now give ctty port a user ref for the new + descriptor. */ + err = __mach_port_mod_refs (__mach_task_self (), ctty, + MACH_PORT_RIGHT_SEND, 1); + + if (err) + { + /* In this case the io server port has got a ref count + but the ctty port failed to get one, so we need to + clean the ref count we just assigned. */ + __mach_port_mod_refs (__mach_task_self (), port, + MACH_PORT_RIGHT_SEND, -1); + + if (err == KERN_UREFS_OVERFLOW) + fd2 = __hurd_fail (EMFILE); + else + fd2 = __hurd_fail (EINVAL); + } + } + + if (!err) + { + /* The ref counts of the ports are incremented + successfully. */ + /* Install the ports and flags in the new descriptor slot. */ + __spin_lock (&d2->port.lock); + if (flags & O_CLOEXEC) + d2->flags = d_flags | FD_CLOEXEC; + else + /* dup clears FD_CLOEXEC. */ + d2->flags = d_flags & ~FD_CLOEXEC; + _hurd_port_set (&d2->ctty, ctty); + _hurd_port_locked_set (&d2->port, port); /* Unlocks D2. */ + } } } diff --git a/sysdeps/mach/hurd/fcntl.c b/sysdeps/mach/hurd/fcntl.c index a65c190..de576af 100644 --- a/sysdeps/mach/hurd/fcntl.c +++ b/sysdeps/mach/hurd/fcntl.c @@ -83,18 +83,47 @@ __libc_fcntl (int fd, int cmd, ...) result = -1; else { - /* Give the ports each a user ref for the new descriptor. */ - __mach_port_mod_refs (__mach_task_self (), port, - MACH_PORT_RIGHT_SEND, 1); - if (ctty != MACH_PORT_NULL) - __mach_port_mod_refs (__mach_task_self (), ctty, - MACH_PORT_RIGHT_SEND, 1); - - /* Install the ports and flags in the new descriptor. */ - if (ctty != MACH_PORT_NULL) - _hurd_port_set (&new->ctty, ctty); - new->flags = flags; - _hurd_port_locked_set (&new->port, port); /* Unlocks NEW. */ + /* Give the io server port a user ref for the new descriptor. */ + err = __mach_port_mod_refs (__mach_task_self (), port, + MACH_PORT_RIGHT_SEND, 1); + + if (err == KERN_UREFS_OVERFLOW) + result = __hurd_fail (EMFILE); + else if (err) + result = __hurd_fail (EINVAL); + else if (ctty != MACH_PORT_NULL) + { + /* We have confirmed the io server port has got a user ref + count, now give ctty port a user ref for the new + descriptor. */ + err = __mach_port_mod_refs (__mach_task_self (), ctty, + MACH_PORT_RIGHT_SEND, 1); + + if (err) + { + /* In this case the io server port has got a ref count + but the ctty port fails to get one, so we need to clean + the ref count we just assigned. */ + __mach_port_mod_refs (__mach_task_self (), port, + MACH_PORT_RIGHT_SEND, -1); + + if (err == KERN_UREFS_OVERFLOW) + result = __hurd_fail (EMFILE); + else + result = __hurd_fail (EINVAL); + } + } + + if (!err) + { + /* The ref counts of the ports are incremented successfully. */ + /* Install the ports and flags in the new descriptor. */ + if (ctty != MACH_PORT_NULL) + _hurd_port_set (&new->ctty, ctty); + new->flags = flags; + /* Unlocks NEW. */ + _hurd_port_locked_set (&new->port, port); + } } HURD_CRITICAL_END; diff --git a/sysdeps/mach/hurd/futimens.c b/sysdeps/mach/hurd/futimens.c index 30ef0a6..1212529 100644 --- a/sysdeps/mach/hurd/futimens.c +++ b/sysdeps/mach/hurd/futimens.c @@ -32,7 +32,9 @@ __futimens (int fd, const struct timespec tsp[2]) struct timespec atime, mtime; error_t err; - utime_ts_from_tspec (tsp, &atime, &mtime); + err = utime_ts_from_tspec (tsp, &atime, &mtime); + if (err) + return err; err = HURD_DPORT_USE (fd, __file_utimens (port, atime, mtime)); @@ -40,7 +42,9 @@ __futimens (int fd, const struct timespec tsp[2]) { time_value_t atim, mtim; - utime_tvalue_from_tspec (tsp, &atim, &mtim); + err = utime_tvalue_from_tspec (tsp, &atim, &mtim); + if (err) + return err; err = HURD_DPORT_USE (fd, __file_utimes (port, atim, mtim)); } diff --git a/sysdeps/mach/hurd/futimes.c b/sysdeps/mach/hurd/futimes.c index 20f47f3..97385d7 100644 --- a/sysdeps/mach/hurd/futimes.c +++ b/sysdeps/mach/hurd/futimes.c @@ -32,7 +32,9 @@ __futimes (int fd, const struct timeval tvp[2]) struct timespec atime, mtime; error_t err; - utime_ts_from_tval (tvp, &atime, &mtime); + err = utime_ts_from_tval (tvp, &atime, &mtime); + if (err) + return err; err = HURD_DPORT_USE (fd, __file_utimens (port, atime, mtime)); @@ -40,7 +42,9 @@ __futimes (int fd, const struct timeval tvp[2]) { time_value_t atim, mtim; - utime_tvalue_from_tval (tvp, &atim, &mtim); + err = utime_tvalue_from_tval (tvp, &atim, &mtim); + if (err) + return err; err = HURD_DPORT_USE (fd, __file_utimes (port, atim, mtim)); } diff --git a/sysdeps/mach/hurd/i386/bits/sigcontext.h b/sysdeps/mach/hurd/i386/bits/sigcontext.h index 6e5e220..c44e4de 100644 --- a/sysdeps/mach/hurd/i386/bits/sigcontext.h +++ b/sysdeps/mach/hurd/i386/bits/sigcontext.h @@ -88,6 +88,8 @@ struct sigcontext struct i386_fp_save sc_fpsave; struct i386_fp_regs sc_fpregs; int sc_fpexcsr; /* FPSR including exception bits. */ + + struct i386_xfloat_state *xstate; }; /* Traditional BSD names for some members. */ diff --git a/sysdeps/mach/hurd/i386/sigreturn.c b/sysdeps/mach/hurd/i386/sigreturn.c index ce8df8d..dc57d61 100644 --- a/sysdeps/mach/hurd/i386/sigreturn.c +++ b/sysdeps/mach/hurd/i386/sigreturn.c @@ -21,6 +21,8 @@ #include <stdlib.h> #include <string.h> +#include <cpuid.h> + /* This is run on the thread stack after restoring it, to be able to unlock SS off sigstack. */ static void @@ -123,10 +125,35 @@ __sigreturn (struct sigcontext *scp) if (scp->sc_onstack) ss->sigaltstack.ss_flags &= ~SS_ONSTACK; - if (scp->sc_fpused) - /* Restore the FPU state. Mach conveniently stores the state - in the format the i387 `frstor' instruction uses to restore it. */ - asm volatile ("frstor %0" : : "m" (scp->sc_fpsave)); +#ifdef i386_XFLOAT_STATE + if (scp->xstate) + { + if (scp->xstate->initialized) + { + unsigned eax, ebx, ecx, edx; + __cpuid_count(0xd, 0, eax, ebx, ecx, edx); + switch (scp->xstate->fp_save_kind) + { + case 0: // FNSAVE + asm volatile("frstor %0" : : "m" (scp->xstate->hw_state)); + break; + case 1: // FXSAVE + asm volatile("fxrstor %0" : : "m" (scp->xstate->hw_state), \ + "a" (eax), "d" (edx)); + break; + default: // XSAVE, XSAVEOPT, XSAVEC, XSAVES + asm volatile("xrstor %0" : : "m" (scp->xstate->hw_state), \ + "a" (eax), "d" (edx)); + break; + } + } + } + else +#endif + if (scp->sc_fpused) + /* Restore the FPU state. Mach conveniently stores the state + in the format the i387 `frstor' instruction uses to restore it. */ + asm volatile ("frstor %0" : : "m" (scp->sc_fpsave)); { /* There are convenient instructions to pop state off the stack, so we diff --git a/sysdeps/mach/hurd/symlinkat.c b/sysdeps/mach/hurd/symlinkat.c index e7dfb67..cb6250e 100644 --- a/sysdeps/mach/hurd/symlinkat.c +++ b/sysdeps/mach/hurd/symlinkat.c @@ -47,7 +47,7 @@ __symlinkat (const char *from, int fd, const char *to) if (! *name) /* Can't link to the existing directory itself. */ - err = ENOTDIR; + err = EEXIST; else /* Create a new, unlinked node in the target directory. */ err = __dir_mkfile (dir, O_WRITE, 0777 & ~_hurd_umask, &node); diff --git a/sysdeps/mach/hurd/utime-helper.c b/sysdeps/mach/hurd/utime-helper.c index d88bccd..6afa871 100644 --- a/sysdeps/mach/hurd/utime-helper.c +++ b/sysdeps/mach/hurd/utime-helper.c @@ -21,8 +21,14 @@ #include <stddef.h> #include <sys/time.h> +static inline bool +check_tval (const struct timeval *tvp) +{ + return tvp->tv_usec >= 0 && tvp->tv_usec < USEC_PER_SEC; +} + /* Initializes atime/mtime timespec structures from an array of timeval. */ -static inline void +static inline error_t utime_ts_from_tval (const struct timeval tvp[2], struct timespec *atime, struct timespec *mtime) { @@ -37,13 +43,19 @@ utime_ts_from_tval (const struct timeval tvp[2], } else { + if (!check_tval (&tvp[0])) + return EINVAL; + if (!check_tval (&tvp[1])) + return EINVAL; + TIMEVAL_TO_TIMESPEC (&tvp[0], atime); TIMEVAL_TO_TIMESPEC (&tvp[1], mtime); } + return 0; } /* Initializes atime/mtime time_value_t structures from an array of timeval. */ -static inline void +static inline error_t utime_tvalue_from_tval (const struct timeval tvp[2], time_value_t *atime, time_value_t *mtime) { @@ -53,11 +65,17 @@ utime_tvalue_from_tval (const struct timeval tvp[2], atime->microseconds = mtime->microseconds = -1; else { + if (!check_tval (&tvp[0])) + return EINVAL; + if (!check_tval (&tvp[1])) + return EINVAL; + atime->seconds = tvp[0].tv_sec; atime->microseconds = tvp[0].tv_usec; mtime->seconds = tvp[1].tv_sec; mtime->microseconds = tvp[1].tv_usec; } + return 0; } /* Changes the access time of the file behind PORT using a timeval array. */ @@ -67,7 +85,9 @@ hurd_futimes (const file_t port, const struct timeval tvp[2]) error_t err; struct timespec atime, mtime; - utime_ts_from_tval (tvp, &atime, &mtime); + err = utime_ts_from_tval (tvp, &atime, &mtime); + if (err) + return err; err = __file_utimens (port, atime, mtime); @@ -75,7 +95,9 @@ hurd_futimes (const file_t port, const struct timeval tvp[2]) { time_value_t atim, mtim; - utime_tvalue_from_tval (tvp, &atim, &mtim); + err = utime_tvalue_from_tval (tvp, &atim, &mtim); + if (err) + return err; err = __file_utimes (port, atim, mtim); } @@ -83,8 +105,16 @@ hurd_futimes (const file_t port, const struct timeval tvp[2]) return err; } +static inline bool +check_tspec (const struct timespec *tsp) +{ + return tsp->tv_nsec == UTIME_NOW + || tsp->tv_nsec == UTIME_OMIT + || tsp->tv_nsec >= 0 && tsp->tv_nsec < NSEC_PER_SEC; +} + /* Initializes atime/mtime timespec structures from an array of timespec. */ -static inline void +static inline error_t utime_ts_from_tspec (const struct timespec tsp[2], struct timespec *atime, struct timespec *mtime) { @@ -99,13 +129,19 @@ utime_ts_from_tspec (const struct timespec tsp[2], } else { + if (!check_tspec (&tsp[0])) + return EINVAL; + if (!check_tspec (&tsp[1])) + return EINVAL; + *atime = tsp[0]; *mtime = tsp[1]; } + return 0; } /* Initializes atime/mtime time_value_t structures from an array of timespec. */ -static inline void +static inline error_t utime_tvalue_from_tspec (const struct timespec tsp[2], time_value_t *atime, time_value_t *mtime) { @@ -115,6 +151,11 @@ utime_tvalue_from_tspec (const struct timespec tsp[2], atime->microseconds = mtime->microseconds = -1; else { + if (!check_tspec (&tsp[0])) + return EINVAL; + if (!check_tspec (&tsp[1])) + return EINVAL; + if (tsp[0].tv_nsec == UTIME_NOW) atime->microseconds = -1; else if (tsp[0].tv_nsec == UTIME_OMIT) @@ -128,6 +169,7 @@ utime_tvalue_from_tspec (const struct timespec tsp[2], else TIMESPEC_TO_TIME_VALUE (mtime, &(tsp[1])); } + return 0; } /* Changes the access time of the file behind PORT using a timespec array. */ @@ -137,7 +179,9 @@ hurd_futimens (const file_t port, const struct timespec tsp[2]) error_t err; struct timespec atime, mtime; - utime_ts_from_tspec (tsp, &atime, &mtime); + err = utime_ts_from_tspec (tsp, &atime, &mtime); + if (err) + return err; err = __file_utimens (port, atime, mtime); @@ -145,7 +189,9 @@ hurd_futimens (const file_t port, const struct timespec tsp[2]) { time_value_t atim, mtim; - utime_tvalue_from_tspec (tsp, &atim, &mtim); + err = utime_tvalue_from_tspec (tsp, &atim, &mtim); + if (err) + return err; err = __file_utimes (port, atim, mtim); } diff --git a/sysdeps/mach/hurd/x86/trampoline.c b/sysdeps/mach/hurd/x86/trampoline.c index 8e2890f..6f23c56 100644 --- a/sysdeps/mach/hurd/x86/trampoline.c +++ b/sysdeps/mach/hurd/x86/trampoline.c @@ -26,7 +26,11 @@ #include "hurdfault.h" #include <intr-msg.h> #include <sys/ucontext.h> - +#ifdef __x86_64__ +#include <mach/x86_64/mach_i386.h> +#else +#include <mach/i386/mach_i386.h> +#endif /* Fill in a siginfo_t structure for SA_SIGINFO-enabled handlers. */ static void fill_siginfo (siginfo_t *si, int signo, @@ -106,6 +110,7 @@ _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action void firewall (void); void *sigsp; struct sigcontext *scp; + vm_size_t xstate_size; struct { union @@ -145,6 +150,14 @@ _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action struct hurd_userlink link; ucontext_t ucontext; siginfo_t siginfo; +#ifdef __x86_64__ + char _pad2[56]; +#else + char _pad2[20]; +#endif + char xstate[]; + /* Don't add anything after xstate, as it's dynamically + sized. */ } *stackframe; #ifdef __x86_64__ @@ -170,6 +183,17 @@ _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action if (! machine_get_basic_state (ss->thread, state)) return NULL; + /* Initialize the size of the CPU extended state, to be saved during + * signal handling */ +#ifdef i386_XFLOAT_STATE + _Static_assert ((sizeof(*stackframe) + sizeof(struct i386_xfloat_state)) % 64 == 0, + "stackframe size must be multiple of 64-byte minus " + "sizeof(struct i386_xfloat_state), please adjust _pad2"); + + if (__i386_get_xstate_size(__mach_host_self(), &xstate_size)) +#endif + xstate_size = 0; + /* Save the original SP in the gratuitous `esp' slot. We may need to reset the SP (the `uesp' slot) to avoid clobbering an interrupted RPC frame. */ @@ -196,14 +220,21 @@ _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action #endif } - /* Push the arguments to call `trampoline' on the stack. */ - sigsp -= sizeof (*stackframe); -#ifdef __x86_64__ - /* Align SP at 16 bytes. Coupled with the fact that sigreturn_addr is - 16-byte aligned within the stackframe struct, this ensures that it ends - up on a 16-byte aligned address, as required by the ABI. */ - sigsp = (void *) ((uintptr_t) sigsp & ~15UL); -#endif + /* Push the arguments to call `trampoline' on the stack. + * The extended state might have a variable size depending on the platform, + * so we dynamically allocate it on the stack frame.*/ + sigsp -= sizeof (*stackframe) + xstate_size; + + /* Align SP at 64 bytes. This is needed for two reasons: + * - sigreturn_addr is 16-byte aligned within the stackframe + * struct, and this ensures that it ends up on a 16-byte aligned + * address, as required by the ABI. + * - the XSAVE state needs to be aligned at 64 bytes (on both i386 and + * x86_64), so we align the stackframe also at 64 bytes and add the + * required padding at the end, see the _pad2 field. + */ + sigsp = (void *) ((uintptr_t) sigsp & ~63UL); + stackframe = sigsp; if (_hurdsig_catch_memory_fault (stackframe)) @@ -248,14 +279,40 @@ _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action memcpy (&scp->sc_i386_thread_state, &state->basic, sizeof (state->basic)); - /* struct sigcontext is laid out so that starting at sc_fpkind mimics - a struct i386_float_state. */ - _Static_assert (offsetof (struct sigcontext, sc_i386_float_state) - % __alignof__ (struct i386_float_state) == 0, - "sc_i386_float_state layout mismatch"); - ok = machine_get_state (ss->thread, state, i386_FLOAT_STATE, - &state->fpu, &scp->sc_i386_float_state, - sizeof (state->fpu)); + scp->xstate = NULL; +#ifdef i386_XFLOAT_STATE + if (xstate_size > 0) + { + mach_msg_type_number_t got = (xstate_size / sizeof (int)); + + ok = (! __thread_get_state (ss->thread, i386_XFLOAT_STATE, + (thread_state_t) stackframe->xstate, &got) + && got == (xstate_size / sizeof (int))); + + if (ok && ((struct i386_xfloat_state*) stackframe->xstate)->fp_save_kind > 5) + /* We support up to XSAVES */ + ok = 0; + + if (ok) + { + scp->xstate = (struct i386_xfloat_state*) stackframe->xstate; + assert((uintptr_t)scp->xstate->hw_state % 64 == 0); + } + } + else +#endif + ok = 0; + if (!ok) + { + /* struct sigcontext is laid out so that starting at sc_fpkind mimics + a struct i386_float_state. */ + _Static_assert (offsetof (struct sigcontext, sc_i386_float_state) + % __alignof__ (struct i386_float_state) == 0, + "sc_i386_float_state layout mismatch"); + ok = machine_get_state (ss->thread, state, i386_FLOAT_STATE, + &state->fpu, &scp->sc_i386_float_state, + sizeof (state->fpu)); + } /* Set up the arguments for the signal handler. */ stackframe->signo = signo; @@ -404,7 +461,10 @@ _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action - in gdb: gdb/i386-gnu-tdep.c gnu_sigtramp_code. */ #ifdef __x86_64__ -asm ("rpc_wait_trampoline:\n" +asm ("trampoline:\n" + "fnclex\n" /* Clear any pending exception. */ + "jmp _trampoline\n" + "rpc_wait_trampoline:\n" /* This is the entry point when we have an RPC reply message to receive before running the handler. The MACH_MSG_SEND bit has already been cleared in the OPTION argument in our %rsi. The interrupted user @@ -423,7 +483,7 @@ asm ("rpc_wait_trampoline:\n" /* Switch to the signal stack. */ "movq %rbx, %rsp\n" - "trampoline:\n" + "_trampoline:\n" /* Entry point for running the handler normally. The arguments to the handler function are on the top of the stack, same as in the i386 version: @@ -449,7 +509,10 @@ asm ("rpc_wait_trampoline:\n" "movq 16(%rsp), %rdi\n" "ret"); #else -asm ("rpc_wait_trampoline:\n"); +asm ("trampoline:\n" + "fnclex\n" /* Clear any pending exception. */ + "jmp _trampoline\n" + "rpc_wait_trampoline:\n"); /* This is the entry point when we have an RPC reply message to receive before running the handler. The MACH_MSG_SEND bit has already been cleared in the OPTION argument on our stack. The interrupted user @@ -469,7 +532,7 @@ asm (/* Retry the interrupted mach_msg system call. */ /* Switch to the signal stack. */ "movl %ebx, %esp\n"); - asm ("trampoline:\n"); +asm ("_trampoline:\n"); /* Entry point for running the handler normally. The arguments to the handler function are already on the top of the stack: diff --git a/sysdeps/mach/hurd/x86_64/bits/sigcontext.h b/sysdeps/mach/hurd/x86_64/bits/sigcontext.h index 7bac881..d83795f 100644 --- a/sysdeps/mach/hurd/x86_64/bits/sigcontext.h +++ b/sysdeps/mach/hurd/x86_64/bits/sigcontext.h @@ -96,6 +96,8 @@ struct sigcontext struct i386_fp_save sc_fpsave; struct i386_fp_regs sc_fpregs; int sc_fpexcsr; /* FPSR including exception bits. */ + + struct i386_xfloat_state *xstate; }; /* Traditional BSD names for some members. */ diff --git a/sysdeps/mach/hurd/x86_64/sigreturn.c b/sysdeps/mach/hurd/x86_64/sigreturn.c index 81a2d3b..773c00f 100644 --- a/sysdeps/mach/hurd/x86_64/sigreturn.c +++ b/sysdeps/mach/hurd/x86_64/sigreturn.c @@ -20,6 +20,8 @@ #include <hurd/msg.h> #include <stdlib.h> +#include <cpuid.h> + /* This is run on the thread stack after restoring it, to be able to unlock SS off sigstack. */ void @@ -116,10 +118,35 @@ __sigreturn (struct sigcontext *scp) if (scp->sc_onstack) ss->sigaltstack.ss_flags &= ~SS_ONSTACK; - if (scp->sc_fpused) - /* Restore the FPU state. Mach conveniently stores the state - in the format the i387 `frstor' instruction uses to restore it. */ - asm volatile ("frstor %0" : : "m" (scp->sc_fpsave)); +#ifdef i386_XFLOAT_STATE + if (scp->xstate) + { + if (scp->xstate->initialized) + { + unsigned eax, ebx, ecx, edx; + __cpuid_count(0xd, 0, eax, ebx, ecx, edx); + switch (scp->xstate->fp_save_kind) + { + case 0: // FNSAVE + asm volatile("frstor %0" : : "m" (scp->xstate->hw_state)); + break; + case 1: // FXSAVE + asm volatile("fxrstor %0" : : "m" (scp->xstate->hw_state), \ + "a" (eax), "d" (edx)); + break; + default: // XSAVE, XSAVEOPT, XSAVEC, XSAVES + asm volatile("xrstor %0" : : "m" (scp->xstate->hw_state), \ + "a" (eax), "d" (edx)); + break; + } + } + } + else +#endif + if (scp->sc_fpused) + /* Restore the FPU state. Mach conveniently stores the state + in the format the i387 `frstor' instruction uses to restore it. */ + asm volatile ("frstor %0" : : "m" (scp->sc_fpsave)); /* Copy the registers onto the user's stack, to be able to release the altstack (by unlocking sigstate). Note that unless an altstack is used, diff --git a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h index dfc554a..f425a4b 100644 --- a/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +++ b/sysdeps/unix/sysv/linux/bits/fcntl-linux.h @@ -379,6 +379,8 @@ struct file_handle identity and may not be usable to open_by_handle_at. */ +# define AT_HANDLE_MNT_ID_UNIQUE 1 /* Return the 64-bit unique mount + ID. */ #endif __BEGIN_DECLS diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index 9d31685..5723ec1 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -142,7 +142,6 @@ CFLAGS-tst-avxmod.c += $(AVX-CFLAGS) AVX512-CFLAGS = -mavx512f CFLAGS-tst-audit10-aux.c += $(AVX512-CFLAGS) CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS) -CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS) CFLAGS-tst-avx512-aux.c += $(AVX512-CFLAGS) CFLAGS-tst-avx512mod.c += $(AVX512-CFLAGS) diff --git a/sysdeps/x86_64/tst-auditmod10b.c b/sysdeps/x86_64/tst-auditmod10b.c index 6eb21b6..0b994ef 100644 --- a/sysdeps/x86_64/tst-auditmod10b.c +++ b/sysdeps/x86_64/tst-auditmod10b.c @@ -125,7 +125,6 @@ la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, #include <tst-audit.h> -#ifdef __AVX512F__ #include <immintrin.h> #include <cpuid.h> @@ -148,9 +147,37 @@ check_avx512 (void) return (eax & 0xe6) == 0xe6; } -#else -#include <emmintrin.h> -#endif +void +__attribute__ ((target ("avx512f"))) +pltenter_avx512f (La_regs *regs, long int *framesizep) +{ + __m512i zero = _mm512_setzero_si512 (); + if (memcmp (®s->lr_vector[0], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[1], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[2], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[3], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[4], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[5], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[6], &zero, sizeof (zero)) + || memcmp (®s->lr_vector[7], &zero, sizeof (zero))) + abort (); + + for (int i = 0; i < 8; i++) + regs->lr_vector[i].zmm[0] + = (La_x86_64_zmm) _mm512_set1_epi64 (i + 1); + + __m512i zmm = _mm512_set1_epi64 (-1); + asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" ); + asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" ); + asm volatile ("vmovdqa64 %0, %%zmm2" : : "x" (zmm) : "xmm2" ); + asm volatile ("vmovdqa64 %0, %%zmm3" : : "x" (zmm) : "xmm3" ); + asm volatile ("vmovdqa64 %0, %%zmm4" : : "x" (zmm) : "xmm4" ); + asm volatile ("vmovdqa64 %0, %%zmm5" : : "x" (zmm) : "xmm5" ); + asm volatile ("vmovdqa64 %0, %%zmm6" : : "x" (zmm) : "xmm6" ); + asm volatile ("vmovdqa64 %0, %%zmm7" : : "x" (zmm) : "xmm7" ); + + *framesizep = 1024; +} ElfW(Addr) pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, @@ -160,39 +187,33 @@ pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", symname, (long int) sym->st_value, ndx, *flags); -#ifdef __AVX512F__ if (check_avx512 () && strcmp (symname, "audit_test") == 0) + pltenter_avx512f (regs, framesizep); + + return sym->st_value; +} + +void +__attribute__ ((target ("avx512f"))) +pltexit_avx512f (const La_regs *inregs, La_retval *outregs) +{ + __m512i zero = _mm512_setzero_si512 (); + if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero))) + abort (); + + for (int i = 0; i < 8; i++) { - __m512i zero = _mm512_setzero_si512 (); - if (memcmp (®s->lr_vector[0], &zero, sizeof (zero)) - || memcmp (®s->lr_vector[1], &zero, sizeof (zero)) - || memcmp (®s->lr_vector[2], &zero, sizeof (zero)) - || memcmp (®s->lr_vector[3], &zero, sizeof (zero)) - || memcmp (®s->lr_vector[4], &zero, sizeof (zero)) - || memcmp (®s->lr_vector[5], &zero, sizeof (zero)) - || memcmp (®s->lr_vector[6], &zero, sizeof (zero)) - || memcmp (®s->lr_vector[7], &zero, sizeof (zero))) - abort (); - - for (int i = 0; i < 8; i++) - regs->lr_vector[i].zmm[0] - = (La_x86_64_zmm) _mm512_set1_epi64 (i + 1); - - __m512i zmm = _mm512_set1_epi64 (-1); - asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" ); - asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" ); - asm volatile ("vmovdqa64 %0, %%zmm2" : : "x" (zmm) : "xmm2" ); - asm volatile ("vmovdqa64 %0, %%zmm3" : : "x" (zmm) : "xmm3" ); - asm volatile ("vmovdqa64 %0, %%zmm4" : : "x" (zmm) : "xmm4" ); - asm volatile ("vmovdqa64 %0, %%zmm5" : : "x" (zmm) : "xmm5" ); - asm volatile ("vmovdqa64 %0, %%zmm6" : : "x" (zmm) : "xmm6" ); - asm volatile ("vmovdqa64 %0, %%zmm7" : : "x" (zmm) : "xmm7" ); - - *framesizep = 1024; + __m512i zmm = _mm512_set1_epi64 (i + 1); + if (memcmp (&inregs->lr_vector[i], &zmm, sizeof (zmm)) != 0) + abort (); } -#endif - return sym->st_value; + outregs->lrv_vector0.zmm[0] + = (La_x86_64_zmm) _mm512_set1_epi64 (0x12349876); + + __m512i zmm = _mm512_set1_epi64 (-1); + asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" ); + asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" ); } unsigned int @@ -204,28 +225,8 @@ pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, symname, (long int) sym->st_value, ndx, (ptrdiff_t) outregs->int_retval); -#ifdef __AVX512F__ if (check_avx512 () && strcmp (symname, "audit_test") == 0) - { - __m512i zero = _mm512_setzero_si512 (); - if (memcmp (&outregs->lrv_vector0, &zero, sizeof (zero))) - abort (); - - for (int i = 0; i < 8; i++) - { - __m512i zmm = _mm512_set1_epi64 (i + 1); - if (memcmp (&inregs->lr_vector[i], &zmm, sizeof (zmm)) != 0) - abort (); - } - - outregs->lrv_vector0.zmm[0] - = (La_x86_64_zmm) _mm512_set1_epi64 (0x12349876); - - __m512i zmm = _mm512_set1_epi64 (-1); - asm volatile ("vmovdqa64 %0, %%zmm0" : : "x" (zmm) : "xmm0" ); - asm volatile ("vmovdqa64 %0, %%zmm1" : : "x" (zmm) : "xmm1" ); - } -#endif + pltexit_avx512f (inregs, outregs); return 0; } |