diff options
Diffstat (limited to 'sysdeps/i386')
-rw-r--r-- | sysdeps/i386/Makefile | 40 | ||||
-rw-r--r-- | sysdeps/i386/Versions | 5 | ||||
-rw-r--r-- | sysdeps/i386/dl-tls-get-addr.c | 68 | ||||
-rw-r--r-- | sysdeps/i386/dl-tls.h | 28 | ||||
-rw-r--r-- | sysdeps/i386/dl-tlsdesc-dynamic.h | 108 | ||||
-rw-r--r-- | sysdeps/i386/dl-tlsdesc.S | 17 | ||||
-rw-r--r-- | sysdeps/i386/fpu/e_ilogb.S | 41 | ||||
-rw-r--r-- | sysdeps/i386/fpu/e_ilogbf.S | 41 | ||||
-rw-r--r-- | sysdeps/i386/fpu/math_err.c | 1 | ||||
-rw-r--r-- | sysdeps/i386/nptl/rseq-access.h | 59 | ||||
-rw-r--r-- | sysdeps/i386/nptl/tcb-access.h | 112 | ||||
-rw-r--r-- | sysdeps/i386/nptl/tls.h | 17 | ||||
-rw-r--r-- | sysdeps/i386/stackinfo.h | 6 | ||||
-rw-r--r-- | sysdeps/i386/tls-get-addr-wrapper.h | 127 | ||||
-rw-r--r-- | sysdeps/i386/tls_get_addr.S | 57 | ||||
-rw-r--r-- | sysdeps/i386/tls_get_addr.h | 42 |
16 files changed, 371 insertions, 398 deletions
diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile index a2e8c0b..4fbaaa2 100644 --- a/sysdeps/i386/Makefile +++ b/sysdeps/i386/Makefile @@ -17,20 +17,25 @@ ifeq ($(subdir),gmon) sysdep_routines += i386-mcount endif -ifeq ($(subdir),elf) -CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused -CFLAGS-dl-load.c += -Wno-unused -CFLAGS-dl-reloc.c += -Wno-unused -endif - ifeq ($(subdir),csu) gen-as-const-headers += link-defines.sym +gen-as-const-headers += tlsdesc.sym else stack-align-test-flags += -malign-double endif +# Make sure no code in ld.so uses mm/xmm/ymm/zmm registers on i386 since +# the first 3 mm/xmm/ymm/zmm registers are used to pass vector parameters +# which must be preserved. +# With SSE disabled, ensure -fpmath is not set to use sse either. +rtld-CFLAGS += -mno-sse -mno-mmx -mfpmath=387 ifeq ($(subdir),elf) -sysdep-dl-routines += tlsdesc dl-tlsdesc +CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused +CFLAGS-dl-load.c += -Wno-unused +CFLAGS-dl-reloc.c += -Wno-unused +sysdep-dl-routines += \ + dl-tls-get-addr \ +# sysdep-dl-routines tests += tst-audit3 modules-names += tst-auditmod3a tst-auditmod3b @@ -38,18 +43,6 @@ modules-names += tst-auditmod3a tst-auditmod3b $(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so $(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so -endif - -ifeq ($(subdir),csu) -gen-as-const-headers += tlsdesc.sym -endif - -# Make sure no code in ld.so uses mm/xmm/ymm/zmm registers on i386 since -# the first 3 mm/xmm/ymm/zmm registers are used to pass vector parameters -# which must be preserved. -# With SSE disabled, ensure -fpmath is not set to use sse either. -rtld-CFLAGS += -mno-sse -mno-mmx -mfpmath=387 -ifeq ($(subdir),elf) CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ $(rtld-CFLAGS)) @@ -58,6 +51,15 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so @echo "Checking ld.so for SSE register use. This will take a few seconds..." $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ $(evaluate-test) + +tests-special += $(objpfx)check-gnu-tls.out + +$(objpfx)check-gnu-tls.out: $(common-objpfx)libc.so + LC_ALL=C $(READELF) -V -W $< \ + | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ + | grep GLIBC_ABI_GNU_TLS > $@; \ + $(evaluate-test) +generated += check-gnu-tls.out else CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS)) endif diff --git a/sysdeps/i386/Versions b/sysdeps/i386/Versions index 36e23b4..9c84c8e 100644 --- a/sysdeps/i386/Versions +++ b/sysdeps/i386/Versions @@ -28,6 +28,11 @@ libc { GLIBC_2.13 { __fentry__; } + GLIBC_ABI_GNU_TLS { + # This symbol is used only for empty version map and will be removed + # by scripts/versions.awk. + __placeholder_only_for_empty_version_map; + } } libm { GLIBC_2.1 { diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c new file mode 100644 index 0000000..c97e5c5 --- /dev/null +++ b/sysdeps/i386/dl-tls-get-addr.c @@ -0,0 +1,68 @@ +/* Ifunc selector for ___tls_get_addr. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifdef SHARED +# define ___tls_get_addr __redirect____tls_get_addr +# include <dl-tls.h> +# undef ___tls_get_addr +# undef __tls_get_addr + +# define SYMBOL_NAME ___tls_get_addr +# include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (cpu_features->xsave_state_size != 0) + { + if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) + return OPTIMIZE (xsavec); + else + return OPTIMIZE (xsave); + } + else if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) + return OPTIMIZE (fxsave); + return OPTIMIZE (fnsave); +} + +libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr, + IFUNC_SELECTOR ()); + +/* The special thing about the x86 TLS ABI is that we have two + variants of the __tls_get_addr function with different calling + conventions. The GNU version, which we are mostly concerned here, + takes the parameter in a register. The name is changed by adding + an additional underscore at the beginning. The Sun version uses + the normal calling convention. */ + +rtld_hidden_proto (___tls_get_addr) +rtld_hidden_def (___tls_get_addr) + +void * +__tls_get_addr (tls_index *ti) +{ + return ___tls_get_addr (ti); +} +#endif diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h index f453931..ef605c5 100644 --- a/sysdeps/i386/dl-tls.h +++ b/sysdeps/i386/dl-tls.h @@ -37,34 +37,14 @@ typedef struct dl_tls_index /* This is the prototype for the GNU version. */ extern void *___tls_get_addr (tls_index *ti) __attribute__ ((__regparm__ (1))); -extern void *___tls_get_addr_internal (tls_index *ti) - __attribute__ ((__regparm__ (1))) attribute_hidden; - # if IS_IN (rtld) -/* The special thing about the x86 TLS ABI is that we have two - variants of the __tls_get_addr function with different calling - conventions. The GNU version, which we are mostly concerned here, - takes the parameter in a register. The name is changed by adding - an additional underscore at the beginning. The Sun version uses - the normal calling convention. */ -void * -__tls_get_addr (tls_index *ti) -{ - return ___tls_get_addr_internal (ti); -} - - /* Prepare using the definition of __tls_get_addr in the generic version of this file. */ -# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr -strong_alias (___tls_get_addr, ___tls_get_addr_internal) -rtld_hidden_proto (___tls_get_addr) -rtld_hidden_def (___tls_get_addr) -#else - +# define __tls_get_addr \ + __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal +# else /* Users should get the better interface. */ -# define __tls_get_addr ___tls_get_addr - +# define __tls_get_addr ___tls_get_addr # endif #endif diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h index 6aec06d..be9ecd6 100644 --- a/sysdeps/i386/dl-tlsdesc-dynamic.h +++ b/sysdeps/i386/dl-tlsdesc-dynamic.h @@ -16,34 +16,6 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#undef REGISTER_SAVE_AREA - -#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 -# error STATE_SAVE_ALIGNMENT must be multiple of 16 -#endif - -#if DL_RUNTIME_RESOLVE_REALIGN_STACK -# ifdef USE_FNSAVE -# error USE_FNSAVE shouldn't be defined -# endif -# ifdef USE_FXSAVE -/* Use fxsave to save all registers. */ -# define REGISTER_SAVE_AREA 512 -# endif -#else -# ifdef USE_FNSAVE -/* Use fnsave to save x87 FPU stack registers. */ -# define REGISTER_SAVE_AREA 108 -# else -# ifndef USE_FXSAVE -# error USE_FXSAVE must be defined -# endif -/* Use fxsave to save all registers. Add 12 bytes to align the stack - to 16 bytes. */ -# define REGISTER_SAVE_AREA (512 + 12) -# endif -#endif - .hidden _dl_tlsdesc_dynamic .global _dl_tlsdesc_dynamic .type _dl_tlsdesc_dynamic,@function @@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic: ret .p2align 4,,7 2: - cfi_adjust_cfa_offset (32) -#if DL_RUNTIME_RESOLVE_REALIGN_STACK - movl %ebx, -28(%esp) - movl %esp, %ebx - cfi_def_cfa_register(%ebx) - and $-STATE_SAVE_ALIGNMENT, %esp -#endif -#ifdef REGISTER_SAVE_AREA - subl $REGISTER_SAVE_AREA, %esp -# if !DL_RUNTIME_RESOLVE_REALIGN_STACK - cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) -# endif -#else -# if !DL_RUNTIME_RESOLVE_REALIGN_STACK -# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true -# endif - /* Allocate stack space of the required size to save the state. */ - LOAD_PIC_REG (cx) - subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp -#endif -#ifdef USE_FNSAVE - fnsave (%esp) -#elif defined USE_FXSAVE - fxsave (%esp) -#else - /* Save the argument for ___tls_get_addr in EAX. */ - movl %eax, %ecx - movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax - xorl %edx, %edx - /* Clear the XSAVE Header. */ -# ifdef USE_XSAVE - movl %edx, (512)(%esp) - movl %edx, (512 + 4 * 1)(%esp) - movl %edx, (512 + 4 * 2)(%esp) - movl %edx, (512 + 4 * 3)(%esp) -# endif - movl %edx, (512 + 4 * 4)(%esp) - movl %edx, (512 + 4 * 5)(%esp) - movl %edx, (512 + 4 * 6)(%esp) - movl %edx, (512 + 4 * 7)(%esp) - movl %edx, (512 + 4 * 8)(%esp) - movl %edx, (512 + 4 * 9)(%esp) - movl %edx, (512 + 4 * 10)(%esp) - movl %edx, (512 + 4 * 11)(%esp) - movl %edx, (512 + 4 * 12)(%esp) - movl %edx, (512 + 4 * 13)(%esp) - movl %edx, (512 + 4 * 14)(%esp) - movl %edx, (512 + 4 * 15)(%esp) -# ifdef USE_XSAVE - xsave (%esp) -# else - xsavec (%esp) -# endif - /* Restore the argument for ___tls_get_addr in EAX. */ - movl %ecx, %eax -#endif - call HIDDEN_JUMPTARGET (___tls_get_addr) - /* Get register content back. */ -#ifdef USE_FNSAVE - frstor (%esp) -#elif defined USE_FXSAVE - fxrstor (%esp) -#else - /* Save and retore ___tls_get_addr return value stored in EAX. */ - movl %eax, %ecx - movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax - xorl %edx, %edx - xrstor (%esp) - movl %ecx, %eax -#endif -#if DL_RUNTIME_RESOLVE_REALIGN_STACK - mov %ebx, %esp - cfi_def_cfa_register(%esp) - movl -28(%esp), %ebx - cfi_restore(%ebx) -#else - addl $REGISTER_SAVE_AREA, %esp - cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) -#endif +#include "tls-get-addr-wrapper.h" jmp 1b cfi_endproc .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S index c080993..c914ca4 100644 --- a/sysdeps/i386/dl-tlsdesc.S +++ b/sysdeps/i386/dl-tlsdesc.S @@ -22,23 +22,6 @@ #include <features-offsets.h> #include "tlsdesc.h" -#ifndef DL_STACK_ALIGNMENT -/* Due to GCC bug: - - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 - - __tls_get_addr may be called with 4-byte stack alignment. Although - this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume - that stack will be always aligned at 16 bytes. */ -# define DL_STACK_ALIGNMENT 4 -#endif - -/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align - stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ -#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ - (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ - || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) - .text /* This function is used to compute the TP offset for symbols in diff --git a/sysdeps/i386/fpu/e_ilogb.S b/sysdeps/i386/fpu/e_ilogb.S deleted file mode 100644 index f4b792c..0000000 --- a/sysdeps/i386/fpu/e_ilogb.S +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_ilogb.S,v 1.5 1995/10/12 15:53:09 jtc Exp $") - -ENTRY(__ieee754_ilogb) - fldl 4(%esp) -/* I added the following ugly construct because ilogb(+-Inf) is - required to return INT_MAX in ISO C99. - -- jakub@redhat.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - cmpb $0x40, %dh - je 2f /* Is +-0, jump. */ - - fxtract - pushl %eax - cfi_adjust_cfa_offset (4) - fstp %st - - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - - ret - -1: fstp %st - movl $0x7fffffff, %eax - ret -2: fstp %st - movl $0x80000000, %eax /* FP_ILOGB0 */ - ret -END (__ieee754_ilogb) diff --git a/sysdeps/i386/fpu/e_ilogbf.S b/sysdeps/i386/fpu/e_ilogbf.S deleted file mode 100644 index 37298b9..0000000 --- a/sysdeps/i386/fpu/e_ilogbf.S +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Public domain. - */ - -#include <machine/asm.h> - -RCSID("$NetBSD: s_ilogbf.S,v 1.4 1995/10/22 20:32:43 pk Exp $") - -ENTRY(__ieee754_ilogbf) - flds 4(%esp) -/* I added the following ugly construct because ilogb(+-Inf) is - required to return INT_MAX in ISO C99. - -- jakub@redhat.com. */ - fxam /* Is NaN or +-Inf? */ - fstsw %ax - movb $0x45, %dh - andb %ah, %dh - cmpb $0x05, %dh - je 1f /* Is +-Inf, jump. */ - cmpb $0x40, %dh - je 2f /* Is +-0, jump. */ - - fxtract - pushl %eax - cfi_adjust_cfa_offset (4) - fstp %st - - fistpl (%esp) - fwait - popl %eax - cfi_adjust_cfa_offset (-4) - - ret - -1: fstp %st - movl $0x7fffffff, %eax - ret -2: fstp %st - movl $0x80000000, %eax /* FP_ILOGB0 */ - ret -END (__ieee754_ilogbf) diff --git a/sysdeps/i386/fpu/math_err.c b/sysdeps/i386/fpu/math_err.c deleted file mode 100644 index 1cc8931..0000000 --- a/sysdeps/i386/fpu/math_err.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. */ diff --git a/sysdeps/i386/nptl/rseq-access.h b/sysdeps/i386/nptl/rseq-access.h index 5e7e09d..ee78c61 100644 --- a/sysdeps/i386/nptl/rseq-access.h +++ b/sysdeps/i386/nptl/rseq-access.h @@ -16,28 +16,6 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#define __RSEQ_GETMEM(member) \ - ({ __typeof (RSEQ_SELF()->member) __value; \ - if (sizeof (__value) == 1) \ - asm volatile ("movb %%gs:%P2(%3),%b0" \ - : "=q" (__value) \ - : "0" (0), "i" (offsetof (struct rseq_area, member)), \ - "r" (__rseq_offset)); \ - else if (sizeof (__value) == 4) \ - asm volatile ("movl %%gs:%P1(%2),%0" \ - : "=r" (__value) \ - : "i" (offsetof (struct rseq_area, member)), \ - "r" (__rseq_offset)); \ - else /* 8 */ \ - { \ - asm volatile ("movl %%gs:%P1(%2),%%eax\n\t" \ - "movl %%gs:4+%P1(%2),%%edx" \ - : "=&A" (__value) \ - : "i" (offsetof (struct rseq_area, member)), \ - "r" (__rseq_offset)); \ - } \ - __value; }) - /* Read member of the RSEQ area directly. */ #define RSEQ_GETMEM(member) \ ({ \ @@ -45,7 +23,9 @@ || sizeof (RSEQ_SELF()->member) == 4 \ || sizeof (RSEQ_SELF()->member) == 8, \ "size of rseq data"); \ - __RSEQ_GETMEM(member); }) + (*(__typeof (RSEQ_SELF()->member) __seg_gs *) \ + (__rseq_offset + offsetof (struct rseq_area, member))); \ + }) /* Read member of the RSEQ area directly, with single-copy atomicity semantics. Static assert for types >= 64 bits since they can't be loaded atomically on @@ -55,28 +35,9 @@ _Static_assert (sizeof (RSEQ_SELF()->member) == 1 \ || sizeof (RSEQ_SELF()->member) == 4, \ "size of rseq data"); \ - __RSEQ_GETMEM(member); }) - -#define __RSEQ_SETMEM(member, value) \ - ({ \ - if (sizeof (RSEQ_SELF()->member) == 1) \ - asm volatile ("movb %b0,%%gs:%P1(%2)" : \ - : "iq" (value), \ - "i" (offsetof (struct rseq_area, member)), \ - "r" (__rseq_offset)); \ - else if (sizeof (RSEQ_SELF()->member) == 4) \ - asm volatile ("movl %0,%%gs:%P1(%2)" : \ - : "ir" (value), \ - "i" (offsetof (struct rseq_area, member)), \ - "r" (__rseq_offset)); \ - else /* 8 */ \ - { \ - asm volatile ("movl %%eax,%%gs:%P1(%2)\n\t" \ - "movl %%edx,%%gs:4+%P1(%2)" : \ - : "A" ((uint64_t) cast_to_integer (value)), \ - "i" (offsetof (struct rseq_area, member)), \ - "r" (__rseq_offset)); \ - }}) + (*(volatile __typeof (RSEQ_SELF()->member) __seg_gs *) \ + (__rseq_offset + offsetof (struct rseq_area, member))); \ + }) /* Set member of the RSEQ area directly. */ #define RSEQ_SETMEM(member, value) \ @@ -85,7 +46,9 @@ || sizeof (RSEQ_SELF()->member) == 4 \ || sizeof (RSEQ_SELF()->member) == 8, \ "size of rseq data"); \ - __RSEQ_SETMEM(member, value); }) + (*(__typeof (RSEQ_SELF()->member) __seg_gs *) \ + (__rseq_offset + offsetof (struct rseq_area, member)) = (value)); \ + }) /* Set member of the RSEQ area directly, with single-copy atomicity semantics. Static assert for types >= 64 bits since they can't be stored atomically on @@ -95,4 +58,6 @@ _Static_assert (sizeof (RSEQ_SELF()->member) == 1 \ || sizeof (RSEQ_SELF()->member) == 4, \ "size of rseq data"); \ - __RSEQ_SETMEM(member, value); }) + (*(volatile __typeof (RSEQ_SELF()->member) __seg_gs *) \ + (__rseq_offset + offsetof (struct rseq_area, member)) = (value)); \ + }) diff --git a/sysdeps/i386/nptl/tcb-access.h b/sysdeps/i386/nptl/tcb-access.h index dc84dfe..2a7f9d2 100644 --- a/sysdeps/i386/nptl/tcb-access.h +++ b/sysdeps/i386/nptl/tcb-access.h @@ -18,60 +18,35 @@ /* Read member of the thread descriptor directly. */ #define THREAD_GETMEM(descr, member) \ - ({ __typeof (descr->member) __value; \ - _Static_assert (sizeof (__value) == 1 \ - || sizeof (__value) == 4 \ - || sizeof (__value) == 8, \ + ({ \ + _Static_assert (sizeof (descr->member) == 1 \ + || sizeof (descr->member) == 4 \ + || sizeof (descr->member) == 8, \ "size of per-thread data"); \ - if (sizeof (__value) == 1) \ - asm volatile ("movb %%gs:%P2,%b0" \ - : "=q" (__value) \ - : "0" (0), "i" (offsetof (struct pthread, member))); \ - else if (sizeof (__value) == 4) \ - asm volatile ("movl %%gs:%P1,%0" \ - : "=r" (__value) \ - : "i" (offsetof (struct pthread, member))); \ - else /* 8 */ \ - { \ - asm volatile ("movl %%gs:%P1,%%eax\n\t" \ - "movl %%gs:%P2,%%edx" \ - : "=A" (__value) \ - : "i" (offsetof (struct pthread, member)), \ - "i" (offsetof (struct pthread, member) + 4)); \ - } \ - __value; }) + (*(__typeof (descr->member) __seg_gs *) \ + offsetof (struct pthread, member)); \ + }) -/* THREAD_GETMEM already forces a read. */ -#define THREAD_GETMEM_VOLATILE(descr, member) THREAD_GETMEM (descr, member) +#define THREAD_GETMEM_VOLATILE(descr, member) \ + ({ \ + _Static_assert (sizeof (descr->member) == 1 \ + || sizeof (descr->member) == 4 \ + || sizeof (descr->member) == 8, \ + "size of per-thread data"); \ + (*(volatile __typeof (descr->member) __seg_gs *) \ + offsetof (struct pthread, member)); \ + }) /* Same as THREAD_GETMEM, but the member offset can be non-constant. */ #define THREAD_GETMEM_NC(descr, member, idx) \ - ({ __typeof (descr->member[0]) __value; \ - _Static_assert (sizeof (__value) == 1 \ - || sizeof (__value) == 4 \ - || sizeof (__value) == 8, \ + ({ \ + _Static_assert (sizeof (descr->member[0]) == 1 \ + || sizeof (descr->member[0]) == 4 \ + || sizeof (descr->member[0]) == 8, \ "size of per-thread data"); \ - if (sizeof (__value) == 1) \ - asm volatile ("movb %%gs:%P2(%3),%b0" \ - : "=q" (__value) \ - : "0" (0), "i" (offsetof (struct pthread, member[0])), \ - "r" (idx)); \ - else if (sizeof (__value) == 4) \ - asm volatile ("movl %%gs:%P1(,%2,4),%0" \ - : "=r" (__value) \ - : "i" (offsetof (struct pthread, member[0])), \ - "r" (idx)); \ - else /* 8 */ \ - { \ - asm volatile ("movl %%gs:%P1(,%2,8),%%eax\n\t" \ - "movl %%gs:4+%P1(,%2,8),%%edx" \ - : "=&A" (__value) \ - : "i" (offsetof (struct pthread, member[0])), \ - "r" (idx)); \ - } \ - __value; }) - - + (*(__typeof (descr->member[0]) __seg_gs *) \ + offsetof (struct pthread, member[idx])); \ + }) /* Set member of the thread descriptor directly. */ #define THREAD_SETMEM(descr, member, value) \ @@ -80,23 +55,9 @@ || sizeof (descr->member) == 4 \ || sizeof (descr->member) == 8, \ "size of per-thread data"); \ - if (sizeof (descr->member) == 1) \ - asm volatile ("movb %b0,%%gs:%P1" : \ - : "iq" (value), \ - "i" (offsetof (struct pthread, member))); \ - else if (sizeof (descr->member) == 4) \ - asm volatile ("movl %0,%%gs:%P1" : \ - : "ir" (value), \ - "i" (offsetof (struct pthread, member))); \ - else /* 8 */ \ - { \ - asm volatile ("movl %%eax,%%gs:%P1\n\t" \ - "movl %%edx,%%gs:%P2" : \ - : "A" ((uint64_t) cast_to_integer (value)), \ - "i" (offsetof (struct pthread, member)), \ - "i" (offsetof (struct pthread, member) + 4)); \ - }}) - + (*(__typeof (descr->member) __seg_gs *) \ + offsetof (struct pthread, member) = (value)); \ + }) /* Same as THREAD_SETMEM, but the member offset can be non-constant. */ #define THREAD_SETMEM_NC(descr, member, idx, value) \ @@ -105,21 +66,6 @@ || sizeof (descr->member[0]) == 4 \ || sizeof (descr->member[0]) == 8, \ "size of per-thread data"); \ - if (sizeof (descr->member[0]) == 1) \ - asm volatile ("movb %b0,%%gs:%P1(%2)" : \ - : "iq" (value), \ - "i" (offsetof (struct pthread, member)), \ - "r" (idx)); \ - else if (sizeof (descr->member[0]) == 4) \ - asm volatile ("movl %0,%%gs:%P1(,%2,4)" : \ - : "ir" (value), \ - "i" (offsetof (struct pthread, member)), \ - "r" (idx)); \ - else /* 8 */ \ - { \ - asm volatile ("movl %%eax,%%gs:%P1(,%2,8)\n\t" \ - "movl %%edx,%%gs:4+%P1(,%2,8)" : \ - : "A" ((uint64_t) cast_to_integer (value)), \ - "i" (offsetof (struct pthread, member)), \ - "r" (idx)); \ - }}) + (*(__typeof (descr->member[0]) __seg_gs *) \ + offsetof (struct pthread, member[idx]) = (value)); \ + }) diff --git a/sysdeps/i386/nptl/tls.h b/sysdeps/i386/nptl/tls.h index 01eaa65..35202b0 100644 --- a/sysdeps/i386/nptl/tls.h +++ b/sysdeps/i386/nptl/tls.h @@ -221,22 +221,9 @@ tls_fill_user_desc (union user_desc_init *desc, THREAD_GETMEM (__pd, header.dtv); }) -/* Return the thread descriptor for the current thread. - - The contained asm must *not* be marked volatile since otherwise - assignments like - pthread_descr self = thread_self(); - do not get optimized away. */ -# if __GNUC_PREREQ (6, 0) -# define THREAD_SELF \ +/* Return the thread descriptor for the current thread. */ +# define THREAD_SELF \ (*(struct pthread *__seg_gs *) offsetof (struct pthread, header.self)) -# else -# define THREAD_SELF \ - ({ struct pthread *__self; \ - asm ("movl %%gs:%c1,%0" : "=r" (__self) \ - : "i" (offsetof (struct pthread, header.self))); \ - __self;}) -# endif /* Magic for libthread_db to know how to do THREAD_SELF. */ # define DB_THREAD_SELF \ diff --git a/sysdeps/i386/stackinfo.h b/sysdeps/i386/stackinfo.h index 74e8227..8d7a46c 100644 --- a/sysdeps/i386/stackinfo.h +++ b/sysdeps/i386/stackinfo.h @@ -26,9 +26,9 @@ /* On x86 the stack grows down. */ #define _STACK_GROWS_DOWN 1 -/* Default to an executable stack. PF_X can be overridden if PT_GNU_STACK is - * present, but it is presumed absent. */ -#define DEFAULT_STACK_PERMS (PF_R|PF_W|PF_X) +/* Default to an executable stack. PROT_EXEC can be overridden if PT_GNU_STACK + * is present, but it is presumed absent. */ +#define DEFAULT_STACK_PROT_PERMS (PROT_READ|PROT_WRITE|PROT_EXEC) /* Access to the stack pointer. The macros are used in alloca_account for which they need to act as barriers as well, hence the additional diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h new file mode 100644 index 0000000..0708e5a --- /dev/null +++ b/sysdeps/i386/tls-get-addr-wrapper.h @@ -0,0 +1,127 @@ +/* Wrapper of i386 ___tls_get_addr to save and restore vector registers. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#undef REGISTER_SAVE_AREA + +#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 +# error STATE_SAVE_ALIGNMENT must be multiple of 16 +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK +# ifdef USE_FNSAVE +# error USE_FNSAVE shouldn't be defined +# endif +# ifdef USE_FXSAVE +/* Use fxsave to save all registers. */ +# define REGISTER_SAVE_AREA 512 +# endif +#else +# ifdef USE_FNSAVE +/* Use fnsave to save x87 FPU stack registers. */ +# define REGISTER_SAVE_AREA 108 +# else +# ifndef USE_FXSAVE +# error USE_FXSAVE must be defined +# endif +/* Use fxsave to save all registers. Add 12 bytes to align the stack + to 16 bytes. */ +# define REGISTER_SAVE_AREA (512 + 12) +# endif +#endif + +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + movl %ebx, 28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + and $-STATE_SAVE_ALIGNMENT, %esp +#endif +#ifdef REGISTER_SAVE_AREA + subl $REGISTER_SAVE_AREA, %esp +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +# endif +#else +# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true +# endif + /* Allocate stack space of the required size to save the state. */ + LOAD_PIC_REG (cx) + subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \ + +XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp +#endif +#ifdef USE_FNSAVE + fnsave (%esp) +#elif defined USE_FXSAVE + fxsave (%esp) +#else + /* Save the argument for ___tls_get_addr in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + /* Clear the XSAVE Header. */ +# ifdef USE_XSAVE + movl %edx, (512)(%esp) + movl %edx, (512 + 4 * 1)(%esp) + movl %edx, (512 + 4 * 2)(%esp) + movl %edx, (512 + 4 * 3)(%esp) +# endif + movl %edx, (512 + 4 * 4)(%esp) + movl %edx, (512 + 4 * 5)(%esp) + movl %edx, (512 + 4 * 6)(%esp) + movl %edx, (512 + 4 * 7)(%esp) + movl %edx, (512 + 4 * 8)(%esp) + movl %edx, (512 + 4 * 9)(%esp) + movl %edx, (512 + 4 * 10)(%esp) + movl %edx, (512 + 4 * 11)(%esp) + movl %edx, (512 + 4 * 12)(%esp) + movl %edx, (512 + 4 * 13)(%esp) + movl %edx, (512 + 4 * 14)(%esp) + movl %edx, (512 + 4 * 15)(%esp) +# ifdef USE_XSAVE + xsave (%esp) +# else + xsavec (%esp) +# endif + /* Restore the argument for ___tls_get_addr in EAX. */ + movl %ecx, %eax +#endif + call ___tls_get_addr_internal + /* Get register content back. */ +#ifdef USE_FNSAVE + frstor (%esp) +#elif defined USE_FXSAVE + fxrstor (%esp) +#else + /* Save and retore ___tls_get_addr return value stored in EAX. */ + movl %eax, %ecx + movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax + xorl %edx, %edx + xrstor (%esp) + movl %ecx, %eax +#endif +#if DL_RUNTIME_RESOLVE_REALIGN_STACK + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl 28(%esp), %ebx + cfi_restore(%ebx) +#else + addl $REGISTER_SAVE_AREA, %esp + cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +#endif + +#undef STATE_SAVE_ALIGNMENT diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S new file mode 100644 index 0000000..7d143d8 --- /dev/null +++ b/sysdeps/i386/tls_get_addr.S @@ -0,0 +1,57 @@ +/* Thread-local storage handling in the ELF dynamic linker. i386 version. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <tls.h> +#include <cpu-features-offsets.h> +#include <features-offsets.h> + + .text +#ifdef SHARED +# define USE_FNSAVE +# define MINIMUM_ALIGNMENT 4 +# define STATE_SAVE_ALIGNMENT 4 +# define ___tls_get_addr _____tls_get_addr_fnsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef MINIMUM_ALIGNMENT +# undef USE_FNSAVE + +# define MINIMUM_ALIGNMENT 16 + +# define USE_FXSAVE +# define STATE_SAVE_ALIGNMENT 16 +# define ___tls_get_addr _____tls_get_addr_fxsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_FXSAVE + +# define USE_XSAVE +# define STATE_SAVE_ALIGNMENT 64 +# define ___tls_get_addr _____tls_get_addr_xsave +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_XSAVE + +# define USE_XSAVEC +# define STATE_SAVE_ALIGNMENT 64 +# define ___tls_get_addr _____tls_get_addr_xsavec +# include "tls_get_addr.h" +# undef ___tls_get_addr +# undef USE_XSAVEC +#endif /* SHARED */ diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h new file mode 100644 index 0000000..1825798 --- /dev/null +++ b/sysdeps/i386/tls_get_addr.h @@ -0,0 +1,42 @@ +/* Thread-local storage handling in the ELF dynamic linker. i386 version. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + + .hidden ___tls_get_addr + .global ___tls_get_addr + .type ___tls_get_addr,@function + + /* This function is a wrapper of ___tls_get_addr_internal to + preserve caller-saved vector registers. */ + + cfi_startproc + .align 16 +___tls_get_addr: + /* Like all TLS resolvers, preserve call-clobbered registers. + We need two scratch regs anyway. */ + subl $32, %esp + cfi_adjust_cfa_offset (32) + movl %ecx, 20(%esp) + movl %edx, 24(%esp) +#include "tls-get-addr-wrapper.h" + movl 20(%esp), %ecx + movl 24(%esp), %edx + addl $32, %esp + cfi_adjust_cfa_offset (-32) + ret + cfi_endproc + .size ___tls_get_addr, .-___tls_get_addr |