diff options
Diffstat (limited to 'sysdeps/x86')
-rw-r--r-- | sysdeps/x86/Makefile | 36 | ||||
-rw-r--r-- | sysdeps/x86/configure | 4 | ||||
-rw-r--r-- | sysdeps/x86/configure.ac | 2 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.c | 307 | ||||
-rw-r--r-- | sysdeps/x86/sysdep.h | 23 | ||||
-rw-r--r-- | sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c | 2 | ||||
-rw-r--r-- | sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c | 2 | ||||
-rw-r--r-- | sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c | 2 | ||||
-rw-r--r-- | sysdeps/x86/tst-gnu2-tls2.c | 36 | ||||
-rw-r--r-- | sysdeps/x86/tst-gnu2-tls2.h | 37 | ||||
-rw-r--r-- | sysdeps/x86/tst-tls23.c | 22 | ||||
-rw-r--r-- | sysdeps/x86/tst-tls23.h | 35 |
12 files changed, 342 insertions, 166 deletions
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 01b0192..4fbd48e 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -4,7 +4,13 @@ endif ifeq ($(subdir),elf) sysdep_routines += get-cpuid-feature-leaf -sysdep-dl-routines += dl-get-cpu-features +sysdep-dl-routines += \ + dl-get-cpu-features \ + dl-tlsdesc \ + tls_get_addr \ + tlsdesc \ +# sysdep-dl-routines + sysdep_headers += \ bits/platform/features.h \ bits/platform/x86.h \ @@ -90,14 +96,22 @@ tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512 tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV) tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd) -CFLAGS-tst-gnu2-tls2.c += -msse +CFLAGS-tst-gnu2-tls2.c += -msse2 CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell -LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy -LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy -LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy +LDFLAGS-tst-gnu2-tls2 += -rdynamic +LDFLAGS-tst-gnu2-tls2mod0.so += -Wl,-z,undefs +LDFLAGS-tst-gnu2-tls2mod1.so += -Wl,-z,undefs +LDFLAGS-tst-gnu2-tls2mod2.so += -Wl,-z,undefs + +CFLAGS-tst-gnu2-tls2-x86-noxsave.c += -msse2 +CFLAGS-tst-gnu2-tls2-x86-noxsavec.c += -msse2 +CFLAGS-tst-gnu2-tls2-x86-noxsavexsavec.c += -msse2 +LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy -rdynamic +LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy -rdynamic +LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy -rdynamic # Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled # via tunable. @@ -113,6 +127,18 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ $(objpfx)tst-gnu2-tls2mod0.so \ $(objpfx)tst-gnu2-tls2mod1.so \ $(objpfx)tst-gnu2-tls2mod2.so + +CFLAGS-tst-tls23.c += -msse2 +CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell + +LDFLAGS-tst-tls23 += -rdynamic +tst-tls23-mod.so-no-z-defs = yes + +$(objpfx)tst-tls23-mod.so: $(libsupport) +endif + +ifeq ($(subdir),gmon) +CFLAGS-mcount.c += -mgeneral-regs-only endif ifeq ($(subdir),math) diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure index c7ea9ac..dff26e9 100644 --- a/sysdeps/x86/configure +++ b/sysdeps/x86/configure @@ -171,8 +171,12 @@ fi config_vars="$config_vars have-x86-isa-level = $libc_cv_have_x86_isa_level" config_vars="$config_vars +x86-isa-level-2-or-above = 2 3 4" +config_vars="$config_vars x86-isa-level-3-or-above = 3 4" config_vars="$config_vars +x86-isa-level-4-or-above = 4" +config_vars="$config_vars enable-x86-isa-level = $libc_cv_include_x86_isa_level" diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac index 031f917..54960a7 100644 --- a/sysdeps/x86/configure.ac +++ b/sysdeps/x86/configure.ac @@ -117,7 +117,9 @@ else AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level) fi LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level]) +LIBC_CONFIG_VAR([x86-isa-level-2-or-above], [2 3 4]) LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4]) +LIBC_CONFIG_VAR([x86-isa-level-4-or-above], [4]) LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level]) dnl Check if TEST_CC supports -mfpmath=387 diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 9d136e4..b7d1506 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -502,8 +502,8 @@ _Static_assert (((index_arch_Fast_Unaligned_Load "Incorrect index_arch_Fast_Unaligned_Load"); -/* Intel Family-6 microarch list. */ -enum +/* Intel microarch list. */ +enum intel_microarch { /* Atom processors. */ INTEL_ATOM_BONNELL, @@ -512,6 +512,7 @@ enum INTEL_ATOM_GOLDMONT, INTEL_ATOM_GOLDMONT_PLUS, INTEL_ATOM_SIERRAFOREST, + INTEL_ATOM_CLEARWATERFOREST, INTEL_ATOM_GRANDRIDGE, INTEL_ATOM_TREMONT, @@ -539,7 +540,9 @@ enum INTEL_BIGCORE_METEORLAKE, INTEL_BIGCORE_LUNARLAKE, INTEL_BIGCORE_ARROWLAKE, + INTEL_BIGCORE_PANTHERLAKE, INTEL_BIGCORE_GRANITERAPIDS, + INTEL_BIGCORE_DIAMONDRAPIDS, /* Mixed (bigcore + atom SOC). */ INTEL_MIXED_LAKEFIELD, @@ -553,7 +556,7 @@ enum INTEL_UNKNOWN, }; -static unsigned int +static enum intel_microarch intel_get_fam6_microarch (unsigned int model, __attribute__ ((unused)) unsigned int stepping) { @@ -584,6 +587,8 @@ intel_get_fam6_microarch (unsigned int model, return INTEL_ATOM_GOLDMONT_PLUS; case 0xAF: return INTEL_ATOM_SIERRAFOREST; + case 0xDD: + return INTEL_ATOM_CLEARWATERFOREST; case 0xB6: return INTEL_ATOM_GRANDRIDGE; case 0x86: @@ -691,8 +696,12 @@ intel_get_fam6_microarch (unsigned int model, return INTEL_BIGCORE_METEORLAKE; case 0xbd: return INTEL_BIGCORE_LUNARLAKE; + case 0xb5: + case 0xc5: case 0xc6: return INTEL_BIGCORE_ARROWLAKE; + case 0xCC: + return INTEL_BIGCORE_PANTHERLAKE; case 0xAD: case 0xAE: return INTEL_BIGCORE_GRANITERAPIDS; @@ -756,133 +765,20 @@ init_cpu_features (struct cpu_features *cpu_features) cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] &= ~bit_arch_Avoid_Non_Temporal_Memset; + enum intel_microarch microarch = INTEL_UNKNOWN; if (family == 0x06) { model += extended_model; - unsigned int microarch - = intel_get_fam6_microarch (model, stepping); + microarch = intel_get_fam6_microarch (model, stepping); + /* Disable TSX on some processors to avoid TSX on kernels that + weren't updated with the latest microcode package (which + disables broken feature by default). */ switch (microarch) { - /* Atom / KNL tuning. */ - case INTEL_ATOM_BONNELL: - /* BSF is slow on Bonnell. */ - cpu_features->preferred[index_arch_Slow_BSF] - |= bit_arch_Slow_BSF; - break; - - /* Unaligned load versions are faster than SSSE3 - on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ - case INTEL_ATOM_AIRMONT: - case INTEL_ATOM_SILVERMONT: - case INTEL_ATOM_GOLDMONT: - case INTEL_ATOM_GOLDMONT_PLUS: - - /* Knights Landing. Enable Silvermont optimizations. */ - case INTEL_KNIGHTS_LANDING: - - cpu_features->preferred[index_arch_Fast_Unaligned_Load] - |= (bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop - | bit_arch_Slow_SSE4_2); - break; - - case INTEL_ATOM_TREMONT: - /* Enable rep string instructions, unaligned load, unaligned - copy, pminub and avoid SSE 4.2 on Tremont. */ - cpu_features->preferred[index_arch_Fast_Rep_String] - |= (bit_arch_Fast_Rep_String - | bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop - | bit_arch_Slow_SSE4_2); - break; - - /* - Default tuned Knights microarch. - case INTEL_KNIGHTS_MILL: - */ - - /* - Default tuned atom microarch. - case INTEL_ATOM_SIERRAFOREST: - case INTEL_ATOM_GRANDRIDGE: - */ - - /* Bigcore/Default Tuning. */ default: - default_tuning: - /* Unknown family 0x06 processors. Assuming this is one - of Core i3/i5/i7 processors if AVX is available. */ - if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) - break; - - enable_modern_features: - /* Rep string instructions, unaligned load, unaligned copy, - and pminub are fast on Intel Core i3, i5 and i7. */ - cpu_features->preferred[index_arch_Fast_Rep_String] - |= (bit_arch_Fast_Rep_String - | bit_arch_Fast_Unaligned_Load - | bit_arch_Fast_Unaligned_Copy - | bit_arch_Prefer_PMINUB_for_stringop); break; - case INTEL_BIGCORE_NEHALEM: - case INTEL_BIGCORE_WESTMERE: - /* Older CPUs prefer non-temporal stores at lower threshold. */ - cpu_features->cachesize_non_temporal_divisor = 8; - goto enable_modern_features; - - /* Older Bigcore microarch (smaller non-temporal store - threshold). */ - case INTEL_BIGCORE_SANDYBRIDGE: - case INTEL_BIGCORE_IVYBRIDGE: - case INTEL_BIGCORE_HASWELL: - case INTEL_BIGCORE_BROADWELL: - cpu_features->cachesize_non_temporal_divisor = 8; - goto default_tuning; - - /* Newer Bigcore microarch (larger non-temporal store - threshold). */ - case INTEL_BIGCORE_SKYLAKE_AVX512: - case INTEL_BIGCORE_CANNONLAKE: - /* Benchmarks indicate non-temporal memset is not - necessarily profitable on SKX (and in some cases much - worse). This is likely unique to SKX due its it unique - mesh interconnect (not present on ICX or BWD). Disable - non-temporal on all Skylake servers. */ - cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] - |= bit_arch_Avoid_Non_Temporal_Memset; - /* fallthrough */ - case INTEL_BIGCORE_COMETLAKE: - case INTEL_BIGCORE_SKYLAKE: - case INTEL_BIGCORE_KABYLAKE: - case INTEL_BIGCORE_ICELAKE: - case INTEL_BIGCORE_TIGERLAKE: - case INTEL_BIGCORE_ROCKETLAKE: - case INTEL_BIGCORE_RAPTORLAKE: - case INTEL_BIGCORE_METEORLAKE: - case INTEL_BIGCORE_LUNARLAKE: - case INTEL_BIGCORE_ARROWLAKE: - case INTEL_BIGCORE_SAPPHIRERAPIDS: - case INTEL_BIGCORE_EMERALDRAPIDS: - case INTEL_BIGCORE_GRANITERAPIDS: - cpu_features->cachesize_non_temporal_divisor = 2; - goto default_tuning; - - /* Default tuned Mixed (bigcore + atom SOC). */ - case INTEL_MIXED_LAKEFIELD: - case INTEL_MIXED_ALDERLAKE: - cpu_features->cachesize_non_temporal_divisor = 2; - goto default_tuning; - } - - /* Disable TSX on some processors to avoid TSX on kernels that - weren't updated with the latest microcode package (which - disables broken feature by default). */ - switch (microarch) - { case INTEL_BIGCORE_SKYLAKE_AVX512: /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */ if (stepping <= 5) @@ -891,38 +787,163 @@ init_cpu_features (struct cpu_features *cpu_features) case INTEL_BIGCORE_KABYLAKE: /* NB: Although the errata documents that for model == 0x8e - (kabylake skylake client), only 0xb stepping or lower are - impacted, the intention of the errata was to disable TSX on - all client processors on all steppings. Include 0xc - stepping which is an Intel Core i7-8665U, a client mobile - processor. */ + (kabylake skylake client), only 0xb stepping or lower are + impacted, the intention of the errata was to disable TSX on + all client processors on all steppings. Include 0xc + stepping which is an Intel Core i7-8665U, a client mobile + processor. */ if (stepping > 0xc) break; /* Fall through. */ case INTEL_BIGCORE_SKYLAKE: - /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for - processors listed in: - -https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html - */ - disable_tsx: - CPU_FEATURE_UNSET (cpu_features, HLE); - CPU_FEATURE_UNSET (cpu_features, RTM); - CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); - break; + /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for + processors listed in: + + https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html + */ +disable_tsx: + CPU_FEATURE_UNSET (cpu_features, HLE); + CPU_FEATURE_UNSET (cpu_features, RTM); + CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT); + break; case INTEL_BIGCORE_HASWELL: - /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working - TSX. Haswell also include other model numbers that have - working TSX. */ - if (model == 0x3f && stepping >= 4) + /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working + TSX. Haswell also includes other model numbers that have + working TSX. */ + if (model == 0x3f && stepping >= 4) break; - CPU_FEATURE_UNSET (cpu_features, RTM); - break; + CPU_FEATURE_UNSET (cpu_features, RTM); + break; } } + else if (family == 19) + switch (model) + { + case 0x01: + microarch = INTEL_BIGCORE_DIAMONDRAPIDS; + break; + default: + break; + } + + switch (microarch) + { + /* Atom / KNL tuning. */ + case INTEL_ATOM_BONNELL: + /* BSF is slow on Bonnell. */ + cpu_features->preferred[index_arch_Slow_BSF] + |= bit_arch_Slow_BSF; + break; + + /* Unaligned load versions are faster than SSSE3 + on Airmont, Silvermont, Goldmont, and Goldmont Plus. */ + case INTEL_ATOM_AIRMONT: + case INTEL_ATOM_SILVERMONT: + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_PLUS: + + /* Knights Landing. Enable Silvermont optimizations. */ + case INTEL_KNIGHTS_LANDING: + + cpu_features->preferred[index_arch_Fast_Unaligned_Load] + |= (bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); + break; + + case INTEL_ATOM_TREMONT: + /* Enable rep string instructions, unaligned load, unaligned + copy, pminub and avoid SSE 4.2 on Tremont. */ + cpu_features->preferred[index_arch_Fast_Rep_String] + |= (bit_arch_Fast_Rep_String + | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop + | bit_arch_Slow_SSE4_2); + break; + + /* + Default tuned Knights microarch. + case INTEL_KNIGHTS_MILL: + */ + + /* + Default tuned atom microarch. + case INTEL_ATOM_SIERRAFOREST: + case INTEL_ATOM_GRANDRIDGE: + case INTEL_ATOM_CLEARWATERFOREST: + */ + + /* Bigcore/Default Tuning. */ + default: + default_tuning: + /* Unknown Intel processors. Assuming this is one of Core + i3/i5/i7 processors if AVX is available. */ + if (!CPU_FEATURES_CPU_P (cpu_features, AVX)) + break; + + enable_modern_features: + /* Rep string instructions, unaligned load, unaligned copy, + and pminub are fast on Intel Core i3, i5 and i7. */ + cpu_features->preferred[index_arch_Fast_Rep_String] + |= (bit_arch_Fast_Rep_String + | bit_arch_Fast_Unaligned_Load + | bit_arch_Fast_Unaligned_Copy + | bit_arch_Prefer_PMINUB_for_stringop); + break; + + case INTEL_BIGCORE_NEHALEM: + case INTEL_BIGCORE_WESTMERE: + /* Older CPUs prefer non-temporal stores at lower threshold. */ + cpu_features->cachesize_non_temporal_divisor = 8; + goto enable_modern_features; + + /* Older Bigcore microarch (smaller non-temporal store + threshold). */ + case INTEL_BIGCORE_SANDYBRIDGE: + case INTEL_BIGCORE_IVYBRIDGE: + case INTEL_BIGCORE_HASWELL: + case INTEL_BIGCORE_BROADWELL: + cpu_features->cachesize_non_temporal_divisor = 8; + goto default_tuning; + + /* Newer Bigcore microarch (larger non-temporal store + threshold). */ + case INTEL_BIGCORE_SKYLAKE_AVX512: + case INTEL_BIGCORE_CANNONLAKE: + /* Benchmarks indicate non-temporal memset is not + necessarily profitable on SKX (and in some cases much + worse). This is likely unique to SKX due to its unique + mesh interconnect (not present on ICX or BWD). Disable + non-temporal on all Skylake servers. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + |= bit_arch_Avoid_Non_Temporal_Memset; + /* fallthrough */ + case INTEL_BIGCORE_COMETLAKE: + case INTEL_BIGCORE_SKYLAKE: + case INTEL_BIGCORE_KABYLAKE: + case INTEL_BIGCORE_ICELAKE: + case INTEL_BIGCORE_TIGERLAKE: + case INTEL_BIGCORE_ROCKETLAKE: + case INTEL_BIGCORE_RAPTORLAKE: + case INTEL_BIGCORE_METEORLAKE: + case INTEL_BIGCORE_LUNARLAKE: + case INTEL_BIGCORE_ARROWLAKE: + case INTEL_BIGCORE_PANTHERLAKE: + case INTEL_BIGCORE_SAPPHIRERAPIDS: + case INTEL_BIGCORE_EMERALDRAPIDS: + case INTEL_BIGCORE_GRANITERAPIDS: + case INTEL_BIGCORE_DIAMONDRAPIDS: + /* Default tuned Mixed (bigcore + atom SOC). */ + case INTEL_MIXED_LAKEFIELD: + case INTEL_MIXED_ALDERLAKE: + cpu_features->cachesize_non_temporal_divisor = 2; + goto default_tuning; + } /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER if AVX512ER is available. Don't use AVX512 to avoid lower CPU @@ -1235,7 +1256,7 @@ no_cpuid: #endif if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL - || (GLRO(dl_x86_cpu_features).xsave_state_size != 0)) + || cpu_features->xsave_state_size != 0) { if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) { diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index c3c73e7..b8e963b 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -183,6 +183,29 @@ #define atom_text_section .section ".text.atom", "ax" +#ifndef DL_STACK_ALIGNMENT +/* Due to GCC bug: + + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 + + __tls_get_addr may be called with 8-byte/4-byte stack alignment. + Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't + assume that stack will be always aligned at 16 bytes. */ +# ifdef __x86_64__ +# define DL_STACK_ALIGNMENT 8 +# define MINIMUM_ALIGNMENT 16 +# else +# define DL_STACK_ALIGNMENT 4 +# endif +#endif + +/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for + STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling + _dl_fixup/__tls_get_addr. */ +#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ + (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ + || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) + #endif /* __ASSEMBLER__ */ #endif /* _X86_SYSDEP_H */ diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c index f0024c1..963c4f3 100644 --- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c @@ -1 +1 @@ -#include <elf/tst-gnu2-tls2.c> +#include <tst-gnu2-tls2.c> diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c index f0024c1..963c4f3 100644 --- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c @@ -1 +1 @@ -#include <elf/tst-gnu2-tls2.c> +#include <tst-gnu2-tls2.c> diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c index f0024c1..963c4f3 100644 --- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c +++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c @@ -1 +1 @@ -#include <elf/tst-gnu2-tls2.c> +#include <tst-gnu2-tls2.c> diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c index de900a4..b3195ff 100644 --- a/sysdeps/x86/tst-gnu2-tls2.c +++ b/sysdeps/x86/tst-gnu2-tls2.c @@ -1,20 +1,26 @@ -#ifndef __x86_64__ -#include <sys/platform/x86.h> +#ifndef TEST_AMX +# ifndef __x86_64__ +# include <sys/platform/x86.h> -#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) -#endif +# define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) +# endif -/* Clear XMM0...XMM7 */ -#define PREPARE_MALLOC() \ -{ \ - asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" ); \ - asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" ); \ - asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" ); \ - asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" ); \ - asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" ); \ - asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" ); \ - asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" ); \ - asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" ); \ +/* Set XMM0...XMM7 to all 1s. */ +# define PREPARE_MALLOC() \ +{ \ + asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ + asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ + asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ + asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ + asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ + asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ + asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ + asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ } +#endif #include <elf/tst-gnu2-tls2.c> + +#ifndef TEST_AMX +v2di v1, v2, v3; +#endif diff --git a/sysdeps/x86/tst-gnu2-tls2.h b/sysdeps/x86/tst-gnu2-tls2.h new file mode 100644 index 0000000..fdbb565 --- /dev/null +++ b/sysdeps/x86/tst-gnu2-tls2.h @@ -0,0 +1,37 @@ +/* Test TLSDESC relocation, x86 version. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef TEST_AMX +# include <support/check.h> + +typedef long long v2di __attribute__((vector_size(16))); +extern v2di v1, v2, v3; + +# define BEFORE_TLSDESC_CALL() \ + v1 = __extension__(v2di){0, 0}; \ + v2 = __extension__(v2di){0, 0}; + +# define AFTER_TLSDESC_CALL() \ + v3 = __extension__(v2di){0, 0}; \ + asm volatile ("" : "+x" (v3)); \ + union { v2di x; long long a[2]; } u; \ + u.x = v3; \ + TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); +#endif + +#include <elf/tst-gnu2-tls2.h> diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c new file mode 100644 index 0000000..6130d91 --- /dev/null +++ b/sysdeps/x86/tst-tls23.c @@ -0,0 +1,22 @@ +#ifndef __x86_64__ +#include <sys/platform/x86.h> + +#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) +#endif + +/* Set XMM0...XMM7 to all 1s. */ +#define PREPARE_MALLOC() \ +{ \ + asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ + asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ + asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ + asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ + asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ + asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ + asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ + asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ +} + +#include <elf/tst-tls23.c> + +v2di v1, v2, v3; diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h new file mode 100644 index 0000000..21cee4c --- /dev/null +++ b/sysdeps/x86/tst-tls23.h @@ -0,0 +1,35 @@ +/* Test that __tls_get_addr preserves XMM registers. + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <support/check.h> + +typedef long long v2di __attribute__((vector_size(16))); +extern v2di v1, v2, v3; + +#define BEFORE_TLS_CALL() \ + v1 = __extension__(v2di){0, 0}; \ + v2 = __extension__(v2di){0, 0}; + +#define AFTER_TLS_CALL() \ + v3 = __extension__(v2di){0, 0}; \ + asm volatile ("" : "+x" (v3)); \ + union { v2di x; long long a[2]; } u; \ + u.x = v3; \ + TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); + +#include <elf/tst-tls23.h> |