aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86')
-rw-r--r-- sysdeps/x86/Makefile 36
-rw-r--r-- sysdeps/x86/configure 4
-rw-r--r-- sysdeps/x86/configure.ac 2
-rw-r--r-- sysdeps/x86/cpu-features.c 307
-rw-r--r-- sysdeps/x86/sysdep.h 23
-rw-r--r-- sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c 2
-rw-r--r-- sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c 2
-rw-r--r-- sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c 2
-rw-r--r-- sysdeps/x86/tst-gnu2-tls2.c 36
-rw-r--r-- sysdeps/x86/tst-gnu2-tls2.h 37
-rw-r--r-- sysdeps/x86/tst-tls23.c 22
-rw-r--r-- sysdeps/x86/tst-tls23.h 35
12 files changed, 342 insertions, 166 deletions
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 01b0192..4fbd48e 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -4,7 +4,13 @@ endif
ifeq ($(subdir),elf)
sysdep_routines += get-cpuid-feature-leaf
-sysdep-dl-routines += dl-get-cpu-features
+sysdep-dl-routines += \
+ dl-get-cpu-features \
+ dl-tlsdesc \
+ tls_get_addr \
+ tlsdesc \
+# sysdep-dl-routines
+
sysdep_headers += \
bits/platform/features.h \
bits/platform/x86.h \
@@ -90,14 +96,22 @@ tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512
tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV)
tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
-CFLAGS-tst-gnu2-tls2.c += -msse
+CFLAGS-tst-gnu2-tls2.c += -msse2
CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
-LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy
-LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy
-LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy
+LDFLAGS-tst-gnu2-tls2 += -rdynamic
+LDFLAGS-tst-gnu2-tls2mod0.so += -Wl,-z,undefs
+LDFLAGS-tst-gnu2-tls2mod1.so += -Wl,-z,undefs
+LDFLAGS-tst-gnu2-tls2mod2.so += -Wl,-z,undefs
+
+CFLAGS-tst-gnu2-tls2-x86-noxsave.c += -msse2
+CFLAGS-tst-gnu2-tls2-x86-noxsavec.c += -msse2
+CFLAGS-tst-gnu2-tls2-x86-noxsavexsavec.c += -msse2
+LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy -rdynamic
+LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy -rdynamic
+LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy -rdynamic
# Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled
# via tunable.
@@ -113,6 +127,18 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \
$(objpfx)tst-gnu2-tls2mod0.so \
$(objpfx)tst-gnu2-tls2mod1.so \
$(objpfx)tst-gnu2-tls2mod2.so
+
+CFLAGS-tst-tls23.c += -msse2
+CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell
+
+LDFLAGS-tst-tls23 += -rdynamic
+tst-tls23-mod.so-no-z-defs = yes
+
+$(objpfx)tst-tls23-mod.so: $(libsupport)
+endif
+
+ifeq ($(subdir),gmon)
+CFLAGS-mcount.c += -mgeneral-regs-only
endif
ifeq ($(subdir),math)
diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
index c7ea9ac..dff26e9 100644
--- a/sysdeps/x86/configure
+++ b/sysdeps/x86/configure
@@ -171,8 +171,12 @@ fi
config_vars="$config_vars
have-x86-isa-level = $libc_cv_have_x86_isa_level"
config_vars="$config_vars
+x86-isa-level-2-or-above = 2 3 4"
+config_vars="$config_vars
x86-isa-level-3-or-above = 3 4"
config_vars="$config_vars
+x86-isa-level-4-or-above = 4"
+config_vars="$config_vars
enable-x86-isa-level = $libc_cv_include_x86_isa_level"
diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
index 031f917..54960a7 100644
--- a/sysdeps/x86/configure.ac
+++ b/sysdeps/x86/configure.ac
@@ -117,7 +117,9 @@ else
AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level)
fi
LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level])
+LIBC_CONFIG_VAR([x86-isa-level-2-or-above], [2 3 4])
LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4])
+LIBC_CONFIG_VAR([x86-isa-level-4-or-above], [4])
LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
dnl Check if TEST_CC supports -mfpmath=387
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 9d136e4..b7d1506 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -502,8 +502,8 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
"Incorrect index_arch_Fast_Unaligned_Load");
-/* Intel Family-6 microarch list. */
-enum
+/* Intel microarch list. */
+enum intel_microarch
{
/* Atom processors. */
INTEL_ATOM_BONNELL,
@@ -512,6 +512,7 @@ enum
INTEL_ATOM_GOLDMONT,
INTEL_ATOM_GOLDMONT_PLUS,
INTEL_ATOM_SIERRAFOREST,
+ INTEL_ATOM_CLEARWATERFOREST,
INTEL_ATOM_GRANDRIDGE,
INTEL_ATOM_TREMONT,
@@ -539,7 +540,9 @@ enum
INTEL_BIGCORE_METEORLAKE,
INTEL_BIGCORE_LUNARLAKE,
INTEL_BIGCORE_ARROWLAKE,
+ INTEL_BIGCORE_PANTHERLAKE,
INTEL_BIGCORE_GRANITERAPIDS,
+ INTEL_BIGCORE_DIAMONDRAPIDS,
/* Mixed (bigcore + atom SOC). */
INTEL_MIXED_LAKEFIELD,
@@ -553,7 +556,7 @@ enum
INTEL_UNKNOWN,
};
-static unsigned int
+static enum intel_microarch
intel_get_fam6_microarch (unsigned int model,
__attribute__ ((unused)) unsigned int stepping)
{
@@ -584,6 +587,8 @@ intel_get_fam6_microarch (unsigned int model,
return INTEL_ATOM_GOLDMONT_PLUS;
case 0xAF:
return INTEL_ATOM_SIERRAFOREST;
+ case 0xDD:
+ return INTEL_ATOM_CLEARWATERFOREST;
case 0xB6:
return INTEL_ATOM_GRANDRIDGE;
case 0x86:
@@ -691,8 +696,12 @@ intel_get_fam6_microarch (unsigned int model,
return INTEL_BIGCORE_METEORLAKE;
case 0xbd:
return INTEL_BIGCORE_LUNARLAKE;
+ case 0xb5:
+ case 0xc5:
case 0xc6:
return INTEL_BIGCORE_ARROWLAKE;
+ case 0xCC:
+ return INTEL_BIGCORE_PANTHERLAKE;
case 0xAD:
case 0xAE:
return INTEL_BIGCORE_GRANITERAPIDS;
@@ -756,133 +765,20 @@ init_cpu_features (struct cpu_features *cpu_features)
cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
&= ~bit_arch_Avoid_Non_Temporal_Memset;
+ enum intel_microarch microarch = INTEL_UNKNOWN;
if (family == 0x06)
{
model += extended_model;
- unsigned int microarch
- = intel_get_fam6_microarch (model, stepping);
+ microarch = intel_get_fam6_microarch (model, stepping);
+ /* Disable TSX on some processors to avoid using TSX on kernels
+ that weren't updated with the latest microcode package (which
+ disables the broken feature by default). */
switch (microarch)
{
- /* Atom / KNL tuning. */
- case INTEL_ATOM_BONNELL:
- /* BSF is slow on Bonnell. */
- cpu_features->preferred[index_arch_Slow_BSF]
- |= bit_arch_Slow_BSF;
- break;
-
- /* Unaligned load versions are faster than SSSE3
- on Airmont, Silvermont, Goldmont, and Goldmont Plus. */
- case INTEL_ATOM_AIRMONT:
- case INTEL_ATOM_SILVERMONT:
- case INTEL_ATOM_GOLDMONT:
- case INTEL_ATOM_GOLDMONT_PLUS:
-
- /* Knights Landing. Enable Silvermont optimizations. */
- case INTEL_KNIGHTS_LANDING:
-
- cpu_features->preferred[index_arch_Fast_Unaligned_Load]
- |= (bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop
- | bit_arch_Slow_SSE4_2);
- break;
-
- case INTEL_ATOM_TREMONT:
- /* Enable rep string instructions, unaligned load, unaligned
- copy, pminub and avoid SSE 4.2 on Tremont. */
- cpu_features->preferred[index_arch_Fast_Rep_String]
- |= (bit_arch_Fast_Rep_String
- | bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop
- | bit_arch_Slow_SSE4_2);
- break;
-
- /*
- Default tuned Knights microarch.
- case INTEL_KNIGHTS_MILL:
- */
-
- /*
- Default tuned atom microarch.
- case INTEL_ATOM_SIERRAFOREST:
- case INTEL_ATOM_GRANDRIDGE:
- */
-
- /* Bigcore/Default Tuning. */
default:
- default_tuning:
- /* Unknown family 0x06 processors. Assuming this is one
- of Core i3/i5/i7 processors if AVX is available. */
- if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
- break;
-
- enable_modern_features:
- /* Rep string instructions, unaligned load, unaligned copy,
- and pminub are fast on Intel Core i3, i5 and i7. */
- cpu_features->preferred[index_arch_Fast_Rep_String]
- |= (bit_arch_Fast_Rep_String
- | bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop);
break;
- case INTEL_BIGCORE_NEHALEM:
- case INTEL_BIGCORE_WESTMERE:
- /* Older CPUs prefer non-temporal stores at lower threshold. */
- cpu_features->cachesize_non_temporal_divisor = 8;
- goto enable_modern_features;
-
- /* Older Bigcore microarch (smaller non-temporal store
- threshold). */
- case INTEL_BIGCORE_SANDYBRIDGE:
- case INTEL_BIGCORE_IVYBRIDGE:
- case INTEL_BIGCORE_HASWELL:
- case INTEL_BIGCORE_BROADWELL:
- cpu_features->cachesize_non_temporal_divisor = 8;
- goto default_tuning;
-
- /* Newer Bigcore microarch (larger non-temporal store
- threshold). */
- case INTEL_BIGCORE_SKYLAKE_AVX512:
- case INTEL_BIGCORE_CANNONLAKE:
- /* Benchmarks indicate non-temporal memset is not
- necessarily profitable on SKX (and in some cases much
- worse). This is likely unique to SKX due its it unique
- mesh interconnect (not present on ICX or BWD). Disable
- non-temporal on all Skylake servers. */
- cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
- |= bit_arch_Avoid_Non_Temporal_Memset;
- /* fallthrough */
- case INTEL_BIGCORE_COMETLAKE:
- case INTEL_BIGCORE_SKYLAKE:
- case INTEL_BIGCORE_KABYLAKE:
- case INTEL_BIGCORE_ICELAKE:
- case INTEL_BIGCORE_TIGERLAKE:
- case INTEL_BIGCORE_ROCKETLAKE:
- case INTEL_BIGCORE_RAPTORLAKE:
- case INTEL_BIGCORE_METEORLAKE:
- case INTEL_BIGCORE_LUNARLAKE:
- case INTEL_BIGCORE_ARROWLAKE:
- case INTEL_BIGCORE_SAPPHIRERAPIDS:
- case INTEL_BIGCORE_EMERALDRAPIDS:
- case INTEL_BIGCORE_GRANITERAPIDS:
- cpu_features->cachesize_non_temporal_divisor = 2;
- goto default_tuning;
-
- /* Default tuned Mixed (bigcore + atom SOC). */
- case INTEL_MIXED_LAKEFIELD:
- case INTEL_MIXED_ALDERLAKE:
- cpu_features->cachesize_non_temporal_divisor = 2;
- goto default_tuning;
- }
-
- /* Disable TSX on some processors to avoid TSX on kernels that
- weren't updated with the latest microcode package (which
- disables broken feature by default). */
- switch (microarch)
- {
case INTEL_BIGCORE_SKYLAKE_AVX512:
/* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX. */
if (stepping <= 5)
@@ -891,38 +787,163 @@ init_cpu_features (struct cpu_features *cpu_features)
case INTEL_BIGCORE_KABYLAKE:
/* NB: Although the errata documents that for model == 0x8e
- (kabylake skylake client), only 0xb stepping or lower are
- impacted, the intention of the errata was to disable TSX on
- all client processors on all steppings. Include 0xc
- stepping which is an Intel Core i7-8665U, a client mobile
- processor. */
+ (kabylake skylake client), only 0xb stepping or lower are
+ impacted, the intention of the errata was to disable TSX on
+ all client processors on all steppings. Include 0xc
+ stepping which is an Intel Core i7-8665U, a client mobile
+ processor. */
if (stepping > 0xc)
break;
/* Fall through. */
case INTEL_BIGCORE_SKYLAKE:
- /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
- processors listed in:
-
-https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
- */
- disable_tsx:
- CPU_FEATURE_UNSET (cpu_features, HLE);
- CPU_FEATURE_UNSET (cpu_features, RTM);
- CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
- break;
+ /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
+ processors listed in:
+
+ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
+ */
+disable_tsx:
+ CPU_FEATURE_UNSET (cpu_features, HLE);
+ CPU_FEATURE_UNSET (cpu_features, RTM);
+ CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
+ break;
case INTEL_BIGCORE_HASWELL:
- /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
- TSX. Haswell also include other model numbers that have
- working TSX. */
- if (model == 0x3f && stepping >= 4)
+ /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
+ TSX. Haswell also includes other model numbers that have
+ working TSX. */
+ if (model == 0x3f && stepping >= 4)
break;
- CPU_FEATURE_UNSET (cpu_features, RTM);
- break;
+ CPU_FEATURE_UNSET (cpu_features, RTM);
+ break;
}
}
+ else if (family == 19)
+ switch (model)
+ {
+ case 0x01:
+ microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
+ break;
+ default:
+ break;
+ }
+
+ switch (microarch)
+ {
+ /* Atom / KNL tuning. */
+ case INTEL_ATOM_BONNELL:
+ /* BSF is slow on Bonnell. */
+ cpu_features->preferred[index_arch_Slow_BSF]
+ |= bit_arch_Slow_BSF;
+ break;
+
+ /* Unaligned load versions are faster than SSSE3
+ on Airmont, Silvermont, Goldmont, and Goldmont Plus. */
+ case INTEL_ATOM_AIRMONT:
+ case INTEL_ATOM_SILVERMONT:
+ case INTEL_ATOM_GOLDMONT:
+ case INTEL_ATOM_GOLDMONT_PLUS:
+
+ /* Knights Landing. Enable Silvermont optimizations. */
+ case INTEL_KNIGHTS_LANDING:
+
+ cpu_features->preferred[index_arch_Fast_Unaligned_Load]
+ |= (bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop
+ | bit_arch_Slow_SSE4_2);
+ break;
+
+ case INTEL_ATOM_TREMONT:
+ /* Enable rep string instructions, unaligned load, unaligned
+ copy, pminub and avoid SSE 4.2 on Tremont. */
+ cpu_features->preferred[index_arch_Fast_Rep_String]
+ |= (bit_arch_Fast_Rep_String
+ | bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop
+ | bit_arch_Slow_SSE4_2);
+ break;
+
+ /*
+ Default tuned Knights microarch.
+ case INTEL_KNIGHTS_MILL:
+ */
+
+ /*
+ Default tuned atom microarch.
+ case INTEL_ATOM_SIERRAFOREST:
+ case INTEL_ATOM_GRANDRIDGE:
+ case INTEL_ATOM_CLEARWATERFOREST:
+ */
+
+ /* Bigcore/Default Tuning. */
+ default:
+ default_tuning:
+ /* Unknown Intel processors. Assume this is one of the Core
+ i3/i5/i7 processors if AVX is available. */
+ if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
+ break;
+
+ enable_modern_features:
+ /* Rep string instructions, unaligned load, unaligned copy,
+ and pminub are fast on Intel Core i3, i5 and i7. */
+ cpu_features->preferred[index_arch_Fast_Rep_String]
+ |= (bit_arch_Fast_Rep_String
+ | bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop);
+ break;
+
+ case INTEL_BIGCORE_NEHALEM:
+ case INTEL_BIGCORE_WESTMERE:
+ /* Older CPUs prefer non-temporal stores at lower threshold. */
+ cpu_features->cachesize_non_temporal_divisor = 8;
+ goto enable_modern_features;
+
+ /* Older Bigcore microarch (smaller non-temporal store
+ threshold). */
+ case INTEL_BIGCORE_SANDYBRIDGE:
+ case INTEL_BIGCORE_IVYBRIDGE:
+ case INTEL_BIGCORE_HASWELL:
+ case INTEL_BIGCORE_BROADWELL:
+ cpu_features->cachesize_non_temporal_divisor = 8;
+ goto default_tuning;
+
+ /* Newer Bigcore microarch (larger non-temporal store
+ threshold). */
+ case INTEL_BIGCORE_SKYLAKE_AVX512:
+ case INTEL_BIGCORE_CANNONLAKE:
+ /* Benchmarks indicate non-temporal memset is not
+ necessarily profitable on SKX (and in some cases much
+ worse). This is likely unique to SKX due to its unique
+ mesh interconnect (not present on ICX or BWD). Disable
+ non-temporal on all Skylake servers. */
+ cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
+ |= bit_arch_Avoid_Non_Temporal_Memset;
+ /* fallthrough */
+ case INTEL_BIGCORE_COMETLAKE:
+ case INTEL_BIGCORE_SKYLAKE:
+ case INTEL_BIGCORE_KABYLAKE:
+ case INTEL_BIGCORE_ICELAKE:
+ case INTEL_BIGCORE_TIGERLAKE:
+ case INTEL_BIGCORE_ROCKETLAKE:
+ case INTEL_BIGCORE_RAPTORLAKE:
+ case INTEL_BIGCORE_METEORLAKE:
+ case INTEL_BIGCORE_LUNARLAKE:
+ case INTEL_BIGCORE_ARROWLAKE:
+ case INTEL_BIGCORE_PANTHERLAKE:
+ case INTEL_BIGCORE_SAPPHIRERAPIDS:
+ case INTEL_BIGCORE_EMERALDRAPIDS:
+ case INTEL_BIGCORE_GRANITERAPIDS:
+ case INTEL_BIGCORE_DIAMONDRAPIDS:
+ /* Default tuned Mixed (bigcore + atom SOC). */
+ case INTEL_MIXED_LAKEFIELD:
+ case INTEL_MIXED_ALDERLAKE:
+ cpu_features->cachesize_non_temporal_divisor = 2;
+ goto default_tuning;
+ }
/* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
if AVX512ER is available. Don't use AVX512 to avoid lower CPU
@@ -1235,7 +1256,7 @@ no_cpuid:
#endif
if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
- || (GLRO(dl_x86_cpu_features).xsave_state_size != 0))
+ || cpu_features->xsave_state_size != 0)
{
if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
{
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index c3c73e7..b8e963b 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -183,6 +183,29 @@
#define atom_text_section .section ".text.atom", "ax"
+#ifndef DL_STACK_ALIGNMENT
+/* Due to GCC bug:
+
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+
+ __tls_get_addr may be called with 8-byte/4-byte stack alignment.
+ Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't
+ assume that the stack will always be aligned to 16 bytes. */
+# ifdef __x86_64__
+# define DL_STACK_ALIGNMENT 8
+# define MINIMUM_ALIGNMENT 16
+# else
+# define DL_STACK_ALIGNMENT 4
+# endif
+#endif
+
+/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for
+ STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling
+ _dl_fixup/__tls_get_addr. */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+ || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
+
#endif /* __ASSEMBLER__ */
#endif /* _X86_SYSDEP_H */
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
index f0024c1..963c4f3 100644
--- a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
+++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c
@@ -1 +1 @@
-#include <elf/tst-gnu2-tls2.c>
+#include <tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c
index de900a4..b3195ff 100644
--- a/sysdeps/x86/tst-gnu2-tls2.c
+++ b/sysdeps/x86/tst-gnu2-tls2.c
@@ -1,20 +1,26 @@
-#ifndef __x86_64__
-#include <sys/platform/x86.h>
+#ifndef TEST_AMX
+# ifndef __x86_64__
+# include <sys/platform/x86.h>
-#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
-#endif
+# define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
+# endif
-/* Clear XMM0...XMM7 */
-#define PREPARE_MALLOC() \
-{ \
- asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" ); \
- asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" ); \
- asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" ); \
- asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" ); \
- asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" ); \
- asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" ); \
- asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" ); \
- asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" ); \
+/* Set XMM0...XMM7 to all 1s. */
+# define PREPARE_MALLOC() \
+{ \
+ asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \
+ asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \
+ asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \
+ asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \
+ asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \
+ asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \
+ asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \
+ asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \
}
+#endif
#include <elf/tst-gnu2-tls2.c>
+
+#ifndef TEST_AMX
+v2di v1, v2, v3;
+#endif
diff --git a/sysdeps/x86/tst-gnu2-tls2.h b/sysdeps/x86/tst-gnu2-tls2.h
new file mode 100644
index 0000000..fdbb565
--- /dev/null
+++ b/sysdeps/x86/tst-gnu2-tls2.h
@@ -0,0 +1,37 @@
+/* Test TLSDESC relocation, x86 version.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef TEST_AMX
+# include <support/check.h>
+
+typedef long long v2di __attribute__((vector_size(16)));
+extern v2di v1, v2, v3;
+
+# define BEFORE_TLSDESC_CALL() \
+ v1 = __extension__(v2di){0, 0}; \
+ v2 = __extension__(v2di){0, 0};
+
+# define AFTER_TLSDESC_CALL() \
+ v3 = __extension__(v2di){0, 0}; \
+ asm volatile ("" : "+x" (v3)); \
+ union { v2di x; long long a[2]; } u; \
+ u.x = v3; \
+ TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0);
+#endif
+
+#include <elf/tst-gnu2-tls2.h>
diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c
new file mode 100644
index 0000000..6130d91
--- /dev/null
+++ b/sysdeps/x86/tst-tls23.c
@@ -0,0 +1,22 @@
+#ifndef __x86_64__
+#include <sys/platform/x86.h>
+
+#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
+#endif
+
+/* Set XMM0...XMM7 to all 1s. */
+#define PREPARE_MALLOC() \
+{ \
+ asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \
+ asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \
+ asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \
+ asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \
+ asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \
+ asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \
+ asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \
+ asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \
+}
+
+#include <elf/tst-tls23.c>
+
+v2di v1, v2, v3;
diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h
new file mode 100644
index 0000000..21cee4c
--- /dev/null
+++ b/sysdeps/x86/tst-tls23.h
@@ -0,0 +1,35 @@
+/* Test that __tls_get_addr preserves XMM registers.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+
+typedef long long v2di __attribute__((vector_size(16)));
+extern v2di v1, v2, v3;
+
+#define BEFORE_TLS_CALL() \
+ v1 = __extension__(v2di){0, 0}; \
+ v2 = __extension__(v2di){0, 0};
+
+#define AFTER_TLS_CALL() \
+ v3 = __extension__(v2di){0, 0}; \
+ asm volatile ("" : "+x" (v3)); \
+ union { v2di x; long long a[2]; } u; \
+ u.x = v3; \
+ TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0);
+
+#include <elf/tst-tls23.h>