author    H.J. Lu <hjl.tools@gmail.com>    2024-02-28 12:08:03 -0800
committer H.J. Lu <hjl.tools@gmail.com>    2024-02-29 04:30:01 -0800
commit    9b7091415af47082664717210ac49d51551456ab (patch)
tree      b2754fd0f298085481c5ebcac55721a4eb09ef53 /sysdeps/x86
parent    a1735e0aa858f0c8b15e5ee9975bff4279423680 (diff)
x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
_dl_tlsdesc_dynamic should also preserve AMX registers, which are
caller-saved.  Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
to the x86-64 TLSDESC_CALL_STATE_SAVE_MASK.  Compute the AMX state
size and save it in xsave_state_full_size, which is used only by
_dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec.  This fixes
the AMX part of BZ #31372.  Tested on an AMX processor.

The AMX test is enabled only for compilers with the fix for

    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098

GCC 14 and the GCC 11/12/13 branches have the bug fix.

Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
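For context, the size computation in question comes from CPUID leaf
0xD.  Below is a minimal stand-alone sketch of the subleaf-0 query
that update_active performs, assuming GCC's <cpuid.h>;
STATE_SAVE_OFFSET here is a placeholder, not glibc's actual per-ABI
constant:

#include <cpuid.h>
#include <stdio.h>

/* Placeholder for glibc's internal save-area offset.  */
#define STATE_SAVE_OFFSET 8
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* CPUID.(0xD,0):EBX = size of the XSAVE area for the state
     components currently enabled in XCR0.  On AMX-capable
     processors this already includes the tile registers.  */
  if (!__get_cpuid_count (0xd, 0, &eax, &ebx, &ecx, &edx))
    return 1;

  unsigned int xsave_state_full_size
    = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
  printf ("AMX-inclusive save area: %u bytes\n", xsave_state_full_size);
  return 0;
}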
Diffstat (limited to 'sysdeps/x86')
-rw-r--r--  sysdeps/x86/cpu-features-offsets.sym |  1
-rw-r--r--  sysdeps/x86/cpu-features.c           | 55
-rw-r--r--  sysdeps/x86/include/cpu-features.h   |  2
-rw-r--r--  sysdeps/x86/sysdep.h                 | 18
4 files changed, 71 insertions(+), 5 deletions(-)
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
index 6a8fd29..21fc88d 100644
--- a/sysdeps/x86/cpu-features-offsets.sym
+++ b/sysdeps/x86/cpu-features-offsets.sym
@@ -3,3 +3,4 @@
 #include <ldsodefs.h>
 
 XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size)
+XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
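The .sym entry above is evaluated at build time by glibc's
gen-as-const machinery into an assembler constant, so the assembly
paths named in the commit message can address the new field without
knowing the C struct layout.  A hypothetical C stand-in (the
two-field struct is illustrative, not glibc's real struct
cpu_features):

#include <stddef.h>
#include <stdio.h>

struct cpu_features_stub
{
  unsigned int xsave_state_size;
  unsigned int xsave_state_full_size;
};

int
main (void)
{
  /* Corresponds to what XSAVE_STATE_FULL_SIZE_OFFSET denotes.  */
  printf ("offset = %zu\n",
          offsetof (struct cpu_features_stub, xsave_state_full_size));
  return 0;
}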
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 0ad0a78..e7c7ece 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -308,6 +308,8 @@ update_active (struct cpu_features *cpu_features)
       __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
       if (ebx != 0)
         {
+          /* NB: On AMX capable processors, ebx always includes AMX
+             states.  */
           unsigned int xsave_state_full_size
             = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
@@ -321,6 +323,11 @@ update_active (struct cpu_features *cpu_features)
         {
           unsigned int xstate_comp_offsets[32];
           unsigned int xstate_comp_sizes[32];
+#ifdef __x86_64__
+          unsigned int xstate_amx_comp_offsets[32];
+          unsigned int xstate_amx_comp_sizes[32];
+          unsigned int amx_ecx;
+#endif
           unsigned int i;
 
           xstate_comp_offsets[0] = 0;
@@ -328,16 +335,39 @@ update_active (struct cpu_features *cpu_features)
           xstate_comp_offsets[2] = 576;
           xstate_comp_sizes[0] = 160;
           xstate_comp_sizes[1] = 256;
+#ifdef __x86_64__
+          xstate_amx_comp_offsets[0] = 0;
+          xstate_amx_comp_offsets[1] = 160;
+          xstate_amx_comp_offsets[2] = 576;
+          xstate_amx_comp_sizes[0] = 160;
+          xstate_amx_comp_sizes[1] = 256;
+#endif
 
           for (i = 2; i < 32; i++)
             {
-              if ((STATE_SAVE_MASK & (1 << i)) != 0)
+              if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
                 {
                   __cpuid_count (0xd, i, eax, ebx, ecx, edx);
-                  xstate_comp_sizes[i] = eax;
+#ifdef __x86_64__
+                  /* Include this in xsave_state_full_size.  */
+                  amx_ecx = ecx;
+                  xstate_amx_comp_sizes[i] = eax;
+                  if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
+                    {
+                      /* Exclude this from xsave_state_size.  */
+                      ecx = 0;
+                      xstate_comp_sizes[i] = 0;
+                    }
+                  else
+#endif
+                    xstate_comp_sizes[i] = eax;
                 }
               else
                 {
+#ifdef __x86_64__
+                  amx_ecx = 0;
+                  xstate_amx_comp_sizes[i] = 0;
+#endif
                   ecx = 0;
                   xstate_comp_sizes[i] = 0;
                 }
@@ -350,6 +380,15 @@ update_active (struct cpu_features *cpu_features)
               if ((ecx & (1 << 1)) != 0)
                 xstate_comp_offsets[i]
                   = ALIGN_UP (xstate_comp_offsets[i], 64);
+#ifdef __x86_64__
+              xstate_amx_comp_offsets[i]
+                = (xstate_amx_comp_offsets[i - 1]
+                   + xstate_amx_comp_sizes[i - 1]);
+              if ((amx_ecx & (1 << 1)) != 0)
+                xstate_amx_comp_offsets[i]
+                  = ALIGN_UP (xstate_amx_comp_offsets[i],
+                              64);
+#endif
             }
         }
 
@@ -358,6 +397,18 @@ update_active (struct cpu_features *cpu_features)
             = xstate_comp_offsets[31] + xstate_comp_sizes[31];
           if (size)
             {
+#ifdef __x86_64__
+              unsigned int amx_size
+                = (xstate_amx_comp_offsets[31]
+                   + xstate_amx_comp_sizes[31]);
+              amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
+                                   64);
+              /* Set xsave_state_full_size to the compact AMX
+                 state size for XSAVEC.  NB: xsave_state_full_size
+                 is only used in _dl_tlsdesc_dynamic_xsave and
+                 _dl_tlsdesc_dynamic_xsavec.  */
+              cpu_features->xsave_state_full_size = amx_size;
+#endif
               cpu_features->xsave_state_size
                 = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
               CPU_FEATURE_SET (cpu_features, XSAVEC);
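The parallel xstate_amx_comp_* bookkeeping above follows the XSAVEC
compacted-format layout rule: components 2..31 are placed back to
back after the 512-byte legacy region and 64-byte XSAVE header, and
any component with CPUID.(0xD,i):ECX[1] set must start on a 64-byte
boundary.  A hedged stand-alone sketch of that rule, assuming GCC's
<cpuid.h> (the function and parameter names are illustrative):

#include <cpuid.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

static unsigned int
compacted_size (unsigned int mask)
{
  unsigned int eax, ebx, ecx, edx;
  /* 512-byte legacy region + 64-byte XSAVE header precede
     component 2 in the compacted format.  */
  unsigned int offset = 576;

  for (unsigned int i = 2; i < 32; i++)
    if ((mask & (1u << i)) != 0
        && __get_cpuid_count (0xd, i, &eax, &ebx, &ecx, &edx))
      {
        /* ECX[1]: component must be 64-byte aligned when compacted.  */
        if ((ecx & (1u << 1)) != 0)
          offset = ALIGN_UP (offset, 64);
        offset += eax;  /* EAX = size in bytes of component i.  */
      }
  return offset;
}

Passing FULL_STATE_SAVE_MASK versus STATE_SAVE_MASK to such a helper
mirrors the patch's split between xsave_state_full_size (AMX
included) and xsave_state_size (AMX excluded).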
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index b9bf311..cd7bd27 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -934,6 +934,8 @@ struct cpu_features
   /* The full state size for XSAVE when XSAVEC is disabled by
 
      GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
+
+     and the AMX state size when XSAVEC is available.
    */
   unsigned int xsave_state_full_size;
   /* Data cache size for use in memory and string routines, typically
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index 485cad9..db8e576 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -56,6 +56,14 @@
                           | (1 << X86_XSTATE_ZMM_H_ID) \
                           | (1 << X86_XSTATE_ZMM_ID) \
                           | (1 << X86_XSTATE_APX_F_ID))
+
+/* AMX state mask.  */
+# define AMX_STATE_SAVE_MASK \
+  ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
+
+/* States to be included in xsave_state_full_size.  */
+# define FULL_STATE_SAVE_MASK \
+  (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
 #else
 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
    doesn't have red-zone, use 0 here.  */
@@ -68,13 +76,17 @@
                           | (1 << X86_XSTATE_BNDREGS_ID) \
                           | (1 << X86_XSTATE_K_ID) \
                           | (1 << X86_XSTATE_ZMM_H_ID))
+
+/* States to be included in xsave_state_size.  */
+# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
 #endif
 
 /* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
-   Compiler assumes that all registers, including x87 FPU stack registers,
-   are unchanged after CALL, except for EFLAGS and RAX/EAX.  */
+   Compiler assumes that all registers, including AMX and x87 FPU
+   stack registers, are unchanged after CALL, except for EFLAGS and
+   RAX/EAX.  */
 #define TLSDESC_CALL_STATE_SAVE_MASK \
-  (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
+  (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
 
 /* Constants for bits in __x86_string_control:  */
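For reference, TILECFG and TILEDATA are XSAVE state components 17
and 18 per the Intel SDM; those numbers come from the architecture
manual, not from the hunk above.  A small sanity-check sketch of the
resulting mask value:

/* Assumed component numbers (Intel SDM): AMX TILECFG = 17,
   AMX TILEDATA = 18.  */
#define X86_XSTATE_TILECFG_ID  17
#define X86_XSTATE_TILEDATA_ID 18

#define AMX_STATE_SAVE_MASK \
  ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))

/* The AMX mask covers XCR0 bits 17-18, i.e. 0x60000.  */
_Static_assert (AMX_STATE_SAVE_MASK == 0x60000,
                "AMX components occupy XCR0 bits 17 and 18");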