diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2024-02-28 12:08:03 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2024-02-29 04:30:01 -0800 |
commit | 9b7091415af47082664717210ac49d51551456ab (patch) | |
tree | b2754fd0f298085481c5ebcac55721a4eb09ef53 /sysdeps/x86 | |
parent | a1735e0aa858f0c8b15e5ee9975bff4279423680 (diff) | |
download | glibc-9b7091415af47082664717210ac49d51551456ab.zip glibc-9b7091415af47082664717210ac49d51551456ab.tar.gz glibc-9b7091415af47082664717210ac49d51551456ab.tar.bz2 |
x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
_dl_tlsdesc_dynamic should also preserve AMX registers which are
caller-saved. Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
to x86-64 TLSDESC_CALL_STATE_SAVE_MASK. Compute the AMX state size
and save it in xsave_state_full_size which is only used by
_dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec. This fixes
the AMX part of BZ #31372. Tested on AMX processor.
The AMX test is enabled only for compilers that include the fix for
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098
GCC 14 and GCC 11/12/13 branches have the bug fix.
Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
Diffstat (limited to 'sysdeps/x86')
-rw-r--r-- | sysdeps/x86/cpu-features-offsets.sym | 1 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.c | 55 | ||||
-rw-r--r-- | sysdeps/x86/include/cpu-features.h | 2 | ||||
-rw-r--r-- | sysdeps/x86/sysdep.h | 18 |
4 files changed, 71 insertions, 5 deletions
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym index 6a8fd29..21fc88d 100644 --- a/sysdeps/x86/cpu-features-offsets.sym +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -3,3 +3,4 @@ #include <ldsodefs.h> XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size) +XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 0ad0a78..e7c7ece 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -308,6 +308,8 @@ update_active (struct cpu_features *cpu_features) __cpuid_count (0xd, 0, eax, ebx, ecx, edx); if (ebx != 0) { + /* NB: On AMX capable processors, ebx always includes AMX + states. */ unsigned int xsave_state_full_size = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); @@ -321,6 +323,11 @@ update_active (struct cpu_features *cpu_features) { unsigned int xstate_comp_offsets[32]; unsigned int xstate_comp_sizes[32]; +#ifdef __x86_64__ + unsigned int xstate_amx_comp_offsets[32]; + unsigned int xstate_amx_comp_sizes[32]; + unsigned int amx_ecx; +#endif unsigned int i; xstate_comp_offsets[0] = 0; @@ -328,16 +335,39 @@ update_active (struct cpu_features *cpu_features) xstate_comp_offsets[2] = 576; xstate_comp_sizes[0] = 160; xstate_comp_sizes[1] = 256; +#ifdef __x86_64__ + xstate_amx_comp_offsets[0] = 0; + xstate_amx_comp_offsets[1] = 160; + xstate_amx_comp_offsets[2] = 576; + xstate_amx_comp_sizes[0] = 160; + xstate_amx_comp_sizes[1] = 256; +#endif for (i = 2; i < 32; i++) { - if ((STATE_SAVE_MASK & (1 << i)) != 0) + if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0) { __cpuid_count (0xd, i, eax, ebx, ecx, edx); - xstate_comp_sizes[i] = eax; +#ifdef __x86_64__ + /* Include this in xsave_state_full_size. */ + amx_ecx = ecx; + xstate_amx_comp_sizes[i] = eax; + if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0) + { + /* Exclude this from xsave_state_size. 
*/ + ecx = 0; + xstate_comp_sizes[i] = 0; + } + else +#endif + xstate_comp_sizes[i] = eax; } else { +#ifdef __x86_64__ + amx_ecx = 0; + xstate_amx_comp_sizes[i] = 0; +#endif ecx = 0; xstate_comp_sizes[i] = 0; } @@ -350,6 +380,15 @@ update_active (struct cpu_features *cpu_features) if ((ecx & (1 << 1)) != 0) xstate_comp_offsets[i] = ALIGN_UP (xstate_comp_offsets[i], 64); +#ifdef __x86_64__ + xstate_amx_comp_offsets[i] + = (xstate_amx_comp_offsets[i - 1] + + xstate_amx_comp_sizes[i - 1]); + if ((amx_ecx & (1 << 1)) != 0) + xstate_amx_comp_offsets[i] + = ALIGN_UP (xstate_amx_comp_offsets[i], + 64); +#endif } } @@ -358,6 +397,18 @@ update_active (struct cpu_features *cpu_features) = xstate_comp_offsets[31] + xstate_comp_sizes[31]; if (size) { +#ifdef __x86_64__ + unsigned int amx_size + = (xstate_amx_comp_offsets[31] + + xstate_amx_comp_sizes[31]); + amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET, + 64); + /* Set xsave_state_full_size to the compact AMX + state size for XSAVEC. NB: xsave_state_full_size + is only used in _dl_tlsdesc_dynamic_xsave and + _dl_tlsdesc_dynamic_xsavec. */ + cpu_features->xsave_state_full_size = amx_size; +#endif cpu_features->xsave_state_size = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); CPU_FEATURE_SET (cpu_features, XSAVEC); diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index b9bf311..cd7bd27 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -934,6 +934,8 @@ struct cpu_features /* The full state size for XSAVE when XSAVEC is disabled by GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC + + and the AMX state size when XSAVEC is available. 
*/ unsigned int xsave_state_full_size; /* Data cache size for use in memory and string routines, typically diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h index 485cad9..db8e576 100644 --- a/sysdeps/x86/sysdep.h +++ b/sysdeps/x86/sysdep.h @@ -56,6 +56,14 @@ | (1 << X86_XSTATE_ZMM_H_ID) \ | (1 << X86_XSTATE_ZMM_ID) \ | (1 << X86_XSTATE_APX_F_ID)) + +/* AMX state mask. */ +# define AMX_STATE_SAVE_MASK \ + ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID)) + +/* States to be included in xsave_state_full_size. */ +# define FULL_STATE_SAVE_MASK \ + (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK) #else /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386 doesn't have red-zone, use 0 here. */ @@ -68,13 +76,17 @@ | (1 << X86_XSTATE_BNDREGS_ID) \ | (1 << X86_XSTATE_K_ID) \ | (1 << X86_XSTATE_ZMM_H_ID)) + +/* States to be included in xsave_state_size. */ +# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK #endif /* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL. - Compiler assumes that all registers, including x87 FPU stack registers, - are unchanged after CALL, except for EFLAGS and RAX/EAX. */ + Compiler assumes that all registers, including AMX and x87 FPU + stack registers, are unchanged after CALL, except for EFLAGS and + RAX/EAX. */ #define TLSDESC_CALL_STATE_SAVE_MASK \ - (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID)) + (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID)) /* Constants for bits in __x86_string_control: */ |