aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2022-06-27 11:36:28 -0700
committerH.J. Lu <hjl.tools@gmail.com>2022-06-27 14:17:52 -0700
commitcfdc4df66ce1464611e1b508f7a5a8f38afd5337 (patch)
treef7a9aa5d51998f088220c91402a4124059eeb9b9
parentf56c497d2b640577f0a8a41f04d4f2c25a8800bd (diff)
downloadglibc-cfdc4df66ce1464611e1b508f7a5a8f38afd5337.zip
glibc-cfdc4df66ce1464611e1b508f7a5a8f38afd5337.tar.gz
glibc-cfdc4df66ce1464611e1b508f7a5a8f38afd5337.tar.bz2
x86-64: Only define used SSE/AVX/AVX512 run-time resolvers
When glibc is built with x86-64 ISA level v3, SSE run-time resolvers aren't used. For x86-64 ISA level v4 build, both SSE and AVX resolvers are unused. Check the minimum x86-64 ISA level to exclude the unused run-time resolvers.
-rw-r--r--sysdeps/x86/isa-level.h2
-rw-r--r--sysdeps/x86_64/dl-machine.h12
-rw-r--r--sysdeps/x86_64/dl-trampoline.S59
3 files changed, 42 insertions, 31 deletions
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index c6156e7..f293aea 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -68,10 +68,12 @@
compile-time constant.. */
/* ISA level >= 4 guaranteed includes. */
+#define AVX512F_X86_ISA_LEVEL 4
#define AVX512VL_X86_ISA_LEVEL 4
#define AVX512BW_X86_ISA_LEVEL 4
/* ISA level >= 3 guaranteed includes. */
+#define AVX_X86_ISA_LEVEL 3
#define AVX2_X86_ISA_LEVEL 3
#define BMI2_X86_ISA_LEVEL 3
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 3476632..005d089 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -28,6 +28,7 @@
#include <dl-tlsdesc.h>
#include <dl-static-tls.h>
#include <dl-machine-rel.h>
+#include <isa-level.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused))
@@ -86,6 +87,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* Identify this shared object. */
*(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
/* The got[2] entry contains the address of a function which gets
called to get the address of a so far unresolved function and
jump to it. The profiling extension of the dynamic linker allows
@@ -94,9 +97,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
end in this function. */
if (__glibc_unlikely (profile))
{
- if (CPU_FEATURE_USABLE (AVX512F))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
- else if (CPU_FEATURE_USABLE (AVX))
+ else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx;
else
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse;
@@ -112,9 +115,10 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to
the resolved address. */
- if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+ if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
+ || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
*(ElfW(Addr) *) (got + 2)
- = (CPU_FEATURE_USABLE (XSAVEC)
+ = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
: (ElfW(Addr)) &_dl_runtime_resolve_xsave);
else
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 831a654..f669805 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -20,6 +20,7 @@
#include <sysdep.h>
#include <cpu-features-offsets.h>
#include <link-defines.h>
+#include <isa-level.h>
#ifndef DL_STACK_ALIGNMENT
/* Due to GCC bug:
@@ -62,35 +63,39 @@
#undef VMOVA
#undef VEC_SIZE
-#define VEC_SIZE 32
-#define VMOVA vmovdqa
-#define VEC(i) ymm##i
-#define _dl_runtime_profile _dl_runtime_profile_avx
-#include "dl-trampoline.h"
-#undef _dl_runtime_profile
-#undef VEC
-#undef VMOVA
-#undef VEC_SIZE
+#if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
+# define VEC_SIZE 32
+# define VMOVA vmovdqa
+# define VEC(i) ymm##i
+# define _dl_runtime_profile _dl_runtime_profile_avx
+# include "dl-trampoline.h"
+# undef _dl_runtime_profile
+# undef VEC
+# undef VMOVA
+# undef VEC_SIZE
+#endif
+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
/* movaps/movups is 1-byte shorter. */
-#define VEC_SIZE 16
-#define VMOVA movaps
-#define VEC(i) xmm##i
-#define _dl_runtime_profile _dl_runtime_profile_sse
-#undef RESTORE_AVX
-#include "dl-trampoline.h"
-#undef _dl_runtime_profile
-#undef VEC
-#undef VMOVA
-#undef VEC_SIZE
-
-#define USE_FXSAVE
-#define STATE_SAVE_ALIGNMENT 16
-#define _dl_runtime_resolve _dl_runtime_resolve_fxsave
-#include "dl-trampoline.h"
-#undef _dl_runtime_resolve
-#undef USE_FXSAVE
-#undef STATE_SAVE_ALIGNMENT
+# define VEC_SIZE 16
+# define VMOVA movaps
+# define VEC(i) xmm##i
+# define _dl_runtime_profile _dl_runtime_profile_sse
+# undef RESTORE_AVX
+# include "dl-trampoline.h"
+# undef _dl_runtime_profile
+# undef VEC
+# undef VMOVA
+# undef VEC_SIZE
+
+# define USE_FXSAVE
+# define STATE_SAVE_ALIGNMENT 16
+# define _dl_runtime_resolve _dl_runtime_resolve_fxsave
+# include "dl-trampoline.h"
+# undef _dl_runtime_resolve
+# undef USE_FXSAVE
+# undef STATE_SAVE_ALIGNMENT
+#endif
#define USE_XSAVE
#define STATE_SAVE_ALIGNMENT 64