diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2015-08-13 03:37:47 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2015-08-13 03:41:22 -0700 |
commit | e2e4f56056adddc3c1efe676b40a4b4f2453103b (patch) | |
tree | c9f54be6f6e8b57c8e58bdfac594aa3927378231 /sysdeps/x86 | |
parent | 63e952d9be87db68f0e4164d4a5760b32e77ebff (diff) | |
download | glibc-e2e4f56056adddc3c1efe676b40a4b4f2453103b.zip glibc-e2e4f56056adddc3c1efe676b40a4b4f2453103b.tar.gz glibc-e2e4f56056adddc3c1efe676b40a4b4f2453103b.tar.bz2 |
Add _dl_x86_cpu_features to rtld_global
This patch adds _dl_x86_cpu_features to rtld_global in x86 ld.so
and initializes it early before __libc_start_main is called so that
cpu_features is always available when it is used and we can avoid
calling __init_cpu_features in IFUNC selectors.
* sysdeps/i386/dl-machine.h: Include <cpu-features.c>.
(dl_platform_init): Call init_cpu_features.
* sysdeps/i386/dl-procinfo.c (_dl_x86_cpu_features): New.
* sysdeps/i386/i686/cacheinfo.c
(DISABLE_PREFERRED_MEMORY_INSTRUCTION): Removed.
* sysdeps/i386/i686/multiarch/Makefile (aux): Remove init-arch.
* sysdeps/i386/i686/multiarch/Versions: Removed.
* sysdeps/i386/i686/multiarch/ifunc-defines.sym (KIND_OFFSET):
Removed.
* sysdeps/i386/ldsodefs.h: Include <cpu-features.h>.
* sysdeps/unix/sysv/linux/x86/Makefile
(libpthread-sysdep_routines): Remove init-arch.
* sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c: Include
<sysdeps/x86_64/dl-procinfo.c> instead of
<sysdeps/generic/dl-procinfo.c>.
* sysdeps/x86/Makefile [$(subdir) == csu] (gen-as-const-headers):
Add cpu-features-offsets.sym and rtld-global-offsets.sym.
[$(subdir) == elf] (sysdep-dl-routines): Add dl-get-cpu-features.
[$(subdir) == elf] (tests): Add tst-get-cpu-features.
[$(subdir) == elf] (tests-static): Add
tst-get-cpu-features-static.
* sysdeps/x86/Versions: New file.
* sysdeps/x86/cpu-features-offsets.sym: Likewise.
* sysdeps/x86/cpu-features.c: Likewise.
* sysdeps/x86/cpu-features.h: Likewise.
* sysdeps/x86/dl-get-cpu-features.c: Likewise.
* sysdeps/x86/libc-start.c: Likewise.
* sysdeps/x86/rtld-global-offsets.sym: Likewise.
* sysdeps/x86/tst-get-cpu-features-static.c: Likewise.
* sysdeps/x86/tst-get-cpu-features.c: Likewise.
* sysdeps/x86_64/dl-procinfo.c: Likewise.
* sysdeps/x86_64/cacheinfo.c (__cpuid_count): Removed.
Assume USE_MULTIARCH is defined and don't check it.
(is_intel): Replace __cpu_features with GLRO(dl_x86_cpu_features).
(is_amd): Likewise.
(max_cpuid): Likewise.
(intel_check_word): Likewise.
(__cache_sysconf): Don't call __init_cpu_features.
(__x86_preferred_memory_instruction): Removed.
(init_cacheinfo): Don't call __init_cpu_features. Replace
__cpu_features with GLRO(dl_x86_cpu_features).
* sysdeps/x86_64/dl-machine.h: Include <cpu-features.c>.
(dl_platform_init): Call init_cpu_features.
* sysdeps/x86_64/ldsodefs.h: Include <cpu-features.h>.
* sysdeps/x86_64/multiarch/Makefile (aux): Remove init-arch.
* sysdeps/x86_64/multiarch/Versions: Removed.
* sysdeps/x86_64/multiarch/cacheinfo.c: Likewise.
* sysdeps/x86_64/multiarch/init-arch.c: Likewise.
* sysdeps/x86_64/multiarch/ifunc-defines.sym (KIND_OFFSET):
Removed.
* sysdeps/x86_64/multiarch/init-arch.h: Rewrite.
Diffstat (limited to 'sysdeps/x86')
-rw-r--r-- | sysdeps/x86/Makefile | 11 | ||||
-rw-r--r-- | sysdeps/x86/Versions | 5 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features-offsets.sym | 7 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.c | 202 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.h | 240 | ||||
-rw-r--r-- | sysdeps/x86/dl-get-cpu-features.c | 27 | ||||
-rw-r--r-- | sysdeps/x86/libc-start.c | 41 | ||||
-rw-r--r-- | sysdeps/x86/rtld-global-offsets.sym | 7 | ||||
-rw-r--r-- | sysdeps/x86/tst-get-cpu-features-static.c | 1 | ||||
-rw-r--r-- | sysdeps/x86/tst-get-cpu-features.c | 31 |
10 files changed, 572 insertions, 0 deletions
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 19f5eca..c262fdf 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -8,3 +8,14 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/x86/tst-ld-sse-use.sh $(objpfx)ld.so $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ $(evaluate-test) endif + +ifeq ($(subdir),csu) +gen-as-const-headers += cpu-features-offsets.sym rtld-global-offsets.sym +endif + +ifeq ($(subdir),elf) +sysdep-dl-routines += dl-get-cpu-features + +tests += tst-get-cpu-features +tests-static += tst-get-cpu-features-static +endif diff --git a/sysdeps/x86/Versions b/sysdeps/x86/Versions new file mode 100644 index 0000000..e029237 --- /dev/null +++ b/sysdeps/x86/Versions @@ -0,0 +1,5 @@ +ld { + GLIBC_PRIVATE { + __get_cpu_features; + } +} diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym new file mode 100644 index 0000000..a9d53d1 --- /dev/null +++ b/sysdeps/x86/cpu-features-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c new file mode 100644 index 0000000..587080c --- /dev/null +++ b/sysdeps/x86/cpu-features.c @@ -0,0 +1,202 @@ +/* Initialize CPU feature data. + This file is part of the GNU C Library. + Copyright (C) 2008-2015 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <cpuid.h> +#include <cpu-features.h> + +static inline void +get_common_indeces (struct cpu_features *cpu_features, + unsigned int *family, unsigned int *model) +{ + unsigned int eax; + __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx); + GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].eax = eax; + *family = (eax >> 8) & 0x0f; + *model = (eax >> 4) & 0x0f; +} + +static inline void +init_cpu_features (struct cpu_features *cpu_features) +{ + unsigned int ebx, ecx, edx; + unsigned int family = 0; + unsigned int model = 0; + enum cpu_features_kind kind; + + __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx); + + /* This spells out "GenuineIntel". */ + if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) + { + kind = arch_kind_intel; + + get_common_indeces (cpu_features, &family, &model); + + unsigned int eax = cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax; + unsigned int extended_family = (eax >> 20) & 0xff; + unsigned int extended_model = (eax >> 12) & 0xf0; + if (family == 0x0f) + { + family += extended_family; + model += extended_model; + } + else if (family == 0x06) + { + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; + model += extended_model; + switch (model) + { + case 0x1c: + case 0x26: + /* BSF is slow on Atom. */ + cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF; + break; + + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + /* Unaligned load versions are faster than SSSE3 + on Silvermont. 
*/ +#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +#endif +#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 +# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 +#endif + cpu_features->feature[index_Fast_Unaligned_Load] + |= (bit_Fast_Unaligned_Load + | bit_Prefer_PMINUB_for_stringop + | bit_Slow_SSE4_2); + break; + + default: + /* Unknown family 0x06 processors. Assuming this is one + of Core i3/i5/i7 processors if AVX is available. */ + if ((ecx & bit_AVX) == 0) + break; + + case 0x1a: + case 0x1e: + case 0x1f: + case 0x25: + case 0x2c: + case 0x2e: + case 0x2f: + /* Rep string instructions, copy backward, unaligned loads + and pminub are fast on Intel Core i3, i5 and i7. */ +#if index_Fast_Rep_String != index_Fast_Copy_Backward +# error index_Fast_Rep_String != index_Fast_Copy_Backward +#endif +#if index_Fast_Rep_String != index_Fast_Unaligned_Load +# error index_Fast_Rep_String != index_Fast_Unaligned_Load +#endif +#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop +# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop +#endif + cpu_features->feature[index_Fast_Rep_String] + |= (bit_Fast_Rep_String + | bit_Fast_Copy_Backward + | bit_Fast_Unaligned_Load + | bit_Prefer_PMINUB_for_stringop); + break; + } + } + } + /* This spells out "AuthenticAMD". 
*/ + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + { + kind = arch_kind_amd; + + get_common_indeces (cpu_features, &family, &model); + + ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx; + + unsigned int eax; + __cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax >= 0x80000001) + __cpuid (0x80000001, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx); + } + else + kind = arch_kind_other; + + if (cpu_features->max_cpuid >= 7) + __cpuid_count (7, 0, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx, + cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx); + + /* Can we call xgetbv? */ + if (HAS_CPU_FEATURE (OSXSAVE)) + { + unsigned int xcrlow; + unsigned int xcrhigh; + asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); + /* Is YMM and XMM state usable? */ + if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == + (bit_YMM_state | bit_XMM_state)) + { + /* Determine if AVX is usable. */ + if (HAS_CPU_FEATURE (AVX)) + cpu_features->feature[index_AVX_Usable] |= bit_AVX_Usable; +#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load +# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load +#endif + /* Determine if AVX2 is usable. Unaligned load with 256-bit + AVX registers are faster on processors with AVX2. */ + if (HAS_CPU_FEATURE (AVX2)) + cpu_features->feature[index_AVX2_Usable] + |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; + /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and + ZMM16-ZMM31 state are enabled. */ + if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state + | bit_ZMM16_31_state)) == + (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) + { + /* Determine if AVX512F is usable. 
*/ + if (HAS_CPU_FEATURE (AVX512F)) + { + cpu_features->feature[index_AVX512F_Usable] + |= bit_AVX512F_Usable; + /* Determine if AVX512DQ is usable. */ + if (HAS_CPU_FEATURE (AVX512DQ)) + cpu_features->feature[index_AVX512DQ_Usable] + |= bit_AVX512DQ_Usable; + } + } + /* Determine if FMA is usable. */ + if (HAS_CPU_FEATURE (FMA)) + cpu_features->feature[index_FMA_Usable] |= bit_FMA_Usable; + /* Determine if FMA4 is usable. */ + if (HAS_CPU_FEATURE (FMA4)) + cpu_features->feature[index_FMA4_Usable] |= bit_FMA4_Usable; + } + } + + cpu_features->family = family; + cpu_features->model = model; + cpu_features->kind = kind; +} diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h new file mode 100644 index 0000000..22e5abb --- /dev/null +++ b/sysdeps/x86/cpu-features.h @@ -0,0 +1,240 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2008-2015 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef cpu_features_h +#define cpu_features_h + +#define bit_Fast_Rep_String (1 << 0) +#define bit_Fast_Copy_Backward (1 << 1) +#define bit_Slow_BSF (1 << 2) +#define bit_Fast_Unaligned_Load (1 << 4) +#define bit_Prefer_PMINUB_for_stringop (1 << 5) +#define bit_AVX_Usable (1 << 6) +#define bit_FMA_Usable (1 << 7) +#define bit_FMA4_Usable (1 << 8) +#define bit_Slow_SSE4_2 (1 << 9) +#define bit_AVX2_Usable (1 << 10) +#define bit_AVX_Fast_Unaligned_Load (1 << 11) +#define bit_AVX512F_Usable (1 << 12) +#define bit_AVX512DQ_Usable (1 << 13) + +/* CPUID Feature flags. */ + +/* COMMON_CPUID_INDEX_1. */ +#define bit_SSE2 (1 << 26) +#define bit_SSSE3 (1 << 9) +#define bit_SSE4_1 (1 << 19) +#define bit_SSE4_2 (1 << 20) +#define bit_OSXSAVE (1 << 27) +#define bit_AVX (1 << 28) +#define bit_POPCOUNT (1 << 23) +#define bit_FMA (1 << 12) +#define bit_FMA4 (1 << 16) + +/* COMMON_CPUID_INDEX_7. */ +#define bit_RTM (1 << 11) +#define bit_AVX2 (1 << 5) +#define bit_AVX512F (1 << 16) +#define bit_AVX512DQ (1 << 17) + +/* XCR0 Feature flags. */ +#define bit_XMM_state (1 << 1) +#define bit_YMM_state (2 << 1) +#define bit_Opmask_state (1 << 5) +#define bit_ZMM0_15_state (1 << 6) +#define bit_ZMM16_31_state (1 << 7) + +/* The integer bit array index for the first set of internal feature bits. */ +#define FEATURE_INDEX_1 0 + +/* The current maximum size of the feature integer bit array. 
*/ +#define FEATURE_INDEX_MAX 1 + +#ifdef __ASSEMBLER__ + +# include <ifunc-defines.h> +# include <rtld-global-offsets.h> + +# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET +# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET +# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET + +# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE +# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE +# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE +# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE + +# if defined (_LIBC) && !IS_IN (nonlib) +# ifdef __x86_64__ +# ifdef SHARED +# if IS_IN (rtld) +# define LOAD_RTLD_GLOBAL_RO_RDX +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _rtld_local_ro+offset+(index_##name)(%rip) +# else +# define LOAD_RTLD_GLOBAL_RO_RDX \ + mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), \ + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%rdx) +# endif +# else /* SHARED */ +# define LOAD_RTLD_GLOBAL_RO_RDX +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name)(%rip) +# endif /* !SHARED */ +# 
else /* __x86_64__ */ +# ifdef SHARED +# define LOAD_FUNC_GOT_EAX(func) \ + leal func@GOTOFF(%edx), %eax +# if IS_IN (rtld) +# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ + LOAD_PIC_REG(dx) +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), offset+(index_##name)+_rtld_local_ro@GOTOFF(%edx) +# else +# define LOAD_GOT_AND_RTLD_GLOBAL_RO \ + LOAD_PIC_REG(dx); \ + mov _rtld_global_ro@GOT(%edx), %ecx +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), \ + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##name)(%ecx) +# endif +# else /* SHARED */ +# define LOAD_FUNC_GOT_EAX(func) \ + leal func, %eax +# define LOAD_GOT_AND_RTLD_GLOBAL_RO +# define HAS_FEATURE(offset, name) \ + testl $(bit_##name), _dl_x86_cpu_features+offset+(index_##name) +# endif /* !SHARED */ +# endif /* !__x86_64__ */ +# else /* _LIBC && !nonlib */ +# error "Sorry, <cpu-features.h> is unimplemented for assembler" +# endif /* !_LIBC || nonlib */ + +/* HAS_* evaluates to true if we may use the feature at runtime. */ +# define HAS_CPU_FEATURE(name) HAS_FEATURE (CPUID_OFFSET, name) +# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, name) + +#else /* __ASSEMBLER__ */ + +enum + { + COMMON_CPUID_INDEX_1 = 0, + COMMON_CPUID_INDEX_7, + COMMON_CPUID_INDEX_80000001, /* for AMD */ + /* Keep the following line at the end. */ + COMMON_CPUID_INDEX_MAX + }; + +struct cpu_features +{ + enum cpu_features_kind + { + arch_kind_unknown = 0, + arch_kind_intel, + arch_kind_amd, + arch_kind_other + } kind; + int max_cpuid; + struct cpuid_registers + { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + } cpuid[COMMON_CPUID_INDEX_MAX]; + unsigned int family; + unsigned int model; + unsigned int feature[FEATURE_INDEX_MAX]; +}; + +/* Used from outside of glibc to get access to the CPU features + structure. */ +extern const struct cpu_features *__get_cpu_features (void) + __attribute__ ((const)); + +# if defined (_LIBC) && !IS_IN (nonlib) +/* Unused for x86. 
*/ +# define INIT_ARCH() +# define __get_cpu_features() (&GLRO(dl_x86_cpu_features)) +# endif + + +/* HAS_* evaluates to true if we may use the feature at runtime. */ +# define HAS_CPU_FEATURE(name) \ + ((__get_cpu_features ()->cpuid[index_##name].reg_##name & (bit_##name)) != 0) +# define HAS_ARCH_FEATURE(name) \ + ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) + +# define index_SSE2 COMMON_CPUID_INDEX_1 +# define index_SSSE3 COMMON_CPUID_INDEX_1 +# define index_SSE4_1 COMMON_CPUID_INDEX_1 +# define index_SSE4_2 COMMON_CPUID_INDEX_1 +# define index_AVX COMMON_CPUID_INDEX_1 +# define index_AVX2 COMMON_CPUID_INDEX_7 +# define index_AVX512F COMMON_CPUID_INDEX_7 +# define index_AVX512DQ COMMON_CPUID_INDEX_7 +# define index_RTM COMMON_CPUID_INDEX_7 +# define index_FMA COMMON_CPUID_INDEX_1 +# define index_FMA4 COMMON_CPUID_INDEX_80000001 +# define index_POPCOUNT COMMON_CPUID_INDEX_1 +# define index_OSXSAVE COMMON_CPUID_INDEX_1 + +# define reg_SSE2 edx +# define reg_SSSE3 ecx +# define reg_SSE4_1 ecx +# define reg_SSE4_2 ecx +# define reg_AVX ecx +# define reg_AVX2 ebx +# define reg_AVX512F ebx +# define reg_AVX512DQ ebx +# define reg_RTM ebx +# define reg_FMA ecx +# define reg_FMA4 ecx +# define reg_POPCOUNT ecx +# define reg_OSXSAVE ecx + +# define index_Fast_Rep_String FEATURE_INDEX_1 +# define index_Fast_Copy_Backward FEATURE_INDEX_1 +# define index_Slow_BSF FEATURE_INDEX_1 +# define index_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 +# define index_AVX_Usable FEATURE_INDEX_1 +# define index_FMA_Usable FEATURE_INDEX_1 +# define index_FMA4_Usable FEATURE_INDEX_1 +# define index_Slow_SSE4_2 FEATURE_INDEX_1 +# define index_AVX2_Usable FEATURE_INDEX_1 +# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_AVX512F_Usable FEATURE_INDEX_1 +# define index_AVX512DQ_Usable FEATURE_INDEX_1 + +#endif /* !__ASSEMBLER__ */ + +#endif /* cpu_features_h */ diff --git 
a/sysdeps/x86/dl-get-cpu-features.c b/sysdeps/x86/dl-get-cpu-features.c new file mode 100644 index 0000000..080e5e8 --- /dev/null +++ b/sysdeps/x86/dl-get-cpu-features.c @@ -0,0 +1,27 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2015 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +#include <ldsodefs.h> + +#undef __get_cpu_features + +const struct cpu_features * +__get_cpu_features (void) +{ + return &GLRO(dl_x86_cpu_features); +} diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c new file mode 100644 index 0000000..9f0c045 --- /dev/null +++ b/sysdeps/x86/libc-start.c @@ -0,0 +1,41 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef SHARED +# include <csu/libc-start.c> +# else +/* The main work is done in the generic function. */ +# define LIBC_START_DISABLE_INLINE +# define LIBC_START_MAIN generic_start_main +# include <csu/libc-start.c> +# include <cpu-features.h> +# include <cpu-features.c> + +extern struct cpu_features _dl_x86_cpu_features; + +int +__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), + int argc, char **argv, + __typeof (main) init, + void (*fini) (void), + void (*rtld_fini) (void), void *stack_end) +{ + init_cpu_features (&_dl_x86_cpu_features); + return generic_start_main (main, argc, argv, init, fini, rtld_fini, + stack_end); +} +#endif diff --git a/sysdeps/x86/rtld-global-offsets.sym b/sysdeps/x86/rtld-global-offsets.sym new file mode 100644 index 0000000..a9d53d1 --- /dev/null +++ b/sysdeps/x86/rtld-global-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET rtld_global_ro_offsetof (_dl_x86_cpu_features) diff --git a/sysdeps/x86/tst-get-cpu-features-static.c b/sysdeps/x86/tst-get-cpu-features-static.c new file mode 100644 index 0000000..03f5906 --- /dev/null +++ b/sysdeps/x86/tst-get-cpu-features-static.c @@ -0,0 +1 @@ +#include "tst-get-cpu-features.c" diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c new file mode 100644 index 0000000..c17060f --- /dev/null +++ b/sysdeps/x86/tst-get-cpu-features.c @@ -0,0 +1,31 @@ +/* Test case for x86 __get_cpu_features interface + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> +#include <cpu-features.h> + +static int +do_test (void) +{ + if (__get_cpu_features ()->kind == arch_kind_unknown) + abort (); + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../../test-skeleton.c" |