diff options
author | Jakub Jelinek <jakub@redhat.com> | 2023-11-28 13:29:58 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2023-11-28 13:29:58 +0100 |
commit | 4a50820ee8f153265ec8ffd068618607d4be3a26 (patch) | |
tree | 680392257eafcb9160a66e62070f802374d2eda6 /libiberty/configure.ac | |
parent | e5f1ee1832ff9e970833fa5773f46c3e0b93bc04 (diff) | |
download | gdb-4a50820ee8f153265ec8ffd068618607d4be3a26.zip gdb-4a50820ee8f153265ec8ffd068618607d4be3a26.tar.gz gdb-4a50820ee8f153265ec8ffd068618607d4be3a26.tar.bz2 |
libiberty, ld: Use x86 HW optimized sha1
The following patch attempts to use the x86 SHA ISA extensions, if
available, to speed up sha1 build-id processing. In my testing (on an
AMD Ryzen 5 3600) it is about 2.5x faster while producing the same result.
I believe AArch64 has similar HW acceleration for SHA1; perhaps it
could be added similarly.
Note, it seems lld uses BLAKE3 rather than md5/sha1. I think it would be
a bad idea to lie to users: if they choose --build-id=sha1, we should
be using SHA1, not some other checksum, but perhaps we could add some other
--build-id= styles and perhaps make one of the new ones the default.
Tested on x86_64-linux, both on Intel i9-7960X (which doesn't have
sha_ni ISA support) without/with the patch and on AMD Ryzen 5 3600
(which does have it) without/with the patch.
2023-11-28 Jakub Jelinek <jakub@redhat.com>
include/
* sha1.h (sha1_process_bytes_fn): New typedef.
(sha1_choose_process_bytes): Declare.
libiberty/
* configure.ac (HAVE_X86_SHA1_HW_SUPPORT): New check.
* sha1.c: If HAVE_X86_SHA1_HW_SUPPORT is defined, include x86intrin.h
and cpuid.h.
(sha1_hw_process_bytes, sha1_hw_process_block,
sha1_choose_process_bytes): New functions.
* config.in: Regenerated.
* configure: Regenerated.
ld/
* ldbuildid.c (generate_build_id): Use sha1_choose_process_bytes ()
instead of &sha1_process_bytes.
Diffstat (limited to 'libiberty/configure.ac')
-rw-r--r-- | libiberty/configure.ac | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/libiberty/configure.ac b/libiberty/configure.ac index 0748c59..e07cbb8 100644 --- a/libiberty/configure.ac +++ b/libiberty/configure.ac @@ -740,6 +740,46 @@ case "${host}" in esac AC_SUBST(pexecute) +AC_MSG_CHECKING([for SHA1 HW acceleration support]) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +#include <x86intrin.h> +#include <cpuid.h> + +__attribute__((__target__ ("sse4.1,sha"))) +void foo (__m128i *buf, unsigned int e, __m128i msg0, __m128i msg1) +{ + __m128i abcd = _mm_loadu_si128 ((const __m128i *) buf); + __m128i e0 = _mm_set_epi32 (e, 0, 0, 0); + abcd = _mm_shuffle_epi32 (abcd, 0x1b); + const __m128i shuf_mask = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL); + abcd = _mm_shuffle_epi8 (abcd, shuf_mask); + e0 = _mm_sha1nexte_epu32 (e0, msg1); + abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); + msg0 = _mm_sha1msg1_epu32 (msg0, msg1); + msg0 = _mm_sha1msg2_epu32 (msg0, msg1); + msg0 = _mm_xor_si128 (msg0, msg1); + e0 = _mm_add_epi32 (e0, msg0); + e0 = abcd; + _mm_storeu_si128 (buf, abcd); + e = _mm_extract_epi32 (e0, 3); +} + +int bar (void) +{ + unsigned int eax, ebx, ecx, edx; + if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx) + && (ebx & bit_SHA) != 0 + && __get_cpuid (1, &eax, &ebx, &ecx, &edx) + && (ecx & bit_SSE4_1) != 0) + return 1; + return 0; +} +]], [[bar ();]])], + [AC_MSG_RESULT([x86 SHA1]) + AC_DEFINE(HAVE_X86_SHA1_HW_SUPPORT, 1, + [Define if you have x86 SHA1 HW acceleration support.])], + [AC_MSG_RESULT([no])]) + libiberty_AC_FUNC_STRNCMP # Install a library built with a cross compiler in $(tooldir) rather |