aboutsummaryrefslogtreecommitdiff
path: root/libiberty/configure.ac
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2023-11-28 13:29:58 +0100
committer: Jakub Jelinek <jakub@redhat.com> 2023-11-28 13:29:58 +0100
commit: 4a50820ee8f153265ec8ffd068618607d4be3a26 (patch)
tree: 680392257eafcb9160a66e62070f802374d2eda6 /libiberty/configure.ac
parent: e5f1ee1832ff9e970833fa5773f46c3e0b93bc04 (diff)
download: gdb-4a50820ee8f153265ec8ffd068618607d4be3a26.zip
gdb-4a50820ee8f153265ec8ffd068618607d4be3a26.tar.gz
gdb-4a50820ee8f153265ec8ffd068618607d4be3a26.tar.bz2
libiberty, ld: Use x86 HW optimized sha1
The following patch attempts to use the x86 SHA ISA, if available, to speed up sha1 build-id processing (by about 2.5x in my testing on an AMD Ryzen 5 3600) while producing the same result. I believe AArch64 has similar HW acceleration for SHA1; perhaps it could be added similarly. Note, it seems lld uses BLAKE3 rather than md5/sha1. I think it would be a bad idea to lie to users: if they choose --build-id=sha1, we should be using SHA1, not some other checksum, but perhaps we could add some other --build-id= styles and perhaps make one of the new ones the default. Tested on x86_64-linux, both on Intel i9-7960X (which doesn't have sha_ni ISA support) without/with the patch and on AMD Ryzen 5 3600 (which does have it) without/with the patch. 2023-11-28 Jakub Jelinek <jakub@redhat.com> include/ * sha1.h (sha1_process_bytes_fn): New typedef. (sha1_choose_process_bytes): Declare. libiberty/ * configure.ac (HAVE_X86_SHA1_HW_SUPPORT): New check. * sha1.c: If HAVE_X86_SHA1_HW_SUPPORT is defined, include x86intrin.h and cpuid.h. (sha1_hw_process_bytes, sha1_hw_process_block, sha1_choose_process_bytes): New functions. * config.in: Regenerated. * configure: Regenerated. ld/ * ldbuildid.c (generate_build_id): Use sha1_choose_process_bytes () instead of &sha1_process_bytes.
Diffstat (limited to 'libiberty/configure.ac')
-rw-r--r-- libiberty/configure.ac 40
1 files changed, 40 insertions, 0 deletions
diff --git a/libiberty/configure.ac b/libiberty/configure.ac
index 0748c59..e07cbb8 100644
--- a/libiberty/configure.ac
+++ b/libiberty/configure.ac
@@ -740,6 +740,46 @@ case "${host}" in
esac
AC_SUBST(pexecute)
+AC_MSG_CHECKING([for SHA1 HW acceleration support])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ /* Probe program: it is only compiled; on success HAVE_X86_SHA1_HW_SUPPORT is defined below. */
+#include <x86intrin.h> /* Intrinsics header for the _mm_* calls used in foo. */
+#include <cpuid.h> /* Supplies __get_cpuid, __get_cpuid_count and the bit_* masks used in bar. */
+
+__attribute__((__target__ ("sse4.1,sha"))) /* Per-function target options: SSE4.1 and SHA are enabled for foo alone. */
+void foo (__m128i *buf, unsigned int e, __m128i msg0, __m128i msg1) /* Exercises each needed intrinsic once; the computed values are not meaningful. */
+{
+ __m128i abcd = _mm_loadu_si128 ((const __m128i *) buf); /* Unaligned 128-bit load from buf. */
+ __m128i e0 = _mm_set_epi32 (e, 0, 0, 0); /* e goes into the highest 32-bit lane. */
+ abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0x1b reverses the order of the four 32-bit lanes. */
+ const __m128i shuf_mask = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL); /* Byte i selects source byte 15 - i. */
+ abcd = _mm_shuffle_epi8 (abcd, shuf_mask); /* Reverses all 16 bytes using the mask above. */
+ e0 = _mm_sha1nexte_epu32 (e0, msg1); /* This and the next three calls are the SHA-1 intrinsics being probed. */
+ abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
+ msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
+ msg0 = _mm_sha1msg2_epu32 (msg0, msg1);
+ msg0 = _mm_xor_si128 (msg0, msg1);
+ e0 = _mm_add_epi32 (e0, msg0);
+ e0 = abcd; /* Overwrites the sum just computed; only compilability matters here. */
+ _mm_storeu_si128 (buf, abcd); /* Unaligned 128-bit store back to buf. */
+ e = _mm_extract_epi32 (e0, 3); /* Writes only the by-value parameter, so nothing is visible outside foo. */
+}
+
+int bar (void) /* Returns 1 when CPUID reports both SHA and SSE4.1, otherwise 0. */
+{
+ unsigned int eax, ebx, ecx, edx;
+ if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx) /* CPUID leaf 7, subleaf 0. */
+ && (ebx & bit_SHA) != 0 /* SHA flag is tested in EBX of leaf 7. */
+ && __get_cpuid (1, &eax, &ebx, &ecx, &edx) /* CPUID leaf 1; reuses the same output variables. */
+ && (ecx & bit_SSE4_1) != 0) /* SSE4.1 flag is tested in ECX of leaf 1. */
+ return 1;
+ return 0;
+}
+]], [[bar ();]])],
+ [AC_MSG_RESULT([x86 SHA1])
+ AC_DEFINE(HAVE_X86_SHA1_HW_SUPPORT, 1,
+ [Define if you have x86 SHA1 HW acceleration support.])],
+ [AC_MSG_RESULT([no])])
+
libiberty_AC_FUNC_STRNCMP
# Install a library built with a cross compiler in $(tooldir) rather