aboutsummaryrefslogtreecommitdiff
path: root/libiberty/configure.ac
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2023-11-28 13:14:05 +0100
committerJakub Jelinek <jakub@redhat.com>2023-11-28 13:14:05 +0100
commitbf4f40cc3195eb7b900bf5535cdba1ee51fdbb8e (patch)
tree4d092d8732e61733b96acf348003dc3af0aed485 /libiberty/configure.ac
parent9f3f0b829b62f11f350867d2350e2af8639ec890 (diff)
downloadgcc-bf4f40cc3195eb7b900bf5535cdba1ee51fdbb8e.zip
gcc-bf4f40cc3195eb7b900bf5535cdba1ee51fdbb8e.tar.gz
gcc-bf4f40cc3195eb7b900bf5535cdba1ee51fdbb8e.tar.bz2
libiberty: Use x86 HW optimized sha1
Nick has approved this patch (+ small ld change to use it for --build-id=), so I'm committing it to GCC as master as well. If anyone from ARM would be willing to implement it similarly with vsha1{cq,mq,pq,h,su0q,su1q}_u32 intrinsics, it could be a useful linker speedup on those hosts as well. The intent in sha1.c was that sha1_hw_process_bytes, sha1_hw_process_block functions would be defined whenever defined (HAVE_X86_SHA1_HW_SUPPORT) || defined (HAVE_WHATEVERELSE_SHA1_HW_SUPPORT) but the body of sha1_hw_process_block and sha1_choose_process_bytes would then have #elif defined (HAVE_WHATEVERELSE_SHA1_HW_SUPPORT) for the other arch support, similarly for any target attributes on sha1_hw_process_block if needed. 2023-11-28 Jakub Jelinek <jakub@redhat.com> include/ * sha1.h (sha1_process_bytes_fn): New typedef. (sha1_choose_process_bytes): Declare. libiberty/ * configure.ac (HAVE_X86_SHA1_HW_SUPPORT): New check. * sha1.c: If HAVE_X86_SHA1_HW_SUPPORT is defined, include x86intrin.h and cpuid.h. (sha1_hw_process_bytes, sha1_hw_process_block, sha1_choose_process_bytes): New functions. * config.in: Regenerated. * configure: Regenerated.
Diffstat (limited to 'libiberty/configure.ac')
-rw-r--r--libiberty/configure.ac40
1 files changed, 40 insertions, 0 deletions
diff --git a/libiberty/configure.ac b/libiberty/configure.ac
index 2488b03..3520ec2 100644
--- a/libiberty/configure.ac
+++ b/libiberty/configure.ac
@@ -742,6 +742,46 @@ case "${host}" in
esac
AC_SUBST(pexecute)
+AC_MSG_CHECKING([for SHA1 HW acceleration support])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#include <x86intrin.h>
+#include <cpuid.h>
+/* foo is never called by the probe; it only has to compile using the SSE and SHA1 intrinsics below. */
+__attribute__((__target__ ("sse4.1,sha"))) /* enable SSE4.1 and SHA for this function only */
+void foo (__m128i *buf, unsigned int e, __m128i msg0, __m128i msg1)
+{
+ __m128i abcd = _mm_loadu_si128 ((const __m128i *) buf);
+ __m128i e0 = _mm_set_epi32 (e, 0, 0, 0);
+ abcd = _mm_shuffle_epi32 (abcd, 0x1b);
+ const __m128i shuf_mask = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
+ abcd = _mm_shuffle_epi8 (abcd, shuf_mask);
+ e0 = _mm_sha1nexte_epu32 (e0, msg1);
+ abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
+ msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
+ msg0 = _mm_sha1msg2_epu32 (msg0, msg1);
+ msg0 = _mm_xor_si128 (msg0, msg1);
+ e0 = _mm_add_epi32 (e0, msg0);
+ e0 = abcd; /* overwrites the sum computed on the previous line */
+ _mm_storeu_si128 (buf, abcd);
+ e = _mm_extract_epi32 (e0, 3); /* e is a by-value parameter, so this result is not used afterwards */
+}
+/* bar returns 1 when CPUID reports both the SHA and SSE4.1 feature bits, otherwise 0. */
+int bar (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx) /* CPUID leaf 7, subleaf 0 */
+ && (ebx & bit_SHA) != 0
+ && __get_cpuid (1, &eax, &ebx, &ecx, &edx) /* CPUID leaf 1 */
+ && (ecx & bit_SSE4_1) != 0)
+ return 1;
+ return 0;
+}
+]], [[bar ();]])],
+ [AC_MSG_RESULT([x86 SHA1])
+ AC_DEFINE(HAVE_X86_SHA1_HW_SUPPORT, 1,
+ [Define if you have x86 SHA1 HW acceleration support.])],
+ [AC_MSG_RESULT([no])])
+
libiberty_AC_FUNC_STRNCMP
# Install a library built with a cross compiler in $(tooldir) rather