aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/multiarch/strcmp-avx2.S
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2021-03-05 07:26:42 -0800
committer: H.J. Lu <hjl.tools@gmail.com> 2021-03-29 07:40:17 -0700
commit: 7ebba91361badf7531d4e75050627a88d424872f (patch)
tree: d99781a37b47b95441ad358d119ec3741960d405 /sysdeps/x86_64/multiarch/strcmp-avx2.S
parent: 91264fe3577fe887b4860923fa6142b5274c8965 (diff)
downloadglibc-7ebba91361badf7531d4e75050627a88d424872f.zip
glibc-7ebba91361badf7531d4e75050627a88d424872f.tar.gz
glibc-7ebba91361badf7531d4e75050627a88d424872f.tar.bz2
x86-64: Add AVX optimized string/memory functions for RTM
Since VZEROUPPER triggers RTM abort while VZEROALL won't, select AVX
optimized string/memory functions with

	xtest
	jz	1f
	vzeroall
	ret
1:
	vzeroupper
	ret

at function exit on processors with usable RTM, but without 256-bit EVEX
instructions, to avoid VZEROUPPER inside a transactionally executing RTM
region.
Diffstat (limited to 'sysdeps/x86_64/multiarch/strcmp-avx2.S')
-rw-r--r-- sysdeps/x86_64/multiarch/strcmp-avx2.S 55
1 files changed, 22 insertions, 33 deletions
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
index 53cb7a6..4033301 100644
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -55,6 +55,10 @@
# define VZEROUPPER vzeroupper
# endif
+# ifndef SECTION
+# define SECTION(p) p##.avx
+# endif
+
/* Warning!
wcscmp/wcsncmp have to use SIGNED comparison for elements.
strcmp/strncmp have to use UNSIGNED comparison for elements.
@@ -75,7 +79,7 @@
the maximum offset is reached before a difference is found, zero is
returned. */
- .section .text.avx,"ax",@progbits
+ .section SECTION(.text),"ax",@progbits
ENTRY (STRCMP)
# ifdef USE_AS_STRNCMP
/* Check for simple cases (0 or 1) in offset. */
@@ -127,8 +131,8 @@ L(return):
movzbl (%rsi, %rdx), %edx
subl %edx, %eax
# endif
- VZEROUPPER
- ret
+L(return_vzeroupper):
+ ZERO_UPPER_VEC_REGISTERS_RETURN
.p2align 4
L(return_vec_size):
@@ -161,8 +165,7 @@ L(return_vec_size):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(return_2_vec_size):
@@ -195,8 +198,7 @@ L(return_2_vec_size):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(return_3_vec_size):
@@ -229,8 +231,7 @@ L(return_3_vec_size):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(next_3_vectors):
@@ -356,8 +357,7 @@ L(back_to_loop):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(test_vec):
@@ -400,8 +400,7 @@ L(test_vec):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(test_2_vec):
@@ -444,8 +443,7 @@ L(test_2_vec):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(test_3_vec):
@@ -486,8 +484,7 @@ L(test_3_vec):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(loop_cross_page):
@@ -556,8 +553,7 @@ L(loop_cross_page):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(loop_cross_page_2_vec):
@@ -631,8 +627,7 @@ L(loop_cross_page_2_vec):
subl %edx, %eax
# endif
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
# ifdef USE_AS_STRNCMP
L(string_nbyte_offset_check):
@@ -674,8 +669,7 @@ L(cross_page_loop):
# ifndef USE_AS_WCSCMP
L(different):
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
# ifdef USE_AS_WCSCMP
.p2align 4
@@ -685,16 +679,14 @@ L(different):
setl %al
negl %eax
orl $1, %eax
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
# endif
# ifdef USE_AS_STRNCMP
.p2align 4
L(zero):
xorl %eax, %eax
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
.p2align 4
L(char0):
@@ -708,8 +700,7 @@ L(char0):
movzbl (%rdi), %eax
subl %ecx, %eax
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
# endif
.p2align 4
@@ -734,8 +725,7 @@ L(last_vector):
movzbl (%rsi, %rdx), %edx
subl %edx, %eax
# endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
/* Comparing on page boundary region requires special treatment:
It must done one vector at the time, starting with the wider
@@ -856,7 +846,6 @@ L(cross_page_4bytes):
testl %eax, %eax
jne L(cross_page_loop)
subl %ecx, %eax
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
END (STRCMP)
#endif