about | summary | refs | log | tree | commit | diff
path: root/sysdeps/i386
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com>, 2010-02-15 13:04:54 -0800
committer: Ulrich Drepper <drepper@redhat.com>, 2010-02-15 13:04:54 -0800
commit: 6bb74d9f86e543c418f94a7732e8ee47c9e8225f (patch)
tree: edadff811d4545e48713f224f8fd172f078a576e /sysdeps/i386
parent: 904057bc17fb3e3127a35ebf35fcac8d5bc8269b (diff)
download: glibc-6bb74d9f86e543c418f94a7732e8ee47c9e8225f.zip
glibc-6bb74d9f86e543c418f94a7732e8ee47c9e8225f.tar.gz
glibc-6bb74d9f86e543c418f94a7732e8ee47c9e8225f.tar.bz2
Fix up new x86 string functions.
Diffstat (limited to 'sysdeps/i386')
-rw-r--r-- sysdeps/i386/i686/multiarch/memcmp-sse4.S 36
-rw-r--r-- sysdeps/i386/i686/multiarch/memcmp-ssse3.S 75
-rw-r--r-- sysdeps/i386/i686/multiarch/strcmp-sse4.S 14
-rw-r--r-- sysdeps/i386/i686/multiarch/strcmp-ssse3.S 64
4 files changed, 151 insertions, 38 deletions
diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
index 06437e4..71c4e1c 100644
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -105,43 +105,43 @@ L(less8bytes):
mov 1(%eax), %bl
cmpb 1(%edx), %bl
jne L(nonzero)
-
- cmp $2, %ecx
+
+ cmp $2, %ecx
jz L(0bytes)
mov 2(%eax), %bl
cmpb 2(%edx), %bl
jne L(nonzero)
-
- cmp $3, %ecx
+
+ cmp $3, %ecx
jz L(0bytes)
-
+
mov 3(%eax), %bl
cmpb 3(%edx), %bl
jne L(nonzero)
-
- cmp $4, %ecx
+
+ cmp $4, %ecx
jz L(0bytes)
-
+
mov 4(%eax), %bl
cmpb 4(%edx), %bl
jne L(nonzero)
- cmp $5, %ecx
+ cmp $5, %ecx
jz L(0bytes)
-
+
mov 5(%eax), %bl
cmpb 5(%edx), %bl
jne L(nonzero)
- cmp $6, %ecx
+ cmp $6, %ecx
jz L(0bytes)
-
+
mov 6(%eax), %bl
cmpb 6(%edx), %bl
je L(0bytes)
L(nonzero):
- POP (%ebx)
+ POP (%ebx)
mov $1, %eax
ja L(above)
neg %eax
@@ -151,11 +151,11 @@ L(above):
ALIGN (4)
L(0bytes):
- POP (%ebx)
+ POP (%ebx)
xor %eax, %eax
ret
CFI_PUSH (%ebx)
-
+
ALIGN (4)
L(less1bytes):
jb L(0bytesend)
@@ -609,7 +609,7 @@ L(26bytes):
mov -6(%edx), %ebx
cmp %ebx, %ecx
jne L(find_diff)
-
+
movzwl -2(%eax), %ecx
movzwl -2(%edx), %ebx
cmp %bl, %cl
@@ -873,7 +873,7 @@ L(32bytes):
L(less16bytes):
add %ebx, %eax
add %ebx, %edx
-
+
mov (%eax), %ecx
mov (%edx), %ebx
cmp %ebx, %ecx
@@ -908,7 +908,7 @@ L(find_diff):
jne L(end)
cmp %bx, %cx
L(end):
- POP (%ebx)
+ POP (%ebx)
mov $1, %eax
ja L(bigger)
neg %eax
diff --git a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
index bfcf660..869f37a 100644
--- a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
@@ -43,8 +43,7 @@
#define BLK2 BLK1+4
#define LEN BLK2+4
#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
-#define RETURN RETURN_END; CFI_PUSH (%ebx); CFI_PUSH (%edi); \
- CFI_PUSH (%esi)
+#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
.section .text.ssse3,"ax",@progbits
ENTRY (MEMCMP)
@@ -76,12 +75,13 @@ L(1bytesend):
L(zero):
mov $0, %eax
ret
-
+
ALIGN (4)
L(48bytesormore):
PUSH (%ebx)
PUSH (%esi)
PUSH (%edi)
+ cfi_remember_state
movdqu (%eax), %xmm3
movdqu (%edx), %xmm0
movl %eax, %edi
@@ -155,7 +155,7 @@ L(shr_0):
add $32, %esi
sub $0xffff, %edx
jnz L(exit)
-
+
lea (%ecx, %edi,1), %eax
lea (%ecx, %esi,1), %edx
@@ -163,6 +163,8 @@ L(shr_0):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_0_gobble):
lea -48(%ecx), %ecx
@@ -207,6 +209,8 @@ L(shr_0_gobble_loop_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_1):
cmp $80, %ecx
@@ -235,6 +239,8 @@ L(shr_1):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_1_gobble):
sub $32, %ecx
@@ -286,6 +292,8 @@ L(shr_1_gobble_next):
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_2):
cmp $80, %ecx
@@ -314,6 +322,8 @@ L(shr_2):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_2_gobble):
sub $32, %ecx
@@ -364,6 +374,8 @@ L(shr_2_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_3):
cmp $80, %ecx
@@ -392,6 +404,8 @@ L(shr_3):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_3_gobble):
sub $32, %ecx
@@ -442,6 +456,8 @@ L(shr_3_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_4):
cmp $80, %ecx
@@ -470,6 +486,8 @@ L(shr_4):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_4_gobble):
sub $32, %ecx
@@ -520,6 +538,8 @@ L(shr_4_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_5):
cmp $80, %ecx
@@ -548,6 +568,8 @@ L(shr_5):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_5_gobble):
sub $32, %ecx
@@ -598,6 +620,8 @@ L(shr_5_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_6):
cmp $80, %ecx
@@ -626,6 +650,8 @@ L(shr_6):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_6_gobble):
sub $32, %ecx
@@ -676,6 +702,8 @@ L(shr_6_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_7):
cmp $80, %ecx
@@ -704,6 +732,8 @@ L(shr_7):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_7_gobble):
sub $32, %ecx
@@ -754,6 +784,8 @@ L(shr_7_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_8):
cmp $80, %ecx
@@ -782,6 +814,8 @@ L(shr_8):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_8_gobble):
sub $32, %ecx
@@ -832,6 +866,8 @@ L(shr_8_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_9):
cmp $80, %ecx
@@ -860,6 +896,8 @@ L(shr_9):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_9_gobble):
sub $32, %ecx
@@ -910,6 +948,8 @@ L(shr_9_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_10):
cmp $80, %ecx
@@ -938,6 +978,8 @@ L(shr_10):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_10_gobble):
sub $32, %ecx
@@ -988,6 +1030,8 @@ L(shr_10_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_11):
cmp $80, %ecx
@@ -1016,6 +1060,8 @@ L(shr_11):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_11_gobble):
sub $32, %ecx
@@ -1066,6 +1112,8 @@ L(shr_11_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_12):
cmp $80, %ecx
@@ -1094,6 +1142,8 @@ L(shr_12):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_12_gobble):
sub $32, %ecx
@@ -1144,6 +1194,8 @@ L(shr_12_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_13):
cmp $80, %ecx
@@ -1172,6 +1224,8 @@ L(shr_13):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_13_gobble):
sub $32, %ecx
@@ -1222,6 +1276,8 @@ L(shr_13_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_14):
cmp $80, %ecx
@@ -1250,6 +1306,8 @@ L(shr_14):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_14_gobble):
sub $32, %ecx
@@ -1300,6 +1358,8 @@ L(shr_14_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_15):
cmp $80, %ecx
@@ -1328,6 +1388,8 @@ L(shr_15):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shr_15_gobble):
sub $32, %ecx
@@ -1378,6 +1440,8 @@ L(shr_15_gobble_next):
POP (%esi)
jmp L(less48bytes)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(exit):
pmovmskb %xmm1, %ebx
@@ -1497,8 +1561,9 @@ L(Byte31):
movzbl -9(%edi), %eax
movzbl -9(%esi), %edx
sub %edx, %eax
- RETURN
+ RETURN_END
+ CFI_PUSH (%ebx)
ALIGN (4)
L(more8bytes):
cmp $16, %ecx
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
index 9776472..4b47851 100644
--- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -176,6 +176,7 @@ L(first4bytes):
PUSH (%ebx)
PUSH (%edi)
PUSH (%esi)
+ cfi_remember_state
mov %edx, %edi
mov %eax, %esi
xorl %eax, %eax
@@ -241,6 +242,7 @@ L(ret):
#endif
ret
+ cfi_restore_state
#ifdef USE_AS_STRNCMP
L(more16byteseq):
POP (%esi)
@@ -253,6 +255,10 @@ L(eq):
POP (%ebp)
#endif
ret
+
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(neq):
mov $1, %eax
ja L(neq_bigger)
@@ -263,6 +269,9 @@ L(neq_bigger):
#endif
ret
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(less16bytes):
add $0xfefefeff, %ecx
jnc L(less4bytes)
@@ -370,8 +379,13 @@ L(more4bytes):
movzbl 7(%eax), %ecx
cmpb %cl, 7(%edx)
jne L(neq)
+#if 0
+ // XXX bug in original code. It had a fallthru without any code
cmpl $0, %ecx
je L(eq)
+#else
+ jmp L(eq)
+#endif
END (STRCMP)
diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
index 14caae2..338b003 100644
--- a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
@@ -160,6 +160,9 @@ L(crosspage):
PUSH (%ebx)
PUSH (%edi)
PUSH (%esi)
+#ifdef USE_AS_STRNCMP
+ cfi_remember_state
+#endif
movl %edx, %edi
movl %eax, %ecx
@@ -254,7 +257,7 @@ L(loop_ashr_0):
/*
* The following cases will be handled by ashr_1
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(15) n -15 0(15 +(n-15) - n) ashr_1
*/
.p2align 4
@@ -360,7 +363,7 @@ L(ashr_1_exittail):
/*
* The following cases will be handled by ashr_2
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(14~15) n -14 1(15 +(n-14) - n) ashr_2
*/
.p2align 4
@@ -467,7 +470,7 @@ L(ashr_2_exittail):
/*
* The following cases will be handled by ashr_3
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(13~15) n -13 2(15 +(n-13) - n) ashr_3
*/
.p2align 4
@@ -573,7 +576,7 @@ L(ashr_3_exittail):
/*
* The following cases will be handled by ashr_4
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(12~15) n -12 3(15 +(n-12) - n) ashr_4
*/
.p2align 4
@@ -682,7 +685,7 @@ L(ashr_4_exittail):
/*
* The following cases will be handled by ashr_5
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(11~15) n -11 4(15 +(n-11) - n) ashr_5
*/
.p2align 4
@@ -788,7 +791,7 @@ L(ashr_5_exittail):
/*
* The following cases will be handled by ashr_6
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(10~15) n -10 5(15 +(n-10) - n) ashr_6
*/
@@ -896,7 +899,7 @@ L(ashr_6_exittail):
/*
* The following cases will be handled by ashr_7
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
*/
@@ -1006,7 +1009,7 @@ L(ashr_7_exittail):
/*
* The following cases will be handled by ashr_8
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
*/
.p2align 4
@@ -1113,7 +1116,7 @@ L(ashr_8_exittail):
/*
* The following cases will be handled by ashr_9
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
*/
.p2align 4
@@ -1219,7 +1222,7 @@ L(ashr_9_exittail):
/*
* The following cases will be handled by ashr_10
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
*/
.p2align 4
@@ -1325,7 +1328,7 @@ L(ashr_10_exittail):
/*
* The following cases will be handled by ashr_11
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
*/
.p2align 4
@@ -1431,7 +1434,7 @@ L(ashr_11_exittail):
/*
* The following cases will be handled by ashr_12
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
*/
.p2align 4
@@ -1537,7 +1540,7 @@ L(ashr_12_exittail):
/*
* The following cases will be handled by ashr_13
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
*/
.p2align 4
@@ -1643,7 +1646,7 @@ L(ashr_13_exittail):
/*
* The following cases will be handled by ashr_14
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
*/
.p2align 4
@@ -1749,7 +1752,7 @@ L(ashr_14_exittail):
/*
* The following cases will be handled by ashr_15
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
*/
@@ -1916,6 +1919,9 @@ L(less16bytes):
ret
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(Byte0):
#ifdef USE_AS_STRNCMP
cmp $0, %ebp
@@ -1931,6 +1937,9 @@ L(Byte0):
ret
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(Byte1):
#ifdef USE_AS_STRNCMP
cmp $1, %ebp
@@ -1946,6 +1955,9 @@ L(Byte1):
ret
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(Byte2):
#ifdef USE_AS_STRNCMP
cmp $2, %ebp
@@ -1961,6 +1973,9 @@ L(Byte2):
ret
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(Byte3):
#ifdef USE_AS_STRNCMP
cmp $3, %ebp
@@ -1976,6 +1991,9 @@ L(Byte3):
ret
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(Byte4):
#ifdef USE_AS_STRNCMP
cmp $4, %ebp
@@ -1989,7 +2007,11 @@ L(Byte4):
POP (%ebp)
#endif
ret
+
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(Byte5):
#ifdef USE_AS_STRNCMP
cmp $5, %ebp
@@ -2005,6 +2027,9 @@ L(Byte5):
ret
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(Byte6):
#ifdef USE_AS_STRNCMP
cmp $6, %ebp
@@ -2020,6 +2045,9 @@ L(Byte6):
ret
.p2align 4
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(2next_8_bytes):
add $8, %eax
add $8, %edx
@@ -2063,6 +2091,9 @@ L(2next_8_bytes):
#endif
ret
+#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
+#endif
L(neq):
mov $1, %eax
ja L(neq_bigger)
@@ -2074,6 +2105,7 @@ L(neq_bigger):
ret
#ifdef USE_AS_STRNCMP
+ cfi_remember_state
L(more8byteseq):
POP (%esi)
POP (%edi)
@@ -2087,7 +2119,9 @@ L(eq):
#endif
xorl %eax, %eax
ret
+
#ifdef USE_AS_STRNCMP
+ CFI_PUSH (%ebp)
L(less16bytes_sncmp):
test %ebp, %ebp
jz L(eq)