aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/i386
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2017-10-30 10:02:16 -0700
committerH.J. Lu <hjl.tools@gmail.com>2017-10-30 10:02:30 -0700
commitc5cc45148c89cc5c57d1946348dd242d4db5c5f5 (patch)
treeceee1e4fcc30daa0d3716353d5ffd6ac587508e1 /sysdeps/i386
parentce12269fac8cb873df1a8785e4a6cde870855590 (diff)
downloadglibc-c5cc45148c89cc5c57d1946348dd242d4db5c5f5.zip
glibc-c5cc45148c89cc5c57d1946348dd242d4db5c5f5.tar.gz
glibc-c5cc45148c89cc5c57d1946348dd242d4db5c5f5.tar.bz2
i586: Use conditional branches in strcpy.S [BZ #22353]
i586 strcpy.S used a clever trick with LEA to implement jump table: /* ECX has the last 2 bits of the address of source - 1. */ andl $3, %ecx call 2f 2: popl %edx /* 0xb is the distance between 2: and 1:. */ leal 0xb(%edx,%ecx,8), %ecx jmp *%ecx .align 8 1: /* ECX == 0 */ orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi /* ECX == 1 */ orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi /* ECX == 2 */ orb (%esi), %al jz L(end) stosb xorl %eax, %eax incl %esi /* ECX == 3 */ L(1): movl (%esi), %ecx leal 4(%esi),%esi This fails if there are instruction length changes before L(1):. This patch replaces it with conditional branches: cmpb $2, %cl je L(Src2) ja L(Src3) cmpb $1, %cl je L(Src1) L(Src0): which have similar performance and work with any instruction lengths. Tested on i586 and i686 with and without --disable-multi-arch. [BZ #22353] * sysdeps/i386/i586/strcpy.S (STRCPY): Use conditional branches. (1): Renamed to ... (L(Src0)): This. (L(Src1)): New. (L(Src2)): Likewise. (L(1)): Renamed to ... (L(Src3)): This.
Diffstat (limited to 'sysdeps/i386')
-rw-r--r--sysdeps/i386/i586/strcpy.S28
1 files changed, 11 insertions, 17 deletions
diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S
index a444604..bb73ca4 100644
--- a/sysdeps/i386/i586/strcpy.S
+++ b/sysdeps/i386/i586/strcpy.S
@@ -53,41 +53,35 @@ ENTRY (STRCPY)
cfi_rel_offset (ebx, 0)
andl $3, %ecx
-#ifdef PIC
- call 2f
- cfi_adjust_cfa_offset (4)
-2: popl %edx
- cfi_adjust_cfa_offset (-4)
- /* 0xb is the distance between 2: and 1: but we avoid writing
- 1f-2b because the assembler generates worse code. */
- leal 0xb(%edx,%ecx,8), %ecx
-#else
- leal 1f(,%ecx,8), %ecx
-#endif
-
- jmp *%ecx
+ cmpb $2, %cl
+ je L(Src2)
+ ja L(Src3)
+ cmpb $1, %cl
+ je L(Src1)
- .align 8
-1:
+L(Src0):
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
+L(Src1):
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
+L(Src2):
orb (%esi), %al
jz L(end)
stosb
xorl %eax, %eax
incl %esi
-L(1): movl (%esi), %ecx
+L(Src3):
+ movl (%esi), %ecx
leal 4(%esi),%esi
subl %ecx, %eax
@@ -107,7 +101,7 @@ L(1): movl (%esi), %ecx
movl %edx, (%edi)
leal 4(%edi),%edi
- jmp L(1)
+ jmp L(Src3)
L(3): movl %ecx, %edx