author:    H.J. Lu <hjl.tools@gmail.com>  2017-06-05 07:41:14 -0700
committer: H.J. Lu <hjl.tools@gmail.com>  2017-06-05 07:41:26 -0700
commit:    7395928b957ebb35afb696c3278d14122aa97b51
tree:      ca45794e0ad69d0e4d03bd94d36d6afcb54edd6b
parent:    d8a7d10324d9765fa62f42c1d94c5bf36b60d558
x86_64: Remove redundant REX bytes from memrchr.S
By the x86-64 specification, writing to a 32-bit destination register
zero-extends the result to 64 bits, so 64-bit instructions (and the REX
prefixes they require) are unnecessary when only the lower 32 bits are
non-zero.  In addition, the last two instructions in the sequence:
mov %rdi, %rcx
and $15, %rcx
jz L(length_less16_offset0)
mov %rdi, %rcx <<< redundant
and $15, %rcx <<< redundant
are redundant: they recompute a value that is already in %rcx.
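
For illustration, a minimal sketch of the encoding savings (byte
sequences per the Intel/AMD instruction-set references; this snippet is
not part of the patch itself):

	mov	%rdi, %rcx	# 48 89 f9    -- 3 bytes (REX.W prefix)
	mov	%edi, %ecx	# 89 f9       -- 2 bytes; the upper 32 bits
				#              of %rcx are zeroed anyway
	and	$15, %rcx	# 48 83 e1 0f -- 4 bytes
	and	$15, %ecx	# 83 e1 0f    -- 3 bytes; ZF is set
				#              identically for a following jz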
* sysdeps/x86_64/memrchr.S (__memrchr): Use 32-bit registers for
the lower 32 bits. Remove redundant instructions.
 ChangeLog                |  5
 sysdeps/x86_64/memrchr.S | 36
2 files changed, 22 insertions(+), 19 deletions(-)
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2017-06-05  H.J. Lu  <hongjiu.lu@intel.com>
 
+	* sysdeps/x86_64/memrchr.S (__memrchr): Use 32-bit registers for
+	the lower 32 bits.  Remove redundant instructions.
+
+2017-06-05  H.J. Lu  <hongjiu.lu@intel.com>
+
 	* sysdeps/unix/sysv/linux/x86_64/sysdep.h (LO_HI_LONG): Pass 0
 	as the high part of offset.
 	* sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h (LO_HI_LONG): New.
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S
index aab1a4a..5fa0fe9 100644
--- a/sysdeps/x86_64/memrchr.S
+++ b/sysdeps/x86_64/memrchr.S
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (__memrchr)
-	movd	%rsi, %xmm1
+	movd	%esi, %xmm1
 
 	sub	$16, %rdx
 	jbe	L(length_less16)
@@ -42,8 +42,8 @@ ENTRY (__memrchr)
 	jnz	L(matches0)
 
 	sub	$64, %rdi
-	mov	%rdi, %rcx
-	and	$15, %rcx
+	mov	%edi, %ecx
+	and	$15, %ecx
 	jz	L(loop_prolog)
 
 	add	$16, %rdi
@@ -108,8 +108,8 @@ L(loop_prolog):
 	test	%eax, %eax
 	jnz	L(matches0)
 
-	mov	%rdi, %rcx
-	and	$63, %rcx
+	mov	%edi, %ecx
+	and	$63, %ecx
 	jz	L(align64_loop)
 
 	add	$64, %rdi
@@ -166,8 +166,8 @@ L(align64_loop):
 
 	.p2align 4
 L(exit_loop):
-	add	$64, %rdx
-	cmp	$32, %rdx
+	add	$64, %edx
+	cmp	$32, %edx
 	jbe	L(exit_loop_32)
 
 	movdqa	48(%rdi), %xmm0
@@ -187,7 +187,7 @@ L(exit_loop):
 	pmovmskb	%xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches16_1)
-	cmp	$48, %rdx
+	cmp	$48, %edx
 	jbe	L(return_null)
 
 	pcmpeqb	(%rdi), %xmm1
@@ -204,7 +204,7 @@ L(exit_loop_32):
 	pmovmskb	%xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches48_1)
-	cmp	$16, %rdx
+	cmp	$16, %edx
 	jbe	L(return_null)
 
 	pcmpeqb	32(%rdi), %xmm1
@@ -276,7 +276,7 @@ L(matches48_1):
 
 	.p2align 4
 L(return_null):
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
 
 	.p2align 4
@@ -306,18 +306,16 @@ L(length_less16):
 	punpcklbw	%xmm1, %xmm1
 	punpcklbw	%xmm1, %xmm1
 
-	add	$16, %rdx
+	add	$16, %edx
 
 	pshufd	$0, %xmm1, %xmm1
 
-	mov	%rdi, %rcx
-	and	$15, %rcx
+	mov	%edi, %ecx
+	and	$15, %ecx
 	jz	L(length_less16_offset0)
 
-	mov	%rdi, %rcx
-	and	$15, %rcx
 	mov	%cl, %dh
-	mov	%rcx, %r8
+	mov	%ecx, %esi
 	add	%dl, %dh
 	and	$-16, %rdi
 
@@ -340,7 +338,7 @@ L(length_less16):
 
 	bsr	%eax, %eax
 	add	%rdi, %rax
-	add	%r8, %rax
+	add	%rsi, %rax
 	ret
 
 	.p2align 4
@@ -362,14 +360,14 @@ L(length_less16_part2):
 	pcmpeqb	(%rdi), %xmm1
 	pmovmskb	%xmm1, %eax
 
-	mov	%r8, %rcx
+	mov	%esi, %ecx
 	sar	%cl, %eax
 	test	%eax, %eax
 	jz	L(return_null)
 
 	bsr	%eax, %eax
 	add	%rdi, %rax
-	add	%r8, %rax
+	add	%rsi, %rax
 	ret
 
 	.p2align 4
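
One more detail visible in the diff above: the scratch register changes
from %r8 to %rsi.  The extended registers %r8-%r15 require a REX prefix
even for 32-bit operations, whereas the legacy register %esi does not,
and %rsi is dead at that point because the search byte it carried was
copied into %xmm1 at function entry.  A minimal sketch of the encoding
difference (again per the instruction-set references, not part of the
patch):

	mov	%ecx, %r8d	# 41 89 c8 -- REX.B still required for %r8d
	mov	%ecx, %esi	# 89 ce    -- legacy register, no REX prefix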