From 5f92ec52e795dc004f8e8d17317e4572695ded15 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 25 Aug 2015 08:48:21 -0700 Subject: Replace %xmm8 with %xmm0 Since ld.so preserves vector registers now, we can use %xmm0 to avoid the REX prefix. * sysdeps/x86_64/memset.S: Replace %xmm8 with %xmm0. --- sysdeps/x86_64/memset.S | 52 ++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'sysdeps') diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S index e496254..3855cc8 100644 --- a/sysdeps/x86_64/memset.S +++ b/sysdeps/x86_64/memset.S @@ -24,7 +24,7 @@ ENTRY(__bzero) movq %rdi, %rax /* Set return value. */ movq %rsi, %rdx /* Set n. */ - pxor %xmm8, %xmm8 + pxor %xmm0, %xmm0 jmp L(entry_from_bzero) END(__bzero) weak_alias (__bzero, bzero) @@ -33,10 +33,10 @@ weak_alias (__bzero, bzero) ENTRY(__memset_tail) movq %rcx, %rax /* Set return value. */ - movd %esi, %xmm8 - punpcklbw %xmm8, %xmm8 - punpcklwd %xmm8, %xmm8 - pshufd $0, %xmm8, %xmm8 + movd %esi, %xmm0 + punpcklbw %xmm0, %xmm0 + punpcklwd %xmm0, %xmm0 + pshufd $0, %xmm0, %xmm0 jmp L(entry_from_bzero) END(__memset_tail) @@ -50,57 +50,57 @@ END_CHK (__memset_chk) #endif ENTRY (memset) - movd %esi, %xmm8 + movd %esi, %xmm0 movq %rdi, %rax - punpcklbw %xmm8, %xmm8 - punpcklwd %xmm8, %xmm8 - pshufd $0, %xmm8, %xmm8 + punpcklbw %xmm0, %xmm0 + punpcklwd %xmm0, %xmm0 + pshufd $0, %xmm0, %xmm0 L(entry_from_bzero): cmpq $64, %rdx ja L(loop_start) cmpq $16, %rdx jbe L(less_16_bytes) cmpq $32, %rdx - movdqu %xmm8, (%rdi) - movdqu %xmm8, -16(%rdi,%rdx) + movdqu %xmm0, (%rdi) + movdqu %xmm0, -16(%rdi,%rdx) ja L(between_32_64_bytes) L(return): rep ret .p2align 4 L(between_32_64_bytes): - movdqu %xmm8, 16(%rdi) - movdqu %xmm8, -32(%rdi,%rdx) + movdqu %xmm0, 16(%rdi) + movdqu %xmm0, -32(%rdi,%rdx) ret .p2align 4 L(loop_start): leaq 64(%rdi), %rcx - movdqu %xmm8, (%rdi) + movdqu %xmm0, (%rdi) andq $-64, %rcx - movdqu %xmm8, -16(%rdi,%rdx) - movdqu %xmm8, 16(%rdi) - movdqu %xmm8, -32(%rdi,%rdx) - movdqu %xmm8, 32(%rdi) - movdqu %xmm8, -48(%rdi,%rdx) - movdqu %xmm8, 48(%rdi) - movdqu %xmm8, -64(%rdi,%rdx) + movdqu %xmm0, -16(%rdi,%rdx) + movdqu %xmm0, 16(%rdi) + movdqu %xmm0, -32(%rdi,%rdx) + movdqu %xmm0, 32(%rdi) + movdqu %xmm0, -48(%rdi,%rdx) + movdqu %xmm0, 48(%rdi) + movdqu %xmm0, -64(%rdi,%rdx) addq %rdi, %rdx andq $-64, %rdx cmpq %rdx, %rcx je L(return) .p2align 4 L(loop): - movdqa %xmm8, (%rcx) - movdqa %xmm8, 16(%rcx) - movdqa %xmm8, 32(%rcx) - movdqa %xmm8, 48(%rcx) + movdqa %xmm0, (%rcx) + movdqa %xmm0, 16(%rcx) + movdqa %xmm0, 32(%rcx) + movdqa %xmm0, 48(%rcx) addq $64, %rcx cmpq %rcx, %rdx jne L(loop) rep ret L(less_16_bytes): - movq %xmm8, %rcx + movq %xmm0, %rcx testb $24, %dl jne L(between8_16bytes) testb $4, %dl -- cgit v1.1