From 6458faf199de42a3bba3dc7cad9fb38d66bf0b4d Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 20 Aug 2015 13:57:22 -0700 Subject: Add i386 strcmp family multiarch functions --- sysdeps/i386/i686/multiarch/strcasecmp-c.c | 12 - sysdeps/i386/i686/multiarch/strcasecmp_l-c.c | 13 - sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S | 2 - sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S | 2 - sysdeps/i386/i686/multiarch/strcmp-sse4.S | 804 ------- sysdeps/i386/i686/multiarch/strcmp-ssse3.S | 2810 ---------------------- sysdeps/i386/i686/multiarch/strncase-c.c | 8 - sysdeps/i386/i686/multiarch/strncase_l-c.c | 13 - sysdeps/i386/i686/multiarch/strncase_l-sse4.S | 2 - sysdeps/i386/i686/multiarch/strncase_l-ssse3.S | 2 - sysdeps/i386/i686/multiarch/strncmp-c.c | 8 - sysdeps/i386/i686/multiarch/strncmp-sse4.S | 5 - sysdeps/i386/i686/multiarch/strncmp-ssse3.S | 5 - sysdeps/i386/i686/rtld-strcmp.S | 1 + sysdeps/i386/multiarch/Makefile | 8 +- sysdeps/i386/multiarch/ifunc-impl-list.c | 22 +- sysdeps/i386/multiarch/rtld-strcmp.c | 1 + sysdeps/i386/multiarch/strcasecmp-i386.c | 12 + sysdeps/i386/multiarch/strcasecmp.c | 53 + sysdeps/i386/multiarch/strcasecmp_l-i386.c | 13 + sysdeps/i386/multiarch/strcasecmp_l-sse4.S | 2 + sysdeps/i386/multiarch/strcasecmp_l-ssse3.S | 2 + sysdeps/i386/multiarch/strcasecmp_l.c | 53 + sysdeps/i386/multiarch/strcmp-i386.c | 10 + sysdeps/i386/multiarch/strcmp-i686.S | 16 + sysdeps/i386/multiarch/strcmp-sse4.S | 804 +++++++ sysdeps/i386/multiarch/strcmp-ssse3.S | 2810 ++++++++++++++++++++++ sysdeps/i386/multiarch/strcmp.c | 58 + sysdeps/i386/multiarch/strncase-i386.c | 8 + sysdeps/i386/multiarch/strncase.c | 53 + sysdeps/i386/multiarch/strncase_l-i386.c | 13 + sysdeps/i386/multiarch/strncase_l-sse4.S | 2 + sysdeps/i386/multiarch/strncase_l-ssse3.S | 2 + sysdeps/i386/multiarch/strncase_l.c | 53 + sysdeps/i386/multiarch/strncmp-i386.c | 8 + sysdeps/i386/multiarch/strncmp-sse4.S | 5 + sysdeps/i386/multiarch/strncmp-ssse3.S | 5 + 
sysdeps/i386/multiarch/strncmp.c | 54 + 38 files changed, 4059 insertions(+), 3695 deletions(-) delete mode 100644 sysdeps/i386/i686/multiarch/strcasecmp-c.c delete mode 100644 sysdeps/i386/i686/multiarch/strcasecmp_l-c.c delete mode 100644 sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S delete mode 100644 sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S delete mode 100644 sysdeps/i386/i686/multiarch/strcmp-sse4.S delete mode 100644 sysdeps/i386/i686/multiarch/strcmp-ssse3.S delete mode 100644 sysdeps/i386/i686/multiarch/strncase-c.c delete mode 100644 sysdeps/i386/i686/multiarch/strncase_l-c.c delete mode 100644 sysdeps/i386/i686/multiarch/strncase_l-sse4.S delete mode 100644 sysdeps/i386/i686/multiarch/strncase_l-ssse3.S delete mode 100644 sysdeps/i386/i686/multiarch/strncmp-c.c delete mode 100644 sysdeps/i386/i686/multiarch/strncmp-sse4.S delete mode 100644 sysdeps/i386/i686/multiarch/strncmp-ssse3.S create mode 100644 sysdeps/i386/i686/rtld-strcmp.S create mode 100644 sysdeps/i386/multiarch/rtld-strcmp.c create mode 100644 sysdeps/i386/multiarch/strcasecmp-i386.c create mode 100644 sysdeps/i386/multiarch/strcasecmp.c create mode 100644 sysdeps/i386/multiarch/strcasecmp_l-i386.c create mode 100644 sysdeps/i386/multiarch/strcasecmp_l-sse4.S create mode 100644 sysdeps/i386/multiarch/strcasecmp_l-ssse3.S create mode 100644 sysdeps/i386/multiarch/strcasecmp_l.c create mode 100644 sysdeps/i386/multiarch/strcmp-i386.c create mode 100644 sysdeps/i386/multiarch/strcmp-i686.S create mode 100644 sysdeps/i386/multiarch/strcmp-sse4.S create mode 100644 sysdeps/i386/multiarch/strcmp-ssse3.S create mode 100644 sysdeps/i386/multiarch/strcmp.c create mode 100644 sysdeps/i386/multiarch/strncase-i386.c create mode 100644 sysdeps/i386/multiarch/strncase.c create mode 100644 sysdeps/i386/multiarch/strncase_l-i386.c create mode 100644 sysdeps/i386/multiarch/strncase_l-sse4.S create mode 100644 sysdeps/i386/multiarch/strncase_l-ssse3.S create mode 100644 
sysdeps/i386/multiarch/strncase_l.c create mode 100644 sysdeps/i386/multiarch/strncmp-i386.c create mode 100644 sysdeps/i386/multiarch/strncmp-sse4.S create mode 100644 sysdeps/i386/multiarch/strncmp-ssse3.S create mode 100644 sysdeps/i386/multiarch/strncmp.c diff --git a/sysdeps/i386/i686/multiarch/strcasecmp-c.c b/sysdeps/i386/i686/multiarch/strcasecmp-c.c deleted file mode 100644 index 753c6ec..0000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp-c.c +++ /dev/null @@ -1,12 +0,0 @@ -#include - -extern __typeof (strcasecmp) __strcasecmp_nonascii; - -#define __strcasecmp __strcasecmp_nonascii -#include - -strong_alias (__strcasecmp_nonascii, __strcasecmp_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. */ -strong_alias (__strcasecmp_nonascii, __GI___strcasecmp) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c b/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c deleted file mode 100644 index d4fcd2b..0000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c +++ /dev/null @@ -1,13 +0,0 @@ -#include - -extern __typeof (strcasecmp_l) __strcasecmp_l_nonascii; - -#define __strcasecmp_l __strcasecmp_l_nonascii -#define USE_IN_EXTENDED_LOCALE_MODEL 1 -#include - -strong_alias (__strcasecmp_l_nonascii, __strcasecmp_l_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. 
*/ -strong_alias (__strcasecmp_l_nonascii, __GI___strcasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S deleted file mode 100644 index 411d415..0000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRCASECMP_L 1 -#include "strcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S deleted file mode 100644 index a22b93c..0000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRCASECMP_L 1 -#include "strcmp-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S deleted file mode 100644 index 38c2317..0000000 --- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ /dev/null @@ -1,804 +0,0 @@ -/* strcmp with SSE4.2 - Copyright (C) 2010-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . 
*/ - -#if IS_IN (libc) - -#include -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_STRNCMP -# ifndef STRCMP -# define STRCMP __strncmp_sse4_2 -# endif -# define STR1 8 -# define STR2 STR1+4 -# define CNT STR2+4 -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# define REM %ebp -#elif defined USE_AS_STRCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strcasecmp_l_sse4_2 -# endif -# ifdef PIC -# define STR1 12 -# else -# define STR1 8 -# endif -# define STR2 STR1+4 -# define LOCALE 12 /* Loaded before the adjustment. */ -# ifdef PIC -# define RETURN POP (%edi); POP (%ebx); ret; \ - .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi) -# else -# define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi) -# endif -# define NONASCII __strcasecmp_nonascii -#elif defined USE_AS_STRNCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strncasecmp_l_sse4_2 -# endif -# ifdef PIC -# define STR1 16 -# else -# define STR1 12 -# endif -# define STR2 STR1+4 -# define CNT STR2+4 -# define LOCALE 16 /* Loaded before the adjustment. 
*/ -# ifdef PIC -# define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \ - .p2align 4; \ - CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi) -# else -# define RETURN POP (%edi); POP (REM); ret; \ - .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi) -# endif -# define REM %ebp -# define NONASCII __strncasecmp_nonascii -#else -# ifndef STRCMP -# define STRCMP __strcmp_sse4_2 -# endif -# define STR1 4 -# define STR2 STR1+4 -# define RETURN ret; .p2align 4 -#endif - - .section .text.sse4.2,"ax",@progbits - -#ifdef USE_AS_STRCASECMP_L -ENTRY (__strcasecmp_sse4_2) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strcasecmp_nonascii -# else - jne __strcasecmp_nonascii - jmp L(ascii) -# endif -END (__strcasecmp_sse4_2) -#endif - -#ifdef USE_AS_STRNCASECMP_L -ENTRY (__strncasecmp_sse4_2) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp 
__strncasecmp_nonascii -# else - jne __strncasecmp_nonascii - jmp L(ascii) -# endif -END (__strncasecmp_sse4_2) -#endif - - ENTRY (STRCMP) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movl LOCALE(%esp), %eax -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) - jne NONASCII - -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) -# endif -L(ascii): - .section .rodata.cst16,"aM",@progbits,16 - .align 16 -.Lbelowupper: - .quad 0x4040404040404040 - .quad 0x4040404040404040 -.Ltopupper: - .quad 0x5b5b5b5b5b5b5b5b - .quad 0x5b5b5b5b5b5b5b5b -.Ltouppermask: - .quad 0x2020202020202020 - .quad 0x2020202020202020 - .previous - -# ifdef PIC -# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) -# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) -# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) -# else -# define UCLOW_reg .Lbelowupper -# define UCHIGH_reg .Ltopupper -# define LCQWORD_reg .Ltouppermask -# endif -#endif - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - PUSH (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - PUSH (%edi) -#endif - mov STR1(%esp), %edx - mov STR2(%esp), %eax -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - movl CNT(%esp), REM - test REM, REM - je L(eq) -#endif - mov %dx, %cx - and $0xfff, %cx - cmp $0xff0, %cx - ja L(first4bytes) - movdqu (%edx), %xmm2 - mov %eax, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(first4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ - movdqa reg1, %xmm3; \ - movdqa UCHIGH_reg, %xmm4; \ - movdqa reg2, %xmm5; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb UCLOW_reg, %xmm3; \ - pcmpgtb reg1, %xmm4; \ - pcmpgtb UCLOW_reg, %xmm5; \ - pcmpgtb reg2, %xmm6; \ - pand %xmm4, %xmm3; \ - pand %xmm6, %xmm5; \ - pand LCQWORD_reg, %xmm3; \ - pand LCQWORD_reg, %xmm5; \ - 
por %xmm3, reg1; \ - por %xmm5, reg2 - - movdqu (%eax), %xmm1 - TOLOWER (%xmm2, %xmm1) - movd %xmm2, %ecx - movd %xmm1, %edi - movdqa %xmm2, %xmm3 - movdqa %xmm1, %xmm4 - cmpl %edi, %ecx -#else -# define TOLOWER(reg1, reg) - - movd %xmm2, %ecx - cmp (%eax), %ecx -#endif - jne L(less4bytes) -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - movdqu (%eax), %xmm1 -#endif - pxor %xmm2, %xmm1 - pxor %xmm0, %xmm0 - ptest %xmm1, %xmm0 - jnc L(less16bytes) - pcmpeqb %xmm0, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $16, REM - jbe L(eq) -#endif - add $16, %edx - add $16, %eax -L(first4bytes): - movzbl (%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl (%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, (%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - je L(eq) -#endif - - movzbl 1(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 1(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 1(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - je L(eq) -#endif - movzbl 2(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 2(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 2(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - je L(eq) -#endif - movzbl 3(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 3(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 3(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - je L(eq) -#endif - movzbl 4(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 4(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 4(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - je L(eq) -#endif - movzbl 5(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 5(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 5(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp 
$6, REM - je L(eq) -#endif - movzbl 6(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 6(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 6(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - je L(eq) -#endif - movzbl 7(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 7(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 7(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $8, REM - je L(eq) -#endif - add $8, %eax - add $8, %edx - -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - PUSH (%edi) -#endif - PUSH (%esi) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_remember_state -#endif - mov %edx, %edi - mov %eax, %esi - xorl %eax, %eax -L(check_offset): - movl %edi, %edx - movl %esi, %ecx - andl $0xfff, %edx - andl $0xfff, %ecx - cmpl %edx, %ecx - cmovl %edx, %ecx - lea -0xff0(%ecx), %edx - sub %edx, %edi - sub %edx, %esi - testl %edx, %edx - jg L(crosspage) -L(loop): - movdqu (%esi,%edx), %xmm2 - movdqu (%edi,%edx), %xmm1 - TOLOWER (%xmm2, %xmm1) - pcmpistri $0x1a, %xmm2, %xmm1 - jbe L(end) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $16, REM - jbe L(more16byteseq) -#endif - - add $16, %edx - jle L(loop) -L(crosspage): - movzbl (%edi,%edx), %eax - movzbl (%esi,%edx), %ecx -#if defined 
USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx -# endif -#endif - subl %ecx, %eax - jne L(ret) - testl %ecx, %ecx - je L(ret) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $1, REM - jbe L(more16byteseq) -#endif - inc %edx - cmp $15, %edx - jle L(crosspage) - add %edx, %edi - add %edx, %esi - jmp L(check_offset) - - .p2align 4 -L(end): - jnc L(ret) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub %ecx, REM - jbe L(more16byteseq) -#endif - lea (%ecx,%edx), %ecx - movzbl (%edi,%ecx), %eax - movzbl (%esi,%ecx), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx -# endif -#endif - subl %ecx, %eax -L(ret): - POP (%esi) - POP (%edi) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - ret - - .p2align 4 -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_restore_state -L(more16byteseq): - POP (%esi) -# ifdef USE_AS_STRNCMP - POP (%edi) -# endif -#endif -L(eq): - xorl %eax, %eax - RETURN - -L(neq): - mov $1, %eax - ja L(neq_bigger) - neg %eax -L(neq_bigger): - RETURN - -L(less16bytes): - add $0xfefefeff, %ecx - jnc L(less4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movd %xmm3, %edi - xor %edi, %ecx -#else - xor (%edx), %ecx -#endif - or $0xfefefeff, %ecx - add $1, %ecx - jnz L(less4bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - 
cmp $4, REM - jbe L(eq) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - psrldq $4, %xmm3 - psrldq $4, %xmm4 - movd %xmm3, %ecx - movd %xmm4, %edi - cmp %edi, %ecx - mov %ecx, %edi -#else - mov 4(%edx), %ecx - cmp 4(%eax), %ecx -#endif - jne L(more4bytes) - add $0xfefefeff, %ecx - jnc L(more4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - xor %edi, %ecx -#else - xor 4(%edx), %ecx -#endif - or $0xfefefeff, %ecx - add $1, %ecx - jnz L(more4bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $8, REM - jbe L(eq) -#endif - - add $8, %edx - add $8, %eax -L(less4bytes): - - movzbl (%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl (%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, (%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - je L(eq) -#endif - movzbl 1(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 1(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 1(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - je L(eq) -#endif - - movzbl 2(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 2(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), 
%edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 2(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - je L(eq) -#endif - movzbl 3(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 3(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 3(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -L(more4bytes): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - je L(eq) -#endif - movzbl 4(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 4(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 4(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - je L(eq) -#endif - movzbl 5(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 5(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 5(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - je L(eq) -#endif - 
movzbl 6(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 6(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 6(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - je L(eq) -#endif - movzbl 7(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 7(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 7(%edx) -#endif - jne L(neq) - jmp L(eq) - -END (STRCMP) - -#endif diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S deleted file mode 100644 index fb21288..0000000 --- a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S +++ /dev/null @@ -1,2810 +0,0 @@ -/* strcmp with SSSE3 - Copyright (C) 2010-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#if IS_IN (libc) - -#include -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_STRNCMP -# ifndef STRCMP -# define STRCMP __strncmp_ssse3 -# endif -# define STR1 8 -# define STR2 STR1+4 -# define CNT STR2+4 -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# define UPDATE_STRNCMP_COUNTER \ - /* calculate left number to compare */ \ - mov $16, %esi; \ - sub %ecx, %esi; \ - cmp %esi, REM; \ - jbe L(more8byteseq); \ - sub %esi, REM -# define FLAGS %ebx -# define REM %ebp -#elif defined USE_AS_STRCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strcasecmp_l_ssse3 -# endif -# ifdef PIC -# define STR1 8 -# else -# define STR1 4 -# endif -# define STR2 STR1+4 -# define LOCALE 12 /* Loaded before the adjustment. */ -# ifdef PIC -# define RETURN POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx) -# else -# define RETURN ret; .p2align 4 -# endif -# define UPDATE_STRNCMP_COUNTER -# define FLAGS (%esp) -# define NONASCII __strcasecmp_nonascii -#elif defined USE_AS_STRNCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strncasecmp_l_ssse3 -# endif -# ifdef PIC -# define STR1 12 -# else -# define STR1 8 -# endif -# define STR2 STR1+4 -# define CNT STR2+4 -# define LOCALE 16 /* Loaded before the adjustment. 
*/ -# ifdef PIC -# define RETURN POP (REM); POP (%ebx); ret; \ - .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM) -# else -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# endif -# define UPDATE_STRNCMP_COUNTER \ - /* calculate left number to compare */ \ - mov $16, %esi; \ - sub %ecx, %esi; \ - cmp %esi, REM; \ - jbe L(more8byteseq); \ - sub %esi, REM -# define FLAGS (%esp) -# define REM %ebp -# define NONASCII __strncasecmp_nonascii -#else -# ifndef STRCMP -# define STRCMP __strcmp_ssse3 -# endif -# define STR1 4 -# define STR2 STR1+4 -# define RETURN ret; .p2align 4 -# define UPDATE_STRNCMP_COUNTER -# define FLAGS %ebx -#endif - - .section .text.ssse3,"ax",@progbits - -#ifdef USE_AS_STRCASECMP_L -ENTRY (__strcasecmp_ssse3) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strcasecmp_nonascii -# else - jne __strcasecmp_nonascii - jmp L(ascii) -# endif -END (__strcasecmp_ssse3) -#endif - -#ifdef USE_AS_STRNCASECMP_L -ENTRY (__strncasecmp_ssse3) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl 
LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strncasecmp_nonascii -# else - jne __strncasecmp_nonascii - jmp L(ascii) -# endif -END (__strncasecmp_ssse3) -#endif - -ENTRY (STRCMP) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movl LOCALE(%esp), %eax -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) - jne NONASCII - -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) -# endif -L(ascii): - .section .rodata.cst16,"aM",@progbits,16 - .align 16 -.Lbelowupper: - .quad 0x4040404040404040 - .quad 0x4040404040404040 -.Ltopupper: - .quad 0x5b5b5b5b5b5b5b5b - .quad 0x5b5b5b5b5b5b5b5b -.Ltouppermask: - .quad 0x2020202020202020 - .quad 0x2020202020202020 - .previous - -# ifdef PIC -# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) -# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) -# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) -# else -# define UCLOW_reg .Lbelowupper -# define UCHIGH_reg .Ltopupper -# define LCQWORD_reg .Ltouppermask -# endif -#endif - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - PUSH (REM) -#endif - - movl STR1(%esp), %edx - movl STR2(%esp), %eax -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - movl CNT(%esp), REM - cmp $16, REM - jb L(less16bytes_sncmp) -#elif !defined USE_AS_STRCASECMP_L - movzbl (%eax), %ecx - cmpb %cl, (%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 1(%eax), %ecx - cmpb %cl, 1(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 2(%eax), %ecx - cmpb %cl, 2(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 3(%eax), %ecx - cmpb %cl, 3(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 4(%eax), %ecx - cmpb %cl, 4(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 
5(%eax), %ecx - cmpb %cl, 5(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 6(%eax), %ecx - cmpb %cl, 6(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 7(%eax), %ecx - cmpb %cl, 7(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - add $8, %edx - add $8, %eax -#endif - movl %edx, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(crosspage) - mov %eax, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(crosspage) - pxor %xmm0, %xmm0 - movlpd (%eax), %xmm1 - movlpd (%edx), %xmm2 - movhpd 8(%eax), %xmm1 - movhpd 8(%edx), %xmm2 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ - movdqa reg1, %xmm5; \ - movdqa reg2, %xmm7; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb UCLOW_reg, %xmm5; \ - pcmpgtb UCLOW_reg, %xmm7; \ - pcmpgtb reg1, %xmm6; \ - pand %xmm6, %xmm5; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb reg2, %xmm6; \ - pand %xmm6, %xmm7; \ - pand LCQWORD_reg, %xmm5; \ - por %xmm5, reg1; \ - pand LCQWORD_reg, %xmm7; \ - por %xmm7, reg2 - TOLOWER (%xmm1, %xmm2) -#else -# define TOLOWER(reg1, reg2) -#endif - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %ecx - sub $0xffff, %ecx - jnz L(less16bytes) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(eq) -#endif - add $16, %eax - add $16, %edx - -L(crosspage): - -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - PUSH (FLAGS) -#endif - PUSH (%edi) - PUSH (%esi) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - pushl $0 - cfi_adjust_cfa_offset (4) -#endif -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_remember_state -#endif - - movl %edx, %edi - movl %eax, %ecx - and $0xf, %ecx - and $0xf, %edi - xor %ecx, %eax - xor %edi, %edx -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - xor FLAGS, FLAGS -#endif - cmp %edi, %ecx - je L(ashr_0) - ja L(bigger) - orl $0x20, FLAGS - xchg %edx, %eax - xchg %ecx, %edi 
-L(bigger): - lea 15(%edi), %edi - sub %ecx, %edi - cmp $8, %edi - jle L(ashr_less_8) - cmp $14, %edi - je L(ashr_15) - cmp $13, %edi - je L(ashr_14) - cmp $12, %edi - je L(ashr_13) - cmp $11, %edi - je L(ashr_12) - cmp $10, %edi - je L(ashr_11) - cmp $9, %edi - je L(ashr_10) -L(ashr_less_8): - je L(ashr_9) - cmp $7, %edi - je L(ashr_8) - cmp $6, %edi - je L(ashr_7) - cmp $5, %edi - je L(ashr_6) - cmp $4, %edi - je L(ashr_5) - cmp $3, %edi - je L(ashr_4) - cmp $2, %edi - je L(ashr_3) - cmp $1, %edi - je L(ashr_2) - cmp $0, %edi - je L(ashr_1) - -/* - * The following cases will be handled by ashr_0 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(0~15) n(0~15) 15(15+ n-n) ashr_0 - */ - .p2align 4 -L(ashr_0): - mov $0xffff, %esi - movdqa (%eax), %xmm1 - pxor %xmm0, %xmm0 - pcmpeqb %xmm1, %xmm0 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movdqa (%edx), %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm2, %xmm1 -#else - pcmpeqb (%edx), %xmm1 -#endif - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - mov %ecx, %edi - jne L(less32bytes) - UPDATE_STRNCMP_COUNTER - movl $0x10, FLAGS - mov $0x10, %ecx - pxor %xmm0, %xmm0 - .p2align 4 -L(loop_ashr_0): - movdqa (%eax, %ecx), %xmm1 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movdqa (%edx, %ecx), %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 -#else - pcmpeqb %xmm1, %xmm0 - pcmpeqb (%edx, %ecx), %xmm1 -#endif - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - jmp L(loop_ashr_0) - -/* - * The following cases will be handled by ashr_1 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(15) n -15 0(15 +(n-15) - n) ashr_1 - */ - .p2align 4 -L(ashr_1): - mov $0xffff, %esi - pxor %xmm0, 
%xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $15, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -15(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $1, FLAGS - lea 1(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_1): - add $16, %edi - jg L(nibble_ashr_1) - -L(gobble_ashr_1): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $1, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_1) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $1, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_1) - - .p2align 4 -L(nibble_ashr_1): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfffe, %esi - jnz L(ashr_1_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $15, REM - jbe L(ashr_1_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_1) - - .p2align 4 -L(ashr_1_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $1, %xmm0 - psrldq $1, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_2 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case 
- * n(14~15) n -14 1(15 +(n-14) - n) ashr_2 - */ - .p2align 4 -L(ashr_2): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $14, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -14(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $2, FLAGS - lea 2(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_2): - add $16, %edi - jg L(nibble_ashr_2) - -L(gobble_ashr_2): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $2, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_2) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $2, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_2) - - .p2align 4 -L(nibble_ashr_2): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfffc, %esi - jnz L(ashr_2_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $14, REM - jbe L(ashr_2_exittail) -#endif - - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_2) - - .p2align 4 -L(ashr_2_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $2, %xmm0 - psrldq $2, %xmm3 - jmp L(aftertail) - -/* - * The 
following cases will be handled by ashr_3 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(13~15) n -13 2(15 +(n-13) - n) ashr_3 - */ - .p2align 4 -L(ashr_3): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $13, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -13(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $3, FLAGS - lea 3(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_3): - add $16, %edi - jg L(nibble_ashr_3) - -L(gobble_ashr_3): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $3, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_3) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $3, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_3) - - .p2align 4 -L(nibble_ashr_3): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfff8, %esi - jnz L(ashr_3_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $13, REM - jbe L(ashr_3_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_3) - - .p2align 4 
-L(ashr_3_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $3, %xmm0 - psrldq $3, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_4 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(12~15) n -12 3(15 +(n-12) - n) ashr_4 - */ - .p2align 4 -L(ashr_4): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $12, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -12(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $4, FLAGS - lea 4(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_4): - add $16, %edi - jg L(nibble_ashr_4) - -L(gobble_ashr_4): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $4, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_4) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $4, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_4) - - .p2align 4 -L(nibble_ashr_4): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfff0, %esi - jnz L(ashr_4_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $12, 
REM - jbe L(ashr_4_exittail) -#endif - - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_4) - - .p2align 4 -L(ashr_4_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $4, %xmm0 - psrldq $4, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_5 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(11~15) n -11 4(15 +(n-11) - n) ashr_5 - */ - .p2align 4 -L(ashr_5): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $11, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -11(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $5, FLAGS - lea 5(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_5): - add $16, %edi - jg L(nibble_ashr_5) - -L(gobble_ashr_5): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $5, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_5) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $5, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_5) - - .p2align 4 -L(nibble_ashr_5): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test 
$0xffe0, %esi - jnz L(ashr_5_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $11, REM - jbe L(ashr_5_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_5) - - .p2align 4 -L(ashr_5_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $5, %xmm0 - psrldq $5, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_6 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(10~15) n -10 5(15 +(n-10) - n) ashr_6 - */ - - .p2align 4 -L(ashr_6): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $10, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -10(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $6, FLAGS - lea 6(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_6): - add $16, %edi - jg L(nibble_ashr_6) - -L(gobble_ashr_6): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $6, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_6) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $6, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, 
%xmm3 - jmp L(loop_ashr_6) - - .p2align 4 -L(nibble_ashr_6): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xffc0, %esi - jnz L(ashr_6_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $10, REM - jbe L(ashr_6_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_6) - - .p2align 4 -L(ashr_6_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $6, %xmm0 - psrldq $6, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_7 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7 - */ - - .p2align 4 -L(ashr_7): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $9, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -9(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $7, FLAGS - lea 8(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_7): - add $16, %edi - jg L(nibble_ashr_7) - -L(gobble_ashr_7): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $7, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_7) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $7, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined 
USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_7) - - .p2align 4 -L(nibble_ashr_7): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xff80, %esi - jnz L(ashr_7_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $9, REM - jbe L(ashr_7_exittail) -#endif - pxor %xmm0, %xmm0 - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_7) - - .p2align 4 -L(ashr_7_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $7, %xmm0 - psrldq $7, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_8 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8 - */ - .p2align 4 -L(ashr_8): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $8, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -8(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $8, FLAGS - lea 8(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_8): - add $16, %edi - jg L(nibble_ashr_8) - -L(gobble_ashr_8): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $8, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_8) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $8, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, 
%xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_8) - - .p2align 4 -L(nibble_ashr_8): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xff00, %esi - jnz L(ashr_8_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $8, REM - jbe L(ashr_8_exittail) -#endif - pxor %xmm0, %xmm0 - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_8) - - .p2align 4 -L(ashr_8_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $8, %xmm0 - psrldq $8, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_9 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9 - */ - .p2align 4 -L(ashr_9): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $7, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -7(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $9, FLAGS - lea 9(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_9): - add $16, %edi - jg L(nibble_ashr_9) - -L(gobble_ashr_9): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $9, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_9) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), 
%xmm2 - movdqa %xmm2, %xmm4 - - palignr $9, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_9) - - .p2align 4 -L(nibble_ashr_9): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfe00, %esi - jnz L(ashr_9_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(ashr_9_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_9) - - .p2align 4 -L(ashr_9_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $9, %xmm0 - psrldq $9, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_10 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10 - */ - .p2align 4 -L(ashr_10): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $6, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -6(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $10, FLAGS - lea 10(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_10): - add $16, %edi - jg L(nibble_ashr_10) - -L(gobble_ashr_10): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $10, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - 
movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_10) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $10, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_10) - - .p2align 4 -L(nibble_ashr_10): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfc00, %esi - jnz L(ashr_10_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - jbe L(ashr_10_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_10) - - .p2align 4 -L(ashr_10_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $10, %xmm0 - psrldq $10, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_11 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11 - */ - .p2align 4 -L(ashr_11): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $5, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -5(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $11, FLAGS - lea 11(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_11): - add $16, %edi - jg L(nibble_ashr_11) - -L(gobble_ashr_11): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $11, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_11) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $11, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_11) - - .p2align 4 -L(nibble_ashr_11): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xf800, %esi - jnz L(ashr_11_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - jbe L(ashr_11_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_11) - - .p2align 4 -L(ashr_11_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $11, %xmm0 - psrldq $11, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_12 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12 - */ - .p2align 4 -L(ashr_12): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $4, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -4(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $12, FLAGS - lea 12(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_12): - add $16, %edi - jg L(nibble_ashr_12) - -L(gobble_ashr_12): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $12, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb 
%xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_12) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $12, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_12) - - .p2align 4 -L(nibble_ashr_12): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xf000, %esi - jnz L(ashr_12_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - jbe L(ashr_12_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_12) - - .p2align 4 -L(ashr_12_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $12, %xmm0 - psrldq $12, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_13 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13 - */ - .p2align 4 -L(ashr_13): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $3, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -3(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $13, FLAGS - lea 13(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_13): - add $16, %edi - jg L(nibble_ashr_13) - -L(gobble_ashr_13): - movdqa (%eax, 
%ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $13, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_13) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $13, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_13) - - .p2align 4 -L(nibble_ashr_13): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xe000, %esi - jnz L(ashr_13_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - jbe L(ashr_13_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_13) - - .p2align 4 -L(ashr_13_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $13, %xmm0 - psrldq $13, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_14 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14 - */ - .p2align 4 -L(ashr_14): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $2, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -2(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $14, FLAGS - lea 14(%edx), %edi - and $0xfff, %edi - sub 
$0x1000, %edi - - .p2align 4 -L(loop_ashr_14): - add $16, %edi - jg L(nibble_ashr_14) - -L(gobble_ashr_14): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $14, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_14) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $14, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_14) - - .p2align 4 -L(nibble_ashr_14): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xc000, %esi - jnz L(ashr_14_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - jbe L(ashr_14_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_14) - - .p2align 4 -L(ashr_14_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $14, %xmm0 - psrldq $14, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_14 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15 - */ - - .p2align 4 -L(ashr_15): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $1, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -1(%ecx), %edi - jnz L(less32bytes) - - 
UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $15, FLAGS - lea 15(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_15): - add $16, %edi - jg L(nibble_ashr_15) - -L(gobble_ashr_15): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $15, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_15) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $15, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_15) - - .p2align 4 -L(nibble_ashr_15): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0x8000, %esi - jnz L(ashr_15_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - jbe L(ashr_15_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_15) - - .p2align 4 -L(ashr_15_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $15, %xmm0 - psrldq $15, %xmm3 - jmp L(aftertail) - - .p2align 4 -L(aftertail): - TOLOWER (%xmm1, %xmm3) - pcmpeqb %xmm3, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - not %esi -L(exit): - mov FLAGS, %edi - and $0x1f, %edi - lea -16(%edi, %ecx), %edi -L(less32bytes): - add %edi, %edx - add %ecx, %eax - testl $0x20, FLAGS - jz L(ret2) - xchg %eax, %edx - - .p2align 4 -L(ret2): - mov %esi, %ecx -#if defined USE_AS_STRCASECMP_L || defined 
USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -#endif - POP (%esi) - POP (%edi) -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - POP (FLAGS) -#endif -L(less16bytes): - test %cl, %cl - jz L(2next_8_bytes) - - test $0x01, %cl - jnz L(Byte0) - - test $0x02, %cl - jnz L(Byte1) - - test $0x04, %cl - jnz L(Byte2) - - test $0x08, %cl - jnz L(Byte3) - - test $0x10, %cl - jnz L(Byte4) - - test $0x20, %cl - jnz L(Byte5) - - test $0x40, %cl - jnz L(Byte6) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(eq) -#endif - - movzx 7(%eax), %ecx - movzx 7(%edx), %eax -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte0): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $0, REM - jbe L(eq) -#endif - movzx (%eax), %ecx - movzx (%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte1): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - jbe L(eq) -#endif - movzx 1(%eax), %ecx - movzx 1(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - 
RETURN - -L(Byte2): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - jbe L(eq) -#endif - movzx 2(%eax), %ecx - movzx 2(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte3): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - jbe L(eq) -#endif - movzx 3(%eax), %ecx - movzx 3(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte4): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - jbe L(eq) -#endif - movzx 4(%eax), %ecx - movzx 4(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte5): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - jbe L(eq) -#endif - movzx 5(%eax), %ecx - movzx 5(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), 
%eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte6): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - jbe L(eq) -#endif - movzx 6(%eax), %ecx - movzx 6(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(2next_8_bytes): - add $8, %eax - add $8, %edx -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $8, REM - lea -8(REM), REM - jbe L(eq) -#endif - - test $0x01, %ch - jnz L(Byte0) - - test $0x02, %ch - jnz L(Byte1) - - test $0x04, %ch - jnz L(Byte2) - - test $0x08, %ch - jnz L(Byte3) - - test $0x10, %ch - jnz L(Byte4) - - test $0x20, %ch - jnz L(Byte5) - - test $0x40, %ch - jnz L(Byte6) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(eq) -#endif - movzx 7(%eax), %ecx - movzx 7(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -#ifdef USE_AS_STRNCMP -L(neq_sncmp): -#endif -L(neq): - mov $1, %eax - ja L(neq_bigger) - neg %eax -L(neq_bigger): -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -#endif -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - ret - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - .p2align 4 - cfi_restore_state 
-L(more8byteseq): - -# ifdef USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -# endif - POP (%esi) - POP (%edi) -# ifdef USE_AS_STRNCMP - POP (FLAGS) -# endif -#endif - -#ifdef USE_AS_STRNCMP -L(eq_sncmp): -#endif -L(eq): - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - xorl %eax, %eax - ret - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - .p2align 4 -# if defined USE_AS_STRNCASECMP_L && defined PIC - CFI_PUSH (%ebx) -# endif - CFI_PUSH (REM) -L(less16bytes_sncmp): -# ifdef USE_AS_STRNCASECMP_L - PUSH (%esi) -# endif - test REM, REM - jz L(eq_sncmp) - - movzbl (%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl (%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, (%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $1, REM - je L(eq_sncmp) - - movzbl 1(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 1(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 1(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $2, REM - je L(eq_sncmp) - - movzbl 2(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 2(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), 
%esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 2(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $3, REM - je L(eq_sncmp) - - movzbl 3(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 3(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 3(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $4, REM - je L(eq_sncmp) - - movzbl 4(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 4(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 4(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $5, REM - je L(eq_sncmp) - - movzbl 5(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 5(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 5(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $6, REM - je L(eq_sncmp) - - movzbl 6(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 6(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 6(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl 
- je L(eq_sncmp) - - cmp $7, REM - je L(eq_sncmp) - - movzbl 7(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 7(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 7(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - - cmp $8, REM - je L(eq_sncmp) - - movzbl 8(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 8(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 8(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $9, REM - je L(eq_sncmp) - - movzbl 9(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 9(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 9(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $10, REM - je L(eq_sncmp) - - movzbl 10(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 10(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 10(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $11, REM - je L(eq_sncmp) - - movzbl 11(%eax), %ecx -# ifdef 
USE_AS_STRNCASECMP_L - movzbl 11(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 11(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - - cmp $12, REM - je L(eq_sncmp) - - movzbl 12(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 12(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 12(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $13, REM - je L(eq_sncmp) - - movzbl 13(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 13(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 13(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $14, REM - je L(eq_sncmp) - - movzbl 14(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 14(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 14(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $15, REM - je L(eq_sncmp) - - movzbl 15(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 15(%edx), %esi -# ifdef PIC - movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 15(%edx) -# endif - jne L(neq_sncmp) - -# ifdef USE_AS_STRNCASECMP_L -L(eq_sncmp): - POP (%esi) -# endif - POP (REM) -# if defined USE_AS_STRNCASECMP_L && defined PIC - POP (%ebx) -# endif - xor %eax, %eax - ret - -# ifdef USE_AS_STRNCASECMP_L - .p2align 4 -# ifdef PIC - CFI_PUSH (%ebx) -# endif - CFI_PUSH (REM) - CFI_PUSH (%esi) -L(neq_sncmp): - mov $1, %eax - mov $-1, %edx - cmovna %edx, %eax - POP (%esi) - POP (REM) -# ifdef PIC - POP (%ebx) -# endif - ret -# endif -#endif - -END (STRCMP) - -#endif diff --git a/sysdeps/i386/i686/multiarch/strncase-c.c b/sysdeps/i386/i686/multiarch/strncase-c.c deleted file mode 100644 index 76581eb..0000000 --- a/sysdeps/i386/i686/multiarch/strncase-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#include - -extern __typeof (strncasecmp) __strncasecmp_nonascii; - -#define __strncasecmp __strncasecmp_nonascii -#include - -strong_alias (__strncasecmp_nonascii, __strncasecmp_ia32) diff --git a/sysdeps/i386/i686/multiarch/strncase_l-c.c b/sysdeps/i386/i686/multiarch/strncase_l-c.c deleted file mode 100644 index 7e601af..0000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-c.c +++ /dev/null @@ -1,13 +0,0 @@ -#include - -extern __typeof (strncasecmp_l) __strncasecmp_l_nonascii; - -#define __strncasecmp_l __strncasecmp_l_nonascii -#define USE_IN_EXTENDED_LOCALE_MODEL 1 -#include - -strong_alias (__strncasecmp_l_nonascii, __strncasecmp_l_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. 
*/ -strong_alias (__strncasecmp_l_nonascii, __GI___strncasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S b/sysdeps/i386/i686/multiarch/strncase_l-sse4.S deleted file mode 100644 index 5572108..0000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRNCASECMP_L 1 -#include "strcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S b/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S deleted file mode 100644 index d438a1a..0000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRNCASECMP_L 1 -#include "strcmp-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strncmp-c.c b/sysdeps/i386/i686/multiarch/strncmp-c.c deleted file mode 100644 index cc059da..0000000 --- a/sysdeps/i386/i686/multiarch/strncmp-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#ifdef SHARED -# define STRNCMP __strncmp_ia32 -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32); -#endif - -#include "string/strncmp.c" diff --git a/sysdeps/i386/i686/multiarch/strncmp-sse4.S b/sysdeps/i386/i686/multiarch/strncmp-sse4.S deleted file mode 100644 index cf14dfa..0000000 --- a/sysdeps/i386/i686/multiarch/strncmp-sse4.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef SHARED -# define USE_AS_STRNCMP -# define STRCMP __strncmp_sse4_2 -# include "strcmp-sse4.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S b/sysdeps/i386/i686/multiarch/strncmp-ssse3.S deleted file mode 100644 index 536c868..0000000 --- a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef SHARED -# define USE_AS_STRNCMP -# define STRCMP __strncmp_ssse3 -# include "strcmp-ssse3.S" -#endif diff --git a/sysdeps/i386/i686/rtld-strcmp.S b/sysdeps/i386/i686/rtld-strcmp.S new file mode 100644 index 0000000..01f31f4 --- /dev/null +++ b/sysdeps/i386/i686/rtld-strcmp.S @@ -0,0 +1 @@ +#include diff --git 
a/sysdeps/i386/multiarch/Makefile b/sysdeps/i386/multiarch/Makefile index 147258c..ba6a489 100644 --- a/sysdeps/i386/multiarch/Makefile +++ b/sysdeps/i386/multiarch/Makefile @@ -25,7 +25,13 @@ sysdep_routines += bcopy-i386 bcopy-i686 bcopy-sse2-unaligned \ stpcpy-i386 stpcpy-i586 stpcpy-sse2 stpcpy-ssse3 \ stpncpy-i386 stpncpy-sse2 stpncpy-ssse3 \ strcpy-i386 strcpy-i586 strcpy-sse2 strcpy-ssse3 \ - strncpy-i386 strncpy-sse2 strncpy-ssse3 + strncpy-i386 strncpy-sse2 strncpy-ssse3 \ + strcmp-i386 strcmp-i686 strcmp-sse4 strcmp-ssse3 \ + strcasecmp-i386 strcasecmp_l-i386 strcasecmp_l-sse4 \ + strcasecmp_l-ssse3 \ + strncase-i386 strncase_l-i386 strncase_l-sse4 \ + strncase_l-ssse3 \ + strncmp-i386 strncmp-ssse3 strncmp-sse4 endif ifeq (mathyes,$(subdir)$(config-cflags-avx)) diff --git a/sysdeps/i386/multiarch/ifunc-impl-list.c b/sysdeps/i386/multiarch/ifunc-impl-list.c index 73cd46d..015ad3c 100644 --- a/sysdeps/i386/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/multiarch/ifunc-impl-list.c @@ -191,7 +191,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, #endif ) -#if 0 /* Support sysdeps/i386/i686/multiarch/strcasecmp.S. */ IFUNC_IMPL (i, name, strcasecmp, IFUNC_IMPL_ADD (array, i, strcasecmp, @@ -200,7 +199,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strcasecmp, HAS_CPU_FEATURE (SSSE3), __strcasecmp_ssse3) - IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ia32)) + IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_i386)) /* Support sysdeps/i386/i686/multiarch/strcasecmp_l.S. */ IFUNC_IMPL (i, name, strcasecmp_l, @@ -211,8 +210,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_CPU_FEATURE (SSSE3), __strcasecmp_l_ssse3) IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, - __strcasecmp_l_ia32)) + __strcasecmp_l_i386)) +#if 0 /* Support sysdeps/i386/i686/multiarch/strcat.S. 
*/ IFUNC_IMPL (i, name, strcat, IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3), @@ -228,6 +228,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2), __strchr_sse2) IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_ia32)) +#endif /* Support sysdeps/i386/i686/multiarch/strcmp.S. */ IFUNC_IMPL (i, name, strcmp, @@ -235,8 +236,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strcmp_sse4_2) IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3), __strcmp_ssse3) - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_ia32)) + IFUNC_IMPL_ADD (array, i, strcmp, HAS_I686, __strcmp_i686) +#if !SUPPORT_I686 + IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_i386) #endif + ) /* Support sysdeps/i386/i686/multiarch/strcpy.S. */ IFUNC_IMPL (i, name, strcpy, @@ -256,6 +260,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2), __strcspn_sse42) IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_ia32)) +#endif /* Support sysdeps/i386/i686/multiarch/strncase.S. */ IFUNC_IMPL (i, name, strncasecmp, @@ -266,7 +271,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_CPU_FEATURE (SSSE3), __strncasecmp_ssse3) IFUNC_IMPL_ADD (array, i, strncasecmp, 1, - __strncasecmp_ia32)) + __strncasecmp_i386)) /* Support sysdeps/i386/i686/multiarch/strncase_l.S. */ IFUNC_IMPL (i, name, strncasecmp_l, @@ -277,8 +282,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_CPU_FEATURE (SSSE3), __strncasecmp_l_ssse3) IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, - __strncasecmp_l_ia32)) + __strncasecmp_l_i386)) +#if 0 /* Support sysdeps/i386/i686/multiarch/strncat.S. 
*/ IFUNC_IMPL (i, name, strncat, IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3), @@ -451,6 +457,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2), __strlen_sse2) IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_ia32)) +#endif /* Support sysdeps/i386/i686/multiarch/strncmp.S. */ IFUNC_IMPL (i, name, strncmp, @@ -458,8 +465,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncmp_sse4_2) IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3), __strncmp_ssse3) - IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ia32)) -#endif + IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_i386)) #endif return i; diff --git a/sysdeps/i386/multiarch/rtld-strcmp.c b/sysdeps/i386/multiarch/rtld-strcmp.c new file mode 100644 index 0000000..78448ae --- /dev/null +++ b/sysdeps/i386/multiarch/rtld-strcmp.c @@ -0,0 +1 @@ +#include "string/strcmp.c" diff --git a/sysdeps/i386/multiarch/strcasecmp-i386.c b/sysdeps/i386/multiarch/strcasecmp-i386.c new file mode 100644 index 0000000..eb5d602 --- /dev/null +++ b/sysdeps/i386/multiarch/strcasecmp-i386.c @@ -0,0 +1,12 @@ +#include + +extern __typeof (strcasecmp) __strcasecmp_nonascii; + +#define __strcasecmp __strcasecmp_nonascii +#include + +strong_alias (__strcasecmp_nonascii, __strcasecmp_i386) + +/* The needs of strcasecmp in libc are minimal, no need to go through + the IFUNC. */ +strong_alias (__strcasecmp_nonascii, __GI___strcasecmp) diff --git a/sysdeps/i386/multiarch/strcasecmp.c b/sysdeps/i386/multiarch/strcasecmp.c new file mode 100644 index 0000000..261bd7a --- /dev/null +++ b/sysdeps/i386/multiarch/strcasecmp.c @@ -0,0 +1,53 @@ +/* Multiple versions of strcasecmp. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in lib. */ +#if IS_IN (libc) +/* Redefine strcasecmp so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strcasecmp +# define strcasecmp __redirect_strcasecmp +# include +# undef strcasecmp + +# include + +extern __typeof (__redirect_strcasecmp) __strcasecmp_i386 attribute_hidden; +extern __typeof (__redirect_strcasecmp) __strcasecmp_ssse3 attribute_hidden; +extern __typeof (__redirect_strcasecmp) __strcasecmp_sse4_2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. 
*/ +extern __typeof (__redirect_strcasecmp) __strcasecmp; +extern void *strcasecmp_ifunc (void) __asm__ ("__strcasecmp"); + +void * +strcasecmp_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE4_2)) + return __strcasecmp_sse4_2; + else if (HAS_CPU_FEATURE (SSSE3)) + return __strcasecmp_ssse3; + + return __strcasecmp_i386; +} +__asm__ (".type __strcasecmp, %gnu_indirect_function"); + +weak_alias (__strcasecmp, strcasecmp) +#endif diff --git a/sysdeps/i386/multiarch/strcasecmp_l-i386.c b/sysdeps/i386/multiarch/strcasecmp_l-i386.c new file mode 100644 index 0000000..b5b38d3 --- /dev/null +++ b/sysdeps/i386/multiarch/strcasecmp_l-i386.c @@ -0,0 +1,13 @@ +#include + +extern __typeof (strcasecmp_l) __strcasecmp_l_nonascii; + +#define __strcasecmp_l __strcasecmp_l_nonascii +#define USE_IN_EXTENDED_LOCALE_MODEL 1 +#include + +strong_alias (__strcasecmp_l_nonascii, __strcasecmp_l_i386) + +/* The needs of strcasecmp in libc are minimal, no need to go through + the IFUNC. */ +strong_alias (__strcasecmp_l_nonascii, __GI___strcasecmp_l) diff --git a/sysdeps/i386/multiarch/strcasecmp_l-sse4.S b/sysdeps/i386/multiarch/strcasecmp_l-sse4.S new file mode 100644 index 0000000..411d415 --- /dev/null +++ b/sysdeps/i386/multiarch/strcasecmp_l-sse4.S @@ -0,0 +1,2 @@ +#define USE_AS_STRCASECMP_L 1 +#include "strcmp-sse4.S" diff --git a/sysdeps/i386/multiarch/strcasecmp_l-ssse3.S b/sysdeps/i386/multiarch/strcasecmp_l-ssse3.S new file mode 100644 index 0000000..a22b93c --- /dev/null +++ b/sysdeps/i386/multiarch/strcasecmp_l-ssse3.S @@ -0,0 +1,2 @@ +#define USE_AS_STRCASECMP_L 1 +#include "strcmp-ssse3.S" diff --git a/sysdeps/i386/multiarch/strcasecmp_l.c b/sysdeps/i386/multiarch/strcasecmp_l.c new file mode 100644 index 0000000..7bdc760 --- /dev/null +++ b/sysdeps/i386/multiarch/strcasecmp_l.c @@ -0,0 +1,53 @@ +/* Multiple versions of strcasecmp_l. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in lib. */ +#if IS_IN (libc) +/* Redefine strcasecmp_l so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strcasecmp_l +# define strcasecmp_l __redirect_strcasecmp_l +# include +# undef strcasecmp_l + +# include + +extern __typeof (__redirect_strcasecmp_l) __strcasecmp_l_i386 attribute_hidden; +extern __typeof (__redirect_strcasecmp_l) __strcasecmp_l_ssse3 attribute_hidden; +extern __typeof (__redirect_strcasecmp_l) __strcasecmp_l_sse4_2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. 
*/ +extern __typeof (__redirect_strcasecmp_l) __strcasecmp_l; +extern void *strcasecmp_l_ifunc (void) __asm__ ("__strcasecmp_l"); + +void * +strcasecmp_l_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE4_2)) + return __strcasecmp_l_sse4_2; + else if (HAS_CPU_FEATURE (SSSE3)) + return __strcasecmp_l_ssse3; + + return __strcasecmp_l_i386; +} +__asm__ (".type __strcasecmp_l, %gnu_indirect_function"); + +weak_alias (__strcasecmp_l, strcasecmp_l) +#endif diff --git a/sysdeps/i386/multiarch/strcmp-i386.c b/sysdeps/i386/multiarch/strcmp-i386.c new file mode 100644 index 0000000..f5931bf --- /dev/null +++ b/sysdeps/i386/multiarch/strcmp-i386.c @@ -0,0 +1,10 @@ +#include +#if !SUPPORT_I686 +# ifdef SHARED +# define STRCMP __strcmp_i386 +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcmp_i386, __GI_strcmp, __strcmp_i386); +# endif +# include "string/strcmp.c" +#endif diff --git a/sysdeps/i386/multiarch/strcmp-i686.S b/sysdeps/i386/multiarch/strcmp-i686.S new file mode 100644 index 0000000..ce07410 --- /dev/null +++ b/sysdeps/i386/multiarch/strcmp-i686.S @@ -0,0 +1,16 @@ +#include +#ifdef SHARED +# define strcmp __strcmp_i686 +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#if SUPPORT_I686 || defined SHARED +#include +#endif + +#if defined SHARED && SUPPORT_I686 + .globl __GI_strcmp + .hidden __GI_strcmp + __GI_strcmp = __strcmp_i686 +#endif diff --git a/sysdeps/i386/multiarch/strcmp-sse4.S b/sysdeps/i386/multiarch/strcmp-sse4.S new file mode 100644 index 0000000..38c2317 --- /dev/null +++ b/sysdeps/i386/multiarch/strcmp-sse4.S @@ -0,0 +1,804 @@ +/* strcmp with SSE4.2 + Copyright (C) 2010-2015 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +#include +#include "asm-syntax.h" + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) + +#ifdef USE_AS_STRNCMP +# ifndef STRCMP +# define STRCMP __strncmp_sse4_2 +# endif +# define STR1 8 +# define STR2 STR1+4 +# define CNT STR2+4 +# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) +# define REM %ebp +#elif defined USE_AS_STRCASECMP_L +# include "locale-defines.h" +# ifndef STRCMP +# define STRCMP __strcasecmp_l_sse4_2 +# endif +# ifdef PIC +# define STR1 12 +# else +# define STR1 8 +# endif +# define STR2 STR1+4 +# define LOCALE 12 /* Loaded before the adjustment. */ +# ifdef PIC +# define RETURN POP (%edi); POP (%ebx); ret; \ + .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi) +# else +# define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi) +# endif +# define NONASCII __strcasecmp_nonascii +#elif defined USE_AS_STRNCASECMP_L +# include "locale-defines.h" +# ifndef STRCMP +# define STRCMP __strncasecmp_l_sse4_2 +# endif +# ifdef PIC +# define STR1 16 +# else +# define STR1 12 +# endif +# define STR2 STR1+4 +# define CNT STR2+4 +# define LOCALE 16 /* Loaded before the adjustment. 
*/ +# ifdef PIC +# define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \ + .p2align 4; \ + CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi) +# else +# define RETURN POP (%edi); POP (REM); ret; \ + .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi) +# endif +# define REM %ebp +# define NONASCII __strncasecmp_nonascii +#else +# ifndef STRCMP +# define STRCMP __strcmp_sse4_2 +# endif +# define STR1 4 +# define STR2 STR1+4 +# define RETURN ret; .p2align 4 +#endif + + .section .text.sse4.2,"ax",@progbits + +#ifdef USE_AS_STRCASECMP_L +ENTRY (__strcasecmp_sse4_2) +# ifdef PIC + PUSH (%ebx) + LOAD_PIC_REG(bx) + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# ifdef NO_TLS_DIRECT_SEG_REFS + addl %gs:0, %eax + movl (%eax), %eax +# else + movl %gs:(%eax), %eax +# endif +# else +# ifdef NO_TLS_DIRECT_SEG_REFS + movl %gs:0, %eax + movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax +# else + movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax +# endif +# endif +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) +# ifdef PIC + je L(ascii) + POP (%ebx) + jmp __strcasecmp_nonascii +# else + jne __strcasecmp_nonascii + jmp L(ascii) +# endif +END (__strcasecmp_sse4_2) +#endif + +#ifdef USE_AS_STRNCASECMP_L +ENTRY (__strncasecmp_sse4_2) +# ifdef PIC + PUSH (%ebx) + LOAD_PIC_REG(bx) + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# ifdef NO_TLS_DIRECT_SEG_REFS + addl %gs:0, %eax + movl (%eax), %eax +# else + movl %gs:(%eax), %eax +# endif +# else +# ifdef NO_TLS_DIRECT_SEG_REFS + movl %gs:0, %eax + movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax +# else + movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax +# endif +# endif +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) +# ifdef PIC + je L(ascii) + POP (%ebx) + jmp 
__strncasecmp_nonascii +# else + jne __strncasecmp_nonascii + jmp L(ascii) +# endif +END (__strncasecmp_sse4_2) +#endif + + ENTRY (STRCMP) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movl LOCALE(%esp), %eax +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne NONASCII + +# ifdef PIC + PUSH (%ebx) + LOAD_PIC_REG(bx) +# endif +L(ascii): + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +.Lbelowupper: + .quad 0x4040404040404040 + .quad 0x4040404040404040 +.Ltopupper: + .quad 0x5b5b5b5b5b5b5b5b + .quad 0x5b5b5b5b5b5b5b5b +.Ltouppermask: + .quad 0x2020202020202020 + .quad 0x2020202020202020 + .previous + +# ifdef PIC +# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) +# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) +# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) +# else +# define UCLOW_reg .Lbelowupper +# define UCHIGH_reg .Ltopupper +# define LCQWORD_reg .Ltouppermask +# endif +#endif + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + PUSH (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + PUSH (%edi) +#endif + mov STR1(%esp), %edx + mov STR2(%esp), %eax +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + movl CNT(%esp), REM + test REM, REM + je L(eq) +#endif + mov %dx, %cx + and $0xfff, %cx + cmp $0xff0, %cx + ja L(first4bytes) + movdqu (%edx), %xmm2 + mov %eax, %ecx + and $0xfff, %ecx + cmp $0xff0, %ecx + ja L(first4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# define TOLOWER(reg1, reg2) \ + movdqa reg1, %xmm3; \ + movdqa UCHIGH_reg, %xmm4; \ + movdqa reg2, %xmm5; \ + movdqa UCHIGH_reg, %xmm6; \ + pcmpgtb UCLOW_reg, %xmm3; \ + pcmpgtb reg1, %xmm4; \ + pcmpgtb UCLOW_reg, %xmm5; \ + pcmpgtb reg2, %xmm6; \ + pand %xmm4, %xmm3; \ + pand %xmm6, %xmm5; \ + pand LCQWORD_reg, %xmm3; \ + pand LCQWORD_reg, %xmm5; \ + 
por %xmm3, reg1; \ + por %xmm5, reg2 + + movdqu (%eax), %xmm1 + TOLOWER (%xmm2, %xmm1) + movd %xmm2, %ecx + movd %xmm1, %edi + movdqa %xmm2, %xmm3 + movdqa %xmm1, %xmm4 + cmpl %edi, %ecx +#else +# define TOLOWER(reg1, reg) + + movd %xmm2, %ecx + cmp (%eax), %ecx +#endif + jne L(less4bytes) +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + movdqu (%eax), %xmm1 +#endif + pxor %xmm2, %xmm1 + pxor %xmm0, %xmm0 + ptest %xmm1, %xmm0 + jnc L(less16bytes) + pcmpeqb %xmm0, %xmm2 + ptest %xmm2, %xmm0 + jnc L(less16bytes) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $16, REM + jbe L(eq) +#endif + add $16, %edx + add $16, %eax +L(first4bytes): + movzbl (%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl (%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, (%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM + je L(eq) +#endif + + movzbl 1(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 1(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 1(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM + je L(eq) +#endif + movzbl 2(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 2(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 2(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM + je L(eq) +#endif + movzbl 3(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 3(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 3(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM + je L(eq) +#endif + movzbl 4(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 4(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 4(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM + je L(eq) +#endif + movzbl 5(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 5(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 5(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp 
$6, REM + je L(eq) +#endif + movzbl 6(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 6(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 6(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM + je L(eq) +#endif + movzbl 7(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 7(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 7(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $8, REM + je L(eq) +#endif + add $8, %eax + add $8, %edx + +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + PUSH (%edi) +#endif + PUSH (%esi) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cfi_remember_state +#endif + mov %edx, %edi + mov %eax, %esi + xorl %eax, %eax +L(check_offset): + movl %edi, %edx + movl %esi, %ecx + andl $0xfff, %edx + andl $0xfff, %ecx + cmpl %edx, %ecx + cmovl %edx, %ecx + lea -0xff0(%ecx), %edx + sub %edx, %edi + sub %edx, %esi + testl %edx, %edx + jg L(crosspage) +L(loop): + movdqu (%esi,%edx), %xmm2 + movdqu (%edi,%edx), %xmm1 + TOLOWER (%xmm2, %xmm1) + pcmpistri $0x1a, %xmm2, %xmm1 + jbe L(end) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $16, REM + jbe L(more16byteseq) +#endif + + add $16, %edx + jle L(loop) +L(crosspage): + movzbl (%edi,%edx), %eax + movzbl (%esi,%edx), %ecx +#if defined 
USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx +# endif +#endif + subl %ecx, %eax + jne L(ret) + testl %ecx, %ecx + je L(ret) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $1, REM + jbe L(more16byteseq) +#endif + inc %edx + cmp $15, %edx + jle L(crosspage) + add %edx, %edi + add %edx, %esi + jmp L(check_offset) + + .p2align 4 +L(end): + jnc L(ret) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub %ecx, REM + jbe L(more16byteseq) +#endif + lea (%ecx,%edx), %ecx + movzbl (%edi,%ecx), %eax + movzbl (%esi,%ecx), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx +# endif +#endif + subl %ecx, %eax +L(ret): + POP (%esi) + POP (%edi) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + POP (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + POP (%ebx) +# endif +#endif + ret + + .p2align 4 +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cfi_restore_state +L(more16byteseq): + POP (%esi) +# ifdef USE_AS_STRNCMP + POP (%edi) +# endif +#endif +L(eq): + xorl %eax, %eax + RETURN + +L(neq): + mov $1, %eax + ja L(neq_bigger) + neg %eax +L(neq_bigger): + RETURN + +L(less16bytes): + add $0xfefefeff, %ecx + jnc L(less4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movd %xmm3, %edi + xor %edi, %ecx +#else + xor (%edx), %ecx +#endif + or $0xfefefeff, %ecx + add $1, %ecx + jnz L(less4bytes) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + 
cmp $4, REM + jbe L(eq) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + psrldq $4, %xmm3 + psrldq $4, %xmm4 + movd %xmm3, %ecx + movd %xmm4, %edi + cmp %edi, %ecx + mov %ecx, %edi +#else + mov 4(%edx), %ecx + cmp 4(%eax), %ecx +#endif + jne L(more4bytes) + add $0xfefefeff, %ecx + jnc L(more4bytes) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + xor %edi, %ecx +#else + xor 4(%edx), %ecx +#endif + or $0xfefefeff, %ecx + add $1, %ecx + jnz L(more4bytes) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + sub $8, REM + jbe L(eq) +#endif + + add $8, %edx + add $8, %eax +L(less4bytes): + + movzbl (%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl (%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, (%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM + je L(eq) +#endif + movzbl 1(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 1(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 1(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM + je L(eq) +#endif + + movzbl 2(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 2(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), 
%edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 2(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM + je L(eq) +#endif + movzbl 3(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 3(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 3(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +L(more4bytes): +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM + je L(eq) +#endif + movzbl 4(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 4(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 4(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM + je L(eq) +#endif + movzbl 5(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 5(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 5(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM + je L(eq) +#endif + 
movzbl 6(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 6(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 6(%edx) +#endif + jne L(neq) + cmpl $0, %ecx + je L(eq) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM + je L(eq) +#endif + movzbl 7(%eax), %ecx +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movzbl 7(%edx), %edi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi +# endif + cmpl %ecx, %edi +#else + cmpb %cl, 7(%edx) +#endif + jne L(neq) + jmp L(eq) + +END (STRCMP) + +#endif diff --git a/sysdeps/i386/multiarch/strcmp-ssse3.S b/sysdeps/i386/multiarch/strcmp-ssse3.S new file mode 100644 index 0000000..fb21288 --- /dev/null +++ b/sysdeps/i386/multiarch/strcmp-ssse3.S @@ -0,0 +1,2810 @@ +/* strcmp with SSSE3 + Copyright (C) 2010-2015 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +#include +#include "asm-syntax.h" + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) + +#ifdef USE_AS_STRNCMP +# ifndef STRCMP +# define STRCMP __strncmp_ssse3 +# endif +# define STR1 8 +# define STR2 STR1+4 +# define CNT STR2+4 +# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) +# define UPDATE_STRNCMP_COUNTER \ + /* calculate left number to compare */ \ + mov $16, %esi; \ + sub %ecx, %esi; \ + cmp %esi, REM; \ + jbe L(more8byteseq); \ + sub %esi, REM +# define FLAGS %ebx +# define REM %ebp +#elif defined USE_AS_STRCASECMP_L +# include "locale-defines.h" +# ifndef STRCMP +# define STRCMP __strcasecmp_l_ssse3 +# endif +# ifdef PIC +# define STR1 8 +# else +# define STR1 4 +# endif +# define STR2 STR1+4 +# define LOCALE 12 /* Loaded before the adjustment. */ +# ifdef PIC +# define RETURN POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx) +# else +# define RETURN ret; .p2align 4 +# endif +# define UPDATE_STRNCMP_COUNTER +# define FLAGS (%esp) +# define NONASCII __strcasecmp_nonascii +#elif defined USE_AS_STRNCASECMP_L +# include "locale-defines.h" +# ifndef STRCMP +# define STRCMP __strncasecmp_l_ssse3 +# endif +# ifdef PIC +# define STR1 12 +# else +# define STR1 8 +# endif +# define STR2 STR1+4 +# define CNT STR2+4 +# define LOCALE 16 /* Loaded before the adjustment. 
*/ +# ifdef PIC +# define RETURN POP (REM); POP (%ebx); ret; \ + .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM) +# else +# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) +# endif +# define UPDATE_STRNCMP_COUNTER \ + /* calculate left number to compare */ \ + mov $16, %esi; \ + sub %ecx, %esi; \ + cmp %esi, REM; \ + jbe L(more8byteseq); \ + sub %esi, REM +# define FLAGS (%esp) +# define REM %ebp +# define NONASCII __strncasecmp_nonascii +#else +# ifndef STRCMP +# define STRCMP __strcmp_ssse3 +# endif +# define STR1 4 +# define STR2 STR1+4 +# define RETURN ret; .p2align 4 +# define UPDATE_STRNCMP_COUNTER +# define FLAGS %ebx +#endif + + .section .text.ssse3,"ax",@progbits + +#ifdef USE_AS_STRCASECMP_L +ENTRY (__strcasecmp_ssse3) +# ifdef PIC + PUSH (%ebx) + LOAD_PIC_REG(bx) + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# ifdef NO_TLS_DIRECT_SEG_REFS + addl %gs:0, %eax + movl (%eax), %eax +# else + movl %gs:(%eax), %eax +# endif +# else +# ifdef NO_TLS_DIRECT_SEG_REFS + movl %gs:0, %eax + movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax +# else + movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax +# endif +# endif +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) +# ifdef PIC + je L(ascii) + POP (%ebx) + jmp __strcasecmp_nonascii +# else + jne __strcasecmp_nonascii + jmp L(ascii) +# endif +END (__strcasecmp_ssse3) +#endif + +#ifdef USE_AS_STRNCASECMP_L +ENTRY (__strncasecmp_ssse3) +# ifdef PIC + PUSH (%ebx) + LOAD_PIC_REG(bx) + movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax +# ifdef NO_TLS_DIRECT_SEG_REFS + addl %gs:0, %eax + movl (%eax), %eax +# else + movl %gs:(%eax), %eax +# endif +# else +# ifdef NO_TLS_DIRECT_SEG_REFS + movl %gs:0, %eax + movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax +# else + movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax +# endif +# endif +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl 
LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) +# ifdef PIC + je L(ascii) + POP (%ebx) + jmp __strncasecmp_nonascii +# else + jne __strncasecmp_nonascii + jmp L(ascii) +# endif +END (__strncasecmp_ssse3) +#endif + +ENTRY (STRCMP) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movl LOCALE(%esp), %eax +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax +# else + movl (%eax), %eax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) + jne NONASCII + +# ifdef PIC + PUSH (%ebx) + LOAD_PIC_REG(bx) +# endif +L(ascii): + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +.Lbelowupper: + .quad 0x4040404040404040 + .quad 0x4040404040404040 +.Ltopupper: + .quad 0x5b5b5b5b5b5b5b5b + .quad 0x5b5b5b5b5b5b5b5b +.Ltouppermask: + .quad 0x2020202020202020 + .quad 0x2020202020202020 + .previous + +# ifdef PIC +# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) +# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) +# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) +# else +# define UCLOW_reg .Lbelowupper +# define UCHIGH_reg .Ltopupper +# define LCQWORD_reg .Ltouppermask +# endif +#endif + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + PUSH (REM) +#endif + + movl STR1(%esp), %edx + movl STR2(%esp), %eax +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + movl CNT(%esp), REM + cmp $16, REM + jb L(less16bytes_sncmp) +#elif !defined USE_AS_STRCASECMP_L + movzbl (%eax), %ecx + cmpb %cl, (%edx) + jne L(neq) + cmpl $0, %ecx + je L(eq) + + movzbl 1(%eax), %ecx + cmpb %cl, 1(%edx) + jne L(neq) + cmpl $0, %ecx + je L(eq) + + movzbl 2(%eax), %ecx + cmpb %cl, 2(%edx) + jne L(neq) + cmpl $0, %ecx + je L(eq) + + movzbl 3(%eax), %ecx + cmpb %cl, 3(%edx) + jne L(neq) + cmpl $0, %ecx + je L(eq) + + movzbl 4(%eax), %ecx + cmpb %cl, 4(%edx) + jne L(neq) + cmpl $0, %ecx + je L(eq) + + movzbl 
5(%eax), %ecx + cmpb %cl, 5(%edx) + jne L(neq) + cmpl $0, %ecx + je L(eq) + + movzbl 6(%eax), %ecx + cmpb %cl, 6(%edx) + jne L(neq) + cmpl $0, %ecx + je L(eq) + + movzbl 7(%eax), %ecx + cmpb %cl, 7(%edx) + jne L(neq) + cmpl $0, %ecx + je L(eq) + + add $8, %edx + add $8, %eax +#endif + movl %edx, %ecx + and $0xfff, %ecx + cmp $0xff0, %ecx + ja L(crosspage) + mov %eax, %ecx + and $0xfff, %ecx + cmp $0xff0, %ecx + ja L(crosspage) + pxor %xmm0, %xmm0 + movlpd (%eax), %xmm1 + movlpd (%edx), %xmm2 + movhpd 8(%eax), %xmm1 + movhpd 8(%edx), %xmm2 +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# define TOLOWER(reg1, reg2) \ + movdqa reg1, %xmm5; \ + movdqa reg2, %xmm7; \ + movdqa UCHIGH_reg, %xmm6; \ + pcmpgtb UCLOW_reg, %xmm5; \ + pcmpgtb UCLOW_reg, %xmm7; \ + pcmpgtb reg1, %xmm6; \ + pand %xmm6, %xmm5; \ + movdqa UCHIGH_reg, %xmm6; \ + pcmpgtb reg2, %xmm6; \ + pand %xmm6, %xmm7; \ + pand LCQWORD_reg, %xmm5; \ + por %xmm5, reg1; \ + pand LCQWORD_reg, %xmm7; \ + por %xmm7, reg2 + TOLOWER (%xmm1, %xmm2) +#else +# define TOLOWER(reg1, reg2) +#endif + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %ecx + sub $0xffff, %ecx + jnz L(less16bytes) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(eq) +#endif + add $16, %eax + add $16, %edx + +L(crosspage): + +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + PUSH (FLAGS) +#endif + PUSH (%edi) + PUSH (%esi) +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + pushl $0 + cfi_adjust_cfa_offset (4) +#endif +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cfi_remember_state +#endif + + movl %edx, %edi + movl %eax, %ecx + and $0xf, %ecx + and $0xf, %edi + xor %ecx, %eax + xor %edi, %edx +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + xor FLAGS, FLAGS +#endif + cmp %edi, %ecx + je L(ashr_0) + ja L(bigger) + orl $0x20, FLAGS + xchg %edx, %eax + xchg %ecx, %edi 
+L(bigger): + lea 15(%edi), %edi + sub %ecx, %edi + cmp $8, %edi + jle L(ashr_less_8) + cmp $14, %edi + je L(ashr_15) + cmp $13, %edi + je L(ashr_14) + cmp $12, %edi + je L(ashr_13) + cmp $11, %edi + je L(ashr_12) + cmp $10, %edi + je L(ashr_11) + cmp $9, %edi + je L(ashr_10) +L(ashr_less_8): + je L(ashr_9) + cmp $7, %edi + je L(ashr_8) + cmp $6, %edi + je L(ashr_7) + cmp $5, %edi + je L(ashr_6) + cmp $4, %edi + je L(ashr_5) + cmp $3, %edi + je L(ashr_4) + cmp $2, %edi + je L(ashr_3) + cmp $1, %edi + je L(ashr_2) + cmp $0, %edi + je L(ashr_1) + +/* + * The following cases will be handled by ashr_0 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(0~15) n(0~15) 15(15+ n-n) ashr_0 + */ + .p2align 4 +L(ashr_0): + mov $0xffff, %esi + movdqa (%eax), %xmm1 + pxor %xmm0, %xmm0 + pcmpeqb %xmm1, %xmm0 +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movdqa (%edx), %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm2, %xmm1 +#else + pcmpeqb (%edx), %xmm1 +#endif + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + mov %ecx, %edi + jne L(less32bytes) + UPDATE_STRNCMP_COUNTER + movl $0x10, FLAGS + mov $0x10, %ecx + pxor %xmm0, %xmm0 + .p2align 4 +L(loop_ashr_0): + movdqa (%eax, %ecx), %xmm1 +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + movdqa (%edx, %ecx), %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 +#else + pcmpeqb %xmm1, %xmm0 + pcmpeqb (%edx, %ecx), %xmm1 +#endif + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + jmp L(loop_ashr_0) + +/* + * The following cases will be handled by ashr_1 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(15) n -15 0(15 +(n-15) - n) ashr_1 + */ + .p2align 4 +L(ashr_1): + mov $0xffff, %esi + pxor %xmm0, 
%xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $15, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -15(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $1, FLAGS + lea 1(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_1): + add $16, %edi + jg L(nibble_ashr_1) + +L(gobble_ashr_1): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $1, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_1) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $1, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_1) + + .p2align 4 +L(nibble_ashr_1): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xfffe, %esi + jnz L(ashr_1_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $15, REM + jbe L(ashr_1_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_1) + + .p2align 4 +L(ashr_1_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $1, %xmm0 + psrldq $1, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_2 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case 
+ * n(14~15) n -14 1(15 +(n-14) - n) ashr_2 + */ + .p2align 4 +L(ashr_2): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $14, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -14(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $2, FLAGS + lea 2(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_2): + add $16, %edi + jg L(nibble_ashr_2) + +L(gobble_ashr_2): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $2, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_2) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $2, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_2) + + .p2align 4 +L(nibble_ashr_2): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xfffc, %esi + jnz L(ashr_2_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $14, REM + jbe L(ashr_2_exittail) +#endif + + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_2) + + .p2align 4 +L(ashr_2_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $2, %xmm0 + psrldq $2, %xmm3 + jmp L(aftertail) + +/* + * The 
following cases will be handled by ashr_3 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(13~15) n -13 2(15 +(n-13) - n) ashr_3 + */ + .p2align 4 +L(ashr_3): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $13, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -13(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $3, FLAGS + lea 3(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_3): + add $16, %edi + jg L(nibble_ashr_3) + +L(gobble_ashr_3): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $3, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_3) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $3, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_3) + + .p2align 4 +L(nibble_ashr_3): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xfff8, %esi + jnz L(ashr_3_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $13, REM + jbe L(ashr_3_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_3) + + .p2align 4 
+L(ashr_3_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $3, %xmm0 + psrldq $3, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_4 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(12~15) n -12 3(15 +(n-12) - n) ashr_4 + */ + .p2align 4 +L(ashr_4): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $12, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -12(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $4, FLAGS + lea 4(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_4): + add $16, %edi + jg L(nibble_ashr_4) + +L(gobble_ashr_4): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $4, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_4) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $4, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_4) + + .p2align 4 +L(nibble_ashr_4): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xfff0, %esi + jnz L(ashr_4_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $12, 
REM + jbe L(ashr_4_exittail) +#endif + + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_4) + + .p2align 4 +L(ashr_4_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $4, %xmm0 + psrldq $4, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_5 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(11~15) n -11 4(15 +(n-11) - n) ashr_5 + */ + .p2align 4 +L(ashr_5): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $11, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -11(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $5, FLAGS + lea 5(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_5): + add $16, %edi + jg L(nibble_ashr_5) + +L(gobble_ashr_5): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $5, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_5) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $5, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_5) + + .p2align 4 +L(nibble_ashr_5): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test 
$0xffe0, %esi + jnz L(ashr_5_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $11, REM + jbe L(ashr_5_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_5) + + .p2align 4 +L(ashr_5_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $5, %xmm0 + psrldq $5, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_6 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(10~15) n -10 5(15 +(n-10) - n) ashr_6 + */ + + .p2align 4 +L(ashr_6): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $10, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -10(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $6, FLAGS + lea 6(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_6): + add $16, %edi + jg L(nibble_ashr_6) + +L(gobble_ashr_6): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $6, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_6) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $6, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + + add $16, %ecx + movdqa %xmm4, 
%xmm3 + jmp L(loop_ashr_6) + + .p2align 4 +L(nibble_ashr_6): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xffc0, %esi + jnz L(ashr_6_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $10, REM + jbe L(ashr_6_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_6) + + .p2align 4 +L(ashr_6_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $6, %xmm0 + psrldq $6, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_7 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7 + */ + + .p2align 4 +L(ashr_7): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $9, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -9(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $7, FLAGS + lea 8(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_7): + add $16, %edi + jg L(nibble_ashr_7) + +L(gobble_ashr_7): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $7, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_7) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $7, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined 
USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_7) + + .p2align 4 +L(nibble_ashr_7): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xff80, %esi + jnz L(ashr_7_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $9, REM + jbe L(ashr_7_exittail) +#endif + pxor %xmm0, %xmm0 + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_7) + + .p2align 4 +L(ashr_7_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $7, %xmm0 + psrldq $7, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_8 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8 + */ + .p2align 4 +L(ashr_8): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $8, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -8(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $8, FLAGS + lea 8(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_8): + add $16, %edi + jg L(nibble_ashr_8) + +L(gobble_ashr_8): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $8, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_8) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $8, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, 
%xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_8) + + .p2align 4 +L(nibble_ashr_8): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xff00, %esi + jnz L(ashr_8_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $8, REM + jbe L(ashr_8_exittail) +#endif + pxor %xmm0, %xmm0 + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_8) + + .p2align 4 +L(ashr_8_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $8, %xmm0 + psrldq $8, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_9 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9 + */ + .p2align 4 +L(ashr_9): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $7, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -7(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $9, FLAGS + lea 9(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_9): + add $16, %edi + jg L(nibble_ashr_9) + +L(gobble_ashr_9): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $9, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_9) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), 
%xmm2 + movdqa %xmm2, %xmm4 + + palignr $9, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_9) + + .p2align 4 +L(nibble_ashr_9): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xfe00, %esi + jnz L(ashr_9_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM + jbe L(ashr_9_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_9) + + .p2align 4 +L(ashr_9_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $9, %xmm0 + psrldq $9, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_10 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10 + */ + .p2align 4 +L(ashr_10): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $6, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -6(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $10, FLAGS + lea 10(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_10): + add $16, %edi + jg L(nibble_ashr_10) + +L(gobble_ashr_10): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $10, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + 
movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_10) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $10, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_10) + + .p2align 4 +L(nibble_ashr_10): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xfc00, %esi + jnz L(ashr_10_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM + jbe L(ashr_10_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_10) + + .p2align 4 +L(ashr_10_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $10, %xmm0 + psrldq $10, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_11 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11 + */ + .p2align 4 +L(ashr_11): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $5, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -5(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $11, FLAGS + lea 11(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_11): + add $16, %edi + jg L(nibble_ashr_11) + +L(gobble_ashr_11): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $11, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_11) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $11, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_11) + + .p2align 4 +L(nibble_ashr_11): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xf800, %esi + jnz L(ashr_11_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM + jbe L(ashr_11_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_11) + + .p2align 4 +L(ashr_11_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $11, %xmm0 + psrldq $11, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_12 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12 + */ + .p2align 4 +L(ashr_12): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $4, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -4(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $12, FLAGS + lea 12(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_12): + add $16, %edi + jg L(nibble_ashr_12) + +L(gobble_ashr_12): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $12, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb 
%xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_12) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $12, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_12) + + .p2align 4 +L(nibble_ashr_12): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xf000, %esi + jnz L(ashr_12_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM + jbe L(ashr_12_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_12) + + .p2align 4 +L(ashr_12_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $12, %xmm0 + psrldq $12, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_13 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13 + */ + .p2align 4 +L(ashr_13): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $3, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -3(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $13, FLAGS + lea 13(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_13): + add $16, %edi + jg L(nibble_ashr_13) + +L(gobble_ashr_13): + movdqa (%eax, 
%ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $13, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_13) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $13, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_13) + + .p2align 4 +L(nibble_ashr_13): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xe000, %esi + jnz L(ashr_13_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM + jbe L(ashr_13_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_13) + + .p2align 4 +L(ashr_13_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $13, %xmm0 + psrldq $13, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_14 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14 + */ + .p2align 4 +L(ashr_14): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $2, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -2(%ecx), %edi + jnz L(less32bytes) + + UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $14, FLAGS + lea 14(%edx), %edi + and $0xfff, %edi + sub 
$0x1000, %edi + + .p2align 4 +L(loop_ashr_14): + add $16, %edi + jg L(nibble_ashr_14) + +L(gobble_ashr_14): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $14, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_14) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $14, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_14) + + .p2align 4 +L(nibble_ashr_14): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0xc000, %esi + jnz L(ashr_14_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM + jbe L(ashr_14_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_14) + + .p2align 4 +L(ashr_14_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $14, %xmm0 + psrldq $14, %xmm3 + jmp L(aftertail) + +/* + * The following cases will be handled by ashr_14 + * ecx(offset of esi) eax(offset of edi) relative offset corresponding case + * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15 + */ + + .p2align 4 +L(ashr_15): + mov $0xffff, %esi + pxor %xmm0, %xmm0 + movdqa (%edx), %xmm2 + movdqa (%eax), %xmm1 + pcmpeqb %xmm1, %xmm0 + pslldq $1, %xmm2 + TOLOWER (%xmm1, %xmm2) + pcmpeqb %xmm1, %xmm2 + psubb %xmm0, %xmm2 + pmovmskb %xmm2, %edi + shr %cl, %esi + shr %cl, %edi + sub %edi, %esi + lea -1(%ecx), %edi + jnz L(less32bytes) + + 
UPDATE_STRNCMP_COUNTER + + movdqa (%edx), %xmm3 + pxor %xmm0, %xmm0 + mov $16, %ecx + orl $15, FLAGS + lea 15(%edx), %edi + and $0xfff, %edi + sub $0x1000, %edi + + .p2align 4 +L(loop_ashr_15): + add $16, %edi + jg L(nibble_ashr_15) + +L(gobble_ashr_15): + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $15, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + + add $16, %edi + jg L(nibble_ashr_15) + + movdqa (%eax, %ecx), %xmm1 + movdqa (%edx, %ecx), %xmm2 + movdqa %xmm2, %xmm4 + + palignr $15, %xmm3, %xmm2 + TOLOWER (%xmm1, %xmm2) + + pcmpeqb %xmm1, %xmm0 + pcmpeqb %xmm2, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + sub $0xffff, %esi + jnz L(exit) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $16, REM + lea -16(REM), REM + jbe L(more8byteseq) +#endif + add $16, %ecx + movdqa %xmm4, %xmm3 + jmp L(loop_ashr_15) + + .p2align 4 +L(nibble_ashr_15): + pcmpeqb %xmm3, %xmm0 + pmovmskb %xmm0, %esi + test $0x8000, %esi + jnz L(ashr_15_exittail) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM + jbe L(ashr_15_exittail) +#endif + pxor %xmm0, %xmm0 + sub $0x1000, %edi + jmp L(gobble_ashr_15) + + .p2align 4 +L(ashr_15_exittail): + movdqa (%eax, %ecx), %xmm1 + psrldq $15, %xmm0 + psrldq $15, %xmm3 + jmp L(aftertail) + + .p2align 4 +L(aftertail): + TOLOWER (%xmm1, %xmm3) + pcmpeqb %xmm3, %xmm1 + psubb %xmm0, %xmm1 + pmovmskb %xmm1, %esi + not %esi +L(exit): + mov FLAGS, %edi + and $0x1f, %edi + lea -16(%edi, %ecx), %edi +L(less32bytes): + add %edi, %edx + add %ecx, %eax + testl $0x20, FLAGS + jz L(ret2) + xchg %eax, %edx + + .p2align 4 +L(ret2): + mov %esi, %ecx +#if defined USE_AS_STRCASECMP_L || defined 
USE_AS_STRNCASECMP_L + addl $4, %esp + cfi_adjust_cfa_offset (-4) +#endif + POP (%esi) + POP (%edi) +#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L + POP (FLAGS) +#endif +L(less16bytes): + test %cl, %cl + jz L(2next_8_bytes) + + test $0x01, %cl + jnz L(Byte0) + + test $0x02, %cl + jnz L(Byte1) + + test $0x04, %cl + jnz L(Byte2) + + test $0x08, %cl + jnz L(Byte3) + + test $0x10, %cl + jnz L(Byte4) + + test $0x20, %cl + jnz L(Byte5) + + test $0x40, %cl + jnz L(Byte6) +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM + jbe L(eq) +#endif + + movzx 7(%eax), %ecx + movzx 7(%edx), %eax +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + + sub %ecx, %eax + RETURN + +L(Byte0): +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $0, REM + jbe L(eq) +#endif + movzx (%eax), %ecx + movzx (%edx), %eax + +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + + sub %ecx, %eax + RETURN + +L(Byte1): +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $1, REM + jbe L(eq) +#endif + movzx 1(%eax), %ecx + movzx 1(%edx), %eax + +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + + sub %ecx, %eax + 
RETURN + +L(Byte2): +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $2, REM + jbe L(eq) +#endif + movzx 2(%eax), %ecx + movzx 2(%edx), %eax + +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + + sub %ecx, %eax + RETURN + +L(Byte3): +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $3, REM + jbe L(eq) +#endif + movzx 3(%eax), %ecx + movzx 3(%edx), %eax + +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + + sub %ecx, %eax + RETURN + +L(Byte4): +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $4, REM + jbe L(eq) +#endif + movzx 4(%eax), %ecx + movzx 4(%edx), %eax + +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + + sub %ecx, %eax + RETURN + +L(Byte5): +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $5, REM + jbe L(eq) +#endif + movzx 5(%eax), %ecx + movzx 5(%edx), %eax + +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), 
%eax +# endif +#endif + + sub %ecx, %eax + RETURN + +L(Byte6): +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $6, REM + jbe L(eq) +#endif + movzx 6(%eax), %ecx + movzx 6(%edx), %eax + +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + + sub %ecx, %eax + RETURN + +L(2next_8_bytes): + add $8, %eax + add $8, %edx +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $8, REM + lea -8(REM), REM + jbe L(eq) +#endif + + test $0x01, %ch + jnz L(Byte0) + + test $0x02, %ch + jnz L(Byte1) + + test $0x04, %ch + jnz L(Byte2) + + test $0x08, %ch + jnz L(Byte3) + + test $0x10, %ch + jnz L(Byte4) + + test $0x20, %ch + jnz L(Byte5) + + test $0x40, %ch + jnz L(Byte6) + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + cmp $7, REM + jbe L(eq) +#endif + movzx 7(%eax), %ecx + movzx 7(%edx), %eax + +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax +# endif +#endif + + sub %ecx, %eax + RETURN + +#ifdef USE_AS_STRNCMP +L(neq_sncmp): +#endif +L(neq): + mov $1, %eax + ja L(neq_bigger) + neg %eax +L(neq_bigger): +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + addl $4, %esp + cfi_adjust_cfa_offset (-4) +#endif +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + POP (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + POP (%ebx) +# endif +#endif + ret + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + .p2align 4 + cfi_restore_state 
+L(more8byteseq): + +# ifdef USE_AS_STRNCASECMP_L + addl $4, %esp + cfi_adjust_cfa_offset (-4) +# endif + POP (%esi) + POP (%edi) +# ifdef USE_AS_STRNCMP + POP (FLAGS) +# endif +#endif + +#ifdef USE_AS_STRNCMP +L(eq_sncmp): +#endif +L(eq): + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + POP (REM) +#endif +#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L +# ifdef PIC + POP (%ebx) +# endif +#endif + xorl %eax, %eax + ret + +#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L + .p2align 4 +# if defined USE_AS_STRNCASECMP_L && defined PIC + CFI_PUSH (%ebx) +# endif + CFI_PUSH (REM) +L(less16bytes_sncmp): +# ifdef USE_AS_STRNCASECMP_L + PUSH (%esi) +# endif + test REM, REM + jz L(eq_sncmp) + + movzbl (%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl (%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, (%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $1, REM + je L(eq_sncmp) + + movzbl 1(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 1(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 1(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $2, REM + je L(eq_sncmp) + + movzbl 2(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 2(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), 
%esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 2(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $3, REM + je L(eq_sncmp) + + movzbl 3(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 3(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 3(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $4, REM + je L(eq_sncmp) + + movzbl 4(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 4(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 4(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $5, REM + je L(eq_sncmp) + + movzbl 5(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 5(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 5(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $6, REM + je L(eq_sncmp) + + movzbl 6(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 6(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 6(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl 
+ je L(eq_sncmp) + + cmp $7, REM + je L(eq_sncmp) + + movzbl 7(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 7(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 7(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + + cmp $8, REM + je L(eq_sncmp) + + movzbl 8(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 8(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 8(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $9, REM + je L(eq_sncmp) + + movzbl 9(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 9(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 9(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $10, REM + je L(eq_sncmp) + + movzbl 10(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 10(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 10(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $11, REM + je L(eq_sncmp) + + movzbl 11(%eax), %ecx +# ifdef 
USE_AS_STRNCASECMP_L + movzbl 11(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 11(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + + cmp $12, REM + je L(eq_sncmp) + + movzbl 12(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 12(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 12(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $13, REM + je L(eq_sncmp) + + movzbl 13(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 13(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 13(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $14, REM + je L(eq_sncmp) + + movzbl 14(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 14(%edx), %esi +# ifdef PIC + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 14(%edx) +# endif + jne L(neq_sncmp) + test %cl, %cl + je L(eq_sncmp) + + cmp $15, REM + je L(eq_sncmp) + + movzbl 15(%eax), %ecx +# ifdef USE_AS_STRNCASECMP_L + movzbl 15(%edx), %esi +# ifdef PIC + movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi +# else + movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx + movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi +# endif + cmpl %ecx, %esi +# else + cmpb %cl, 15(%edx) +# endif + jne L(neq_sncmp) + +# ifdef USE_AS_STRNCASECMP_L +L(eq_sncmp): + POP (%esi) +# endif + POP (REM) +# if defined USE_AS_STRNCASECMP_L && defined PIC + POP (%ebx) +# endif + xor %eax, %eax + ret + +# ifdef USE_AS_STRNCASECMP_L + .p2align 4 +# ifdef PIC + CFI_PUSH (%ebx) +# endif + CFI_PUSH (REM) + CFI_PUSH (%esi) +L(neq_sncmp): + mov $1, %eax + mov $-1, %edx + cmovna %edx, %eax + POP (%esi) + POP (REM) +# ifdef PIC + POP (%ebx) +# endif + ret +# endif +#endif + +END (STRCMP) + +#endif diff --git a/sysdeps/i386/multiarch/strcmp.c b/sysdeps/i386/multiarch/strcmp.c new file mode 100644 index 0000000..58049e8 --- /dev/null +++ b/sysdeps/i386/multiarch/strcmp.c @@ -0,0 +1,58 @@ +/* Multiple versions of strcmp. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in libc. Don't + define multiple versions for strcmp in static library since we + need strcmp before the initialization happened. 
*/ +#if defined SHARED && IS_IN (libc) +# define _HAVE_STRING_ARCH_strcmp +/* Redefine strcmp so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strcmp +# define strcmp __redirect_strcmp +# include <string.h> +# undef strcmp + +# include <init-arch.h> + +extern __typeof (__redirect_strcmp) __strcmp_i386 attribute_hidden; +extern __typeof (__redirect_strcmp) __strcmp_i686 attribute_hidden; +extern __typeof (__redirect_strcmp) __strcmp_ssse3 attribute_hidden; +extern __typeof (__redirect_strcmp) __strcmp_sse4_2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__redirect_strcmp) strcmp; +extern void *strcmp_ifunc (void) __asm__ ("strcmp"); + +void * +strcmp_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE4_2)) + return __strcmp_sse4_2; + else if (HAS_CPU_FEATURE (SSSE3)) + return __strcmp_ssse3; + + if (HAS_I686) + return __strcmp_i686; + else + return __strcmp_i386; +} +__asm__ (".type strcmp, %gnu_indirect_function"); +#endif diff --git a/sysdeps/i386/multiarch/strncase-i386.c b/sysdeps/i386/multiarch/strncase-i386.c new file mode 100644 index 0000000..7053e55 --- /dev/null +++ b/sysdeps/i386/multiarch/strncase-i386.c @@ -0,0 +1,8 @@ +#include <string.h> + +extern __typeof (strncasecmp) __strncasecmp_nonascii; + +#define __strncasecmp __strncasecmp_nonascii +#include <string/strncase.c> + +strong_alias (__strncasecmp_nonascii, __strncasecmp_i386) diff --git a/sysdeps/i386/multiarch/strncase.c b/sysdeps/i386/multiarch/strncase.c new file mode 100644 index 0000000..9e5dcab --- /dev/null +++ b/sysdeps/i386/multiarch/strncase.c @@ -0,0 +1,53 @@ +/* Multiple versions of strncasecmp. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in lib. */ +#if IS_IN (libc) +/* Redefine strncasecmp so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strncasecmp +# define strncasecmp __redirect_strncasecmp +# include +# undef strncasecmp + +# include + +extern __typeof (__redirect_strncasecmp) __strncasecmp_i386 attribute_hidden; +extern __typeof (__redirect_strncasecmp) __strncasecmp_ssse3 attribute_hidden; +extern __typeof (__redirect_strncasecmp) __strncasecmp_sse4_2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. 
*/ +extern __typeof (__redirect_strncasecmp) __strncasecmp; +extern void *strncasecmp_ifunc (void) __asm__ ("__strncasecmp"); + +void * +strncasecmp_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE4_2)) + return __strncasecmp_sse4_2; + else if (HAS_CPU_FEATURE (SSSE3)) + return __strncasecmp_ssse3; + + return __strncasecmp_i386; +} +__asm__ (".type __strncasecmp, %gnu_indirect_function"); + +weak_alias (__strncasecmp, strncasecmp) +#endif diff --git a/sysdeps/i386/multiarch/strncase_l-i386.c b/sysdeps/i386/multiarch/strncase_l-i386.c new file mode 100644 index 0000000..efee0bf --- /dev/null +++ b/sysdeps/i386/multiarch/strncase_l-i386.c @@ -0,0 +1,13 @@ +#include + +extern __typeof (strncasecmp_l) __strncasecmp_l_nonascii; + +#define __strncasecmp_l __strncasecmp_l_nonascii +#define USE_IN_EXTENDED_LOCALE_MODEL 1 +#include + +strong_alias (__strncasecmp_l_nonascii, __strncasecmp_l_i386) + +/* The needs of strcasecmp in libc are minimal, no need to go through + the IFUNC. */ +strong_alias (__strncasecmp_l_nonascii, __GI___strncasecmp_l) diff --git a/sysdeps/i386/multiarch/strncase_l-sse4.S b/sysdeps/i386/multiarch/strncase_l-sse4.S new file mode 100644 index 0000000..5572108 --- /dev/null +++ b/sysdeps/i386/multiarch/strncase_l-sse4.S @@ -0,0 +1,2 @@ +#define USE_AS_STRNCASECMP_L 1 +#include "strcmp-sse4.S" diff --git a/sysdeps/i386/multiarch/strncase_l-ssse3.S b/sysdeps/i386/multiarch/strncase_l-ssse3.S new file mode 100644 index 0000000..d438a1a --- /dev/null +++ b/sysdeps/i386/multiarch/strncase_l-ssse3.S @@ -0,0 +1,2 @@ +#define USE_AS_STRNCASECMP_L 1 +#include "strcmp-ssse3.S" diff --git a/sysdeps/i386/multiarch/strncase_l.c b/sysdeps/i386/multiarch/strncase_l.c new file mode 100644 index 0000000..279b3ce --- /dev/null +++ b/sysdeps/i386/multiarch/strncase_l.c @@ -0,0 +1,53 @@ +/* Multiple versions of strncasecmp_l. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Define multiple versions only for the definition in lib. */ +#if IS_IN (libc) +/* Redefine strncasecmp_l so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strncasecmp_l +# define strncasecmp_l __redirect_strncasecmp_l +# include +# undef strncasecmp_l + +# include + +extern __typeof (__redirect_strncasecmp_l) __strncasecmp_l_i386 attribute_hidden; +extern __typeof (__redirect_strncasecmp_l) __strncasecmp_l_ssse3 attribute_hidden; +extern __typeof (__redirect_strncasecmp_l) __strncasecmp_l_sse4_2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. 
*/ +extern __typeof (__redirect_strncasecmp_l) __strncasecmp_l; +extern void *strncasecmp_l_ifunc (void) __asm__ ("__strncasecmp_l"); + +void * +strncasecmp_l_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE4_2)) + return __strncasecmp_l_sse4_2; + else if (HAS_CPU_FEATURE (SSSE3)) + return __strncasecmp_l_ssse3; + + return __strncasecmp_l_i386; +} +__asm__ (".type __strncasecmp_l, %gnu_indirect_function"); + +weak_alias (__strncasecmp_l, strncasecmp_l) +#endif diff --git a/sysdeps/i386/multiarch/strncmp-i386.c b/sysdeps/i386/multiarch/strncmp-i386.c new file mode 100644 index 0000000..8e41388 --- /dev/null +++ b/sysdeps/i386/multiarch/strncmp-i386.c @@ -0,0 +1,8 @@ +#ifdef SHARED +# define STRNCMP __strncmp_i386 +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strncmp_i386, __GI_strncmp, __strncmp_i386); +#endif + +#include "string/strncmp.c" diff --git a/sysdeps/i386/multiarch/strncmp-sse4.S b/sysdeps/i386/multiarch/strncmp-sse4.S new file mode 100644 index 0000000..cf14dfa --- /dev/null +++ b/sysdeps/i386/multiarch/strncmp-sse4.S @@ -0,0 +1,5 @@ +#ifdef SHARED +# define USE_AS_STRNCMP +# define STRCMP __strncmp_sse4_2 +# include "strcmp-sse4.S" +#endif diff --git a/sysdeps/i386/multiarch/strncmp-ssse3.S b/sysdeps/i386/multiarch/strncmp-ssse3.S new file mode 100644 index 0000000..536c868 --- /dev/null +++ b/sysdeps/i386/multiarch/strncmp-ssse3.S @@ -0,0 +1,5 @@ +#ifdef SHARED +# define USE_AS_STRNCMP +# define STRCMP __strncmp_ssse3 +# include "strcmp-ssse3.S" +#endif diff --git a/sysdeps/i386/multiarch/strncmp.c b/sysdeps/i386/multiarch/strncmp.c new file mode 100644 index 0000000..180f870 --- /dev/null +++ b/sysdeps/i386/multiarch/strncmp.c @@ -0,0 +1,54 @@ +/* Multiple versions of strncmp. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in libc. Don't + define multiple versions for strncmp in static library since we + need strncmp before the initialization happened. */ +#if defined SHARED && IS_IN (libc) +# define _HAVE_STRING_ARCH_strncmp +/* Redefine strncmp so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strncmp +# define strncmp __redirect_strncmp +# include <string.h> +# undef strncmp + +# include <init-arch.h> + +extern __typeof (__redirect_strncmp) __strncmp_i386 attribute_hidden; +extern __typeof (__redirect_strncmp) __strncmp_ssse3 attribute_hidden; +extern __typeof (__redirect_strncmp) __strncmp_sse4_2 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__redirect_strncmp) strncmp; +extern void *strncmp_ifunc (void) __asm__ ("strncmp"); + +void * +strncmp_ifunc (void) +{ + if (HAS_CPU_FEATURE (SSE4_2)) + return __strncmp_sse4_2; + else if (HAS_CPU_FEATURE (SSSE3)) + return __strncmp_ssse3; + + return __strncmp_i386; +} +__asm__ (".type strncmp, %gnu_indirect_function"); +#endif -- cgit v1.1