diff options
127 files changed, 4483 insertions, 723 deletions
diff --git a/README.google b/README.google index 00a22f9..e615a49 100644 --- a/README.google +++ b/README.google @@ -553,3 +553,12 @@ nptl/tst-basic8.c nptl/Makefile Unbreak building of tst-key5 (stanshebs, google-local) + +elf/elf.h +math/libm-test.inc +string/* +sysdeps/ieee754/* +sysdeps/powerpc/* + For b/18010034, backport IBM optimizations for Power8. + https://sourceware.org/git/?p=glibc.git;a=shortlog;h=refs/heads/ibm/2.19/master + (stanshebs, backport) @@ -2283,7 +2283,7 @@ typedef Elf32_Addr Elf32_Conflict; #define DT_PPC64_OPD (DT_LOPROC + 1) #define DT_PPC64_OPDSZ (DT_LOPROC + 2) #define DT_PPC64_OPT (DT_LOPROC + 3) -#define DT_PPC64_NUM 3 +#define DT_PPC64_NUM 4 /* PowerPC64 specific values for the DT_PPC64_OPT Dyn entry. */ #define PPC64_OPT_TLS 1 diff --git a/math/libm-test.inc b/math/libm-test.inc index 027dfb9..28d3319 100644 --- a/math/libm-test.inc +++ b/math/libm-test.inc @@ -6000,6 +6000,15 @@ static const struct test_f_f_data ceil_test_data[] = TEST_f_f (ceil, -72057594037927936.75L, -72057594037927936.0L), TEST_f_f (ceil, -72057594037927937.5L, -72057594037927937.0L), + /* Check cases where first double is a exact integer higher than 2^52 and + the precision is determined by second long double for IBM long double. */ + TEST_f_f (ceil, 34503599627370498.515625L, 34503599627370499.0L), + TEST_f_f (ceil, -34503599627370498.515625L, -34503599627370498.0L), +# if LDBL_MANT_DIG >= 106 + TEST_f_f (ceil, 1192568192774434123539907640624.484375L, 1192568192774434123539907640625.0L), + TEST_f_f (ceil, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L), +# endif + TEST_f_f (ceil, 10141204801825835211973625643007.5L, 10141204801825835211973625643008.0L), TEST_f_f (ceil, 10141204801825835211973625643008.25L, 10141204801825835211973625643009.0L), TEST_f_f (ceil, 10141204801825835211973625643008.5L, 10141204801825835211973625643009.0L), @@ -8714,6 +8723,15 @@ static const struct test_f_f1_data frexp_test_data[] = TEST_fI_f1 (frexp, 12.8L, 0.8L, 4, NO_INEXACT_EXCEPTION), TEST_fI_f1 (frexp, -27.34L, -0.854375L, 5, NO_INEXACT_EXCEPTION), + +#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 106 + TEST_fI_f1 (frexp, 1.0L-0x1p-106L, 1.0L-0x1p-106L, 0, NO_INEXACT_EXCEPTION), + TEST_fI_f1 (frexp, 1.0L, 0.5L, 1, NO_INEXACT_EXCEPTION), + TEST_fI_f1 (frexp, 1.0L+0x1p-105L, 0.5L+0x1p-106L, 1, NO_INEXACT_EXCEPTION), + TEST_fI_f1 (frexp, -1.0L+0x1p-106L, -1.0L+0x1p-106L, 0, NO_INEXACT_EXCEPTION), + TEST_fI_f1 (frexp, -1.0L, -0.5L, 1, NO_INEXACT_EXCEPTION), + TEST_fI_f1 (frexp, -1.0L-0x1p-105L, -0.5L-0x1p-106L, 1, NO_INEXACT_EXCEPTION), +#endif }; static void @@ -10495,6 +10513,20 @@ static const struct test_f_f_data nearbyint_test_data[] = TEST_f_f (nearbyint, -562949953421312.75, -562949953421313.0, NO_INEXACT_EXCEPTION), TEST_f_f (nearbyint, -1125899906842624.75, -1125899906842625.0, NO_INEXACT_EXCEPTION), #endif +#ifdef TEST_LDOUBLE + /* Check cases where first double is a exact integer higher than 2^52 and + the precision is determined by second long double for IBM long double. */ + TEST_f_f (nearbyint, 34503599627370498.515625L, 34503599627370499.0L), + TEST_f_f (nearbyint, -34503599627370498.515625L, -34503599627370499.0L), +# if LDBL_MANT_DIG >= 106 + TEST_f_f (nearbyint, 1024.5000000000001L, 1025.0L, NO_INEXACT_EXCEPTION), + TEST_f_f (nearbyint, 1025.5000000000001L, 1026.0L, NO_INEXACT_EXCEPTION), + TEST_f_f (nearbyint, -1024.5000000000001L, -1025.0L, NO_INEXACT_EXCEPTION), + TEST_f_f (nearbyint, -1025.5000000000001L, -1026.0L, NO_INEXACT_EXCEPTION), + TEST_f_f (nearbyint, 1192568192774434123539907640624.484375L, 1192568192774434123539907640624.0L), + TEST_f_f (nearbyint, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L), +# endif +#endif }; static void @@ -10528,6 +10560,14 @@ static const struct test_ff_f_data nextafter_test_data[] = // XXX Enable once gcc is fixed. //TEST_ff_f (nextafter, 0x0.00000040000000000000p-16385L, -0.1L, 0x0.0000003ffffffff00000p-16385L), #endif +#if defined TEST_LDOUBLE && LDBL_MANT_DIG == 106 + TEST_ff_f (nextafter, 1.0L, -10.0L, 1.0L-0x1p-106L, NO_EXCEPTION), + TEST_ff_f (nextafter, 1.0L, 10.0L, 1.0L+0x1p-105L, NO_EXCEPTION), + TEST_ff_f (nextafter, 1.0L-0x1p-106L, 10.0L, 1.0L, NO_EXCEPTION), + TEST_ff_f (nextafter, -1.0L, -10.0L, -1.0L-0x1p-105L, NO_EXCEPTION), + TEST_ff_f (nextafter, -1.0L, 10.0L, -1.0L+0x1p-106L, NO_EXCEPTION), + TEST_ff_f (nextafter, -1.0L+0x1p-106L, -10.0L, -1.0L, NO_EXCEPTION), +#endif /* XXX We need the hexadecimal FP number representation here for further tests. */ @@ -11299,6 +11339,10 @@ static const struct test_f_f_data rint_test_data[] = TEST_f_f (rint, 4503599627370497.5L, 4503599627370498.0L, INEXACT_EXCEPTION), # if LDBL_MANT_DIG > 100 + TEST_f_f (rint, 1024.5000000000001L, 1025.0L, INEXACT_EXCEPTION), + TEST_f_f (rint, 1025.5000000000001L, 1026.0L, INEXACT_EXCEPTION), + TEST_f_f (rint, -1024.5000000000001L, -1025.0L, INEXACT_EXCEPTION), + TEST_f_f (rint, -1025.5000000000001L, -1026.0L, INEXACT_EXCEPTION), TEST_f_f (rint, 4503599627370494.5000000000001L, 4503599627370495.0L, INEXACT_EXCEPTION), TEST_f_f (rint, 4503599627370495.5000000000001L, 4503599627370496.0L, INEXACT_EXCEPTION), TEST_f_f (rint, 4503599627370496.5000000000001L, 4503599627370497.0L, INEXACT_EXCEPTION), @@ -11784,6 +11828,15 @@ static const struct test_f_f_data round_test_data[] = TEST_f_f (round, -72057594037927936.75L, -72057594037927937.0L), TEST_f_f (round, -72057594037927937.5L, -72057594037927938.0L), + /* Check cases where first double is a exact integer higher than 2^52 and + the precision is determined by second long double for IBM long double. */ + TEST_f_f (round, 34503599627370498.515625L, 34503599627370499.0L), + TEST_f_f (round, -34503599627370498.515625L, -34503599627370499.0L), +# if LDBL_MANT_DIG >= 106 + TEST_f_f (round, 1192568192774434123539907640624.484375L, 1192568192774434123539907640624.0L), + TEST_f_f (round, -1192568192774434123539907640624.484375L, -1192568192774434123539907640624.0L), +# endif + TEST_f_f (round, 10141204801825835211973625643007.5L, 10141204801825835211973625643008.0L), TEST_f_f (round, 10141204801825835211973625643008.25L, 10141204801825835211973625643008.0L), TEST_f_f (round, 10141204801825835211973625643008.5L, 10141204801825835211973625643009.0L), diff --git a/string/bcopy.c b/string/bcopy.c index 7c1225c..f497b5d 100644 --- a/string/bcopy.c +++ b/string/bcopy.c @@ -25,4 +25,4 @@ #define a2 dest #define a2const -#include <memmove.c> +#include <string/memmove.c> diff --git a/string/strcspn.c b/string/strcspn.c index 7c39f79..4316205 100644 --- a/string/strcspn.c +++ b/string/strcspn.c @@ -15,27 +15,18 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#if HAVE_CONFIG_H -# include <config.h> -#endif - -#if defined _LIBC || HAVE_STRING_H -# include <string.h> -#else -# include <strings.h> -# ifndef strchr -# define strchr index -# endif -#endif +#include <string.h> #undef strcspn +#ifndef STRCSPN +# define STRCSPN strcspn +#endif + /* Return the length of the maximum initial segment of S which contains no characters from REJECT. */ size_t -strcspn (s, reject) - const char *s; - const char *reject; +STRCSPN (const char *s, const char *reject) { size_t count = 0; diff --git a/string/strpbrk.c b/string/strpbrk.c index ce33b68..a694242 100644 --- a/string/strpbrk.c +++ b/string/strpbrk.c @@ -15,21 +15,17 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#if defined _LIBC || defined HAVE_CONFIG_H -# include <string.h> -#endif +#include <string.h> #undef strpbrk +#ifndef STRPBRK +#define STRPBRK strpbrk +#endif + /* Find the first occurrence in S of any character in ACCEPT. */ char * -strpbrk (s, accept) - const char *s; - const char *accept; +STRPBRK (const char *s, const char *accept) { while (*s != '\0') { diff --git a/string/strrchr.c b/string/strrchr.c index b5b4bc6..47ff08c 100644 --- a/string/strrchr.c +++ b/string/strrchr.c @@ -19,9 +19,13 @@ #undef strrchr +#ifndef STRRCHR +# define STRRCHR strrchr +#endif + /* Find the last occurrence of C in S. */ char * -strrchr (const char *s, int c) +STRRCHR (const char *s, int c) { const char *found, *p; diff --git a/string/strspn.c b/string/strspn.c index 37e8161..c2d6364 100644 --- a/string/strspn.c +++ b/string/strspn.c @@ -18,13 +18,14 @@ #include <string.h> #undef strspn +#ifndef STRSPN +#define STRSPN strspn +#endif /* Return the length of the maximum initial segment of S which contains only characters in ACCEPT. */ size_t -strspn (s, accept) - const char *s; - const char *accept; +STRSPN (const char *s, const char *accept) { const char *p; const char *a; diff --git a/sysdeps/ieee754/dbl-64/Makefile b/sysdeps/ieee754/dbl-64/Makefile index 35f545f..5557c75 100644 --- a/sysdeps/ieee754/dbl-64/Makefile +++ b/sysdeps/ieee754/dbl-64/Makefile @@ -2,4 +2,5 @@ ifeq ($(subdir),math) # branred depends on precise IEEE double rounding CFLAGS-branred.c = $(config-cflags-nofma) CFLAGS-e_sqrt.c = $(config-cflags-nofma) +CFLAGS-e_pow.c = $(config-cflags-nofma) endif diff --git a/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c b/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c index 7e40663..a644f92 100644 --- a/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c +++ b/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c @@ -31,57 +31,115 @@ static char rcsid[] = "$NetBSD: $"; #include <math_private.h> #include <math_ldbl_opt.h> -static const long double -two107 = 162259276829213363391578010288128.0; /* 0x4670000000000000, 0 */ - long double __frexpl(long double x, int *eptr) { - uint64_t hx, lx, ix, ixl; - int64_t explo; - double xhi, xlo; + uint64_t hx, lx, ix, ixl; + int64_t explo, expon; + double xhi, xlo; + + ldbl_unpack (x, &xhi, &xlo); + EXTRACT_WORDS64 (hx, xhi); + EXTRACT_WORDS64 (lx, xlo); + ixl = 0x7fffffffffffffffULL & lx; + ix = 0x7fffffffffffffffULL & hx; + expon = 0; + if (ix >= 0x7ff0000000000000ULL || ix == 0) + { + /* 0,inf,nan. */ + *eptr = expon; + return x; + } + expon = ix >> 52; + if (expon == 0) + { + /* Denormal high double, the low double must be 0.0. */ + int cnt; + + /* Normalize. */ + if (sizeof (ix) == sizeof (long)) + cnt = __builtin_clzl (ix); + else if ((ix >> 32) != 0) + cnt = __builtin_clzl ((long) (ix >> 32)); + else + cnt = __builtin_clzl ((long) ix) + 32; + cnt = cnt - 12; + expon -= cnt; + ix <<= cnt + 1; + } + expon -= 1022; + ix &= 0x000fffffffffffffULL; + hx &= 0x8000000000000000ULL; + hx |= (1022LL << 52) | ix; - ldbl_unpack (x, &xhi, &xlo); - EXTRACT_WORDS64 (hx, xhi); - EXTRACT_WORDS64 (lx, xlo); - ixl = 0x7fffffffffffffffULL&lx; - ix = 0x7fffffffffffffffULL&hx; - *eptr = 0; - if(ix>=0x7ff0000000000000ULL||ix==0) return x; /* 0,inf,nan */ - if (ix<0x0010000000000000ULL) { /* subnormal */ - x *= two107; - xhi = ldbl_high (x); - EXTRACT_WORDS64 (hx, xhi); - ix = hx&0x7fffffffffffffffULL; - *eptr = -107; + if (ixl != 0) + { + /* If the high double is an exact power of two and the low + double has the opposite sign, then the exponent calculated + from the high double is one too big. */ + if (ix == 0 + && (int64_t) (hx ^ lx) < 0) + { + hx += 1L << 52; + expon -= 1; } - *eptr += (ix>>52)-1022; - if (ixl != 0ULL) { - explo = (ixl>>52) - (ix>>52) + 0x3fe; - if ((ixl&0x7ff0000000000000ULL) == 0LL) { - /* the lower double is a denormal so we need to correct its - mantissa and perhaps its exponent. */ - int cnt; + explo = ixl >> 52; + if (explo == 0) + { + /* The low double started out as a denormal. Normalize its + mantissa and adjust the exponent. */ + int cnt; - if (sizeof (ixl) == sizeof (long)) - cnt = __builtin_clzl (ixl); - else if ((ixl >> 32) != 0) - cnt = __builtin_clzl ((long) (ixl >> 32)); - else - cnt = __builtin_clzl ((long) ixl) + 32; - cnt = cnt - 12; - lx = (lx&0x8000000000000000ULL) | ((explo-cnt)<<52) - | ((ixl<<(cnt+1))&0x000fffffffffffffULL); - } else - lx = (lx&0x800fffffffffffffULL) | (explo<<52); - } else - lx = 0ULL; + if (sizeof (ixl) == sizeof (long)) + cnt = __builtin_clzl (ixl); + else if ((ixl >> 32) != 0) + cnt = __builtin_clzl ((long) (ixl >> 32)); + else + cnt = __builtin_clzl ((long) ixl) + 32; + cnt = cnt - 12; + explo -= cnt; + ixl <<= cnt + 1; + } + + /* With variable precision we can't assume much about the + magnitude of the returned low double. It may even be a + denormal. */ + explo -= expon; + ixl &= 0x000fffffffffffffULL; + lx &= 0x8000000000000000ULL; + if (explo <= 0) + { + /* Handle denormal low double. */ + if (explo > -52) + { + ixl |= 1LL << 52; + ixl >>= 1 - explo; + } + else + { + ixl = 0; + lx = 0; + if ((hx & 0x7ff0000000000000ULL) == (1023LL << 52)) + { + /* Oops, the adjustment we made above for values a + little smaller than powers of two turned out to + be wrong since the returned low double will be + zero. This can happen if the input was + something weird like 0x1p1000 - 0x1p-1000. */ + hx -= 1L << 52; + expon += 1; + } + } + explo = 0; + } + lx |= (explo << 52) | ixl; + } - hx = (hx&0x800fffffffffffffULL) | 0x3fe0000000000000ULL; - INSERT_WORDS64 (xhi, hx); - INSERT_WORDS64 (xlo, lx); - x = ldbl_pack (xhi, xlo); - return x; + INSERT_WORDS64 (xhi, hx); + INSERT_WORDS64 (xlo, lx); + x = ldbl_pack (xhi, xlo); + *eptr = expon; + return x; } #ifdef IS_IN_libm long_double_symbol (libm, __frexpl, frexpl); diff --git a/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c b/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c index 4e997a6..8f34604 100644 --- a/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c +++ b/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c @@ -38,6 +38,7 @@ __nearbyintl (long double x) if (fabs (u.d[0].d) < TWO52) { + double xh = u.d[0].d; double high = u.d[0].d; feholdexcept (&env); if (high > 0.0) @@ -52,6 +53,10 @@ __nearbyintl (long double x) high += TWO52; if (high == 0.0) high = -0.0; } + if (u.d[1].d > 0.0 && (xh - high == 0.5)) + high += 1.0; + else if (u.d[1].d < 0.0 && (-(xh - high) == 0.5)) + high -= 1.0; u.d[0].d = high; u.d[1].d = 0.0; math_force_eval (u.d[0]); diff --git a/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c b/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c index c050944..7b09927 100644 --- a/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c +++ b/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c @@ -30,8 +30,7 @@ static char rcsid[] = "$NetBSD: $"; long double __nextafterl(long double x, long double y) { - int64_t hx,hy,ihx,ihy; - uint64_t lx; + int64_t hx, hy, ihx, ihy, lx; double xhi, xlo, yhi; ldbl_unpack (x, &xhi, &xlo); @@ -76,19 +75,28 @@ long double __nextafterl(long double x, long double y) u = math_opt_barrier (x); x -= __LDBL_DENORM_MIN__; if (ihx < 0x0360000000000000LL - || (hx > 0 && (int64_t) lx <= 0) - || (hx < 0 && (int64_t) lx > 1)) { + || (hx > 0 && lx <= 0) + || (hx < 0 && lx > 1)) { u = u * u; math_force_eval (u); /* raise underflow flag */ } return x; } - if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ - INSERT_WORDS64 (yhi, hx & (0x7ffLL<<52)); - u = yhi; - u *= 0x1.0000000000000p-105L; + /* If the high double is an exact power of two and the low + double is the opposite sign, then 1ulp is one less than + what we might determine from the high double. Similarly + if X is an exact power of two, and positive, because + making it a little smaller will result in the exponent + decreasing by one and normalisation of the mantissa. */ + if ((hx & 0x000fffffffffffffLL) == 0 + && ((lx != 0 && (hx ^ lx) < 0) + || (lx == 0 && hx >= 0))) + ihx -= 1LL << 52; + if (ihx < (106LL << 52)) { /* ulp will denormal */ + INSERT_WORDS64 (yhi, ihx & (0x7ffLL<<52)); + u = yhi * 0x1p-105; } else { - INSERT_WORDS64 (yhi, (hx & (0x7ffLL<<52))-(0x069LL<<52)); + INSERT_WORDS64 (yhi, (ihx & (0x7ffLL<<52))-(105LL<<52)); u = yhi; } return x - u; @@ -103,8 +111,8 @@ long double __nextafterl(long double x, long double y) u = math_opt_barrier (x); x += __LDBL_DENORM_MIN__; if (ihx < 0x0360000000000000LL - || (hx > 0 && (int64_t) lx < 0 && lx != 0x8000000000000001LL) - || (hx < 0 && (int64_t) lx >= 0)) { + || (hx > 0 && lx < 0 && lx != 0x8000000000000001LL) + || (hx < 0 && lx >= 0)) { u = u * u; math_force_eval (u); /* raise underflow flag */ } @@ -112,12 +120,21 @@ long double __nextafterl(long double x, long double y) x = -0.0L; return x; } - if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ - INSERT_WORDS64 (yhi, hx & (0x7ffLL<<52)); - u = yhi; - u *= 0x1.0000000000000p-105L; + /* If the high double is an exact power of two and the low + double is the opposite sign, then 1ulp is one less than + what we might determine from the high double. Similarly + if X is an exact power of two, and negative, because + making it a little larger will result in the exponent + decreasing by one and normalisation of the mantissa. */ + if ((hx & 0x000fffffffffffffLL) == 0 + && ((lx != 0 && (hx ^ lx) < 0) + || (lx == 0 && hx < 0))) + ihx -= 1LL << 52; + if (ihx < (106LL << 52)) { /* ulp will denormal */ + INSERT_WORDS64 (yhi, ihx & (0x7ffLL<<52)); + u = yhi * 0x1p-105; } else { - INSERT_WORDS64 (yhi, (hx & (0x7ffLL<<52))-(0x069LL<<52)); + INSERT_WORDS64 (yhi, (ihx & (0x7ffLL<<52))-(105LL<<52)); u = yhi; } return x + u; diff --git a/sysdeps/powerpc/bits/hwcap.h b/sysdeps/powerpc/bits/hwcap.h index 1af8c82..dab1a58 100644 --- a/sysdeps/powerpc/bits/hwcap.h +++ b/sysdeps/powerpc/bits/hwcap.h @@ -62,3 +62,5 @@ #define PPC_FEATURE2_HAS_EBB 0x10000000 /* Event Base Branching */ #define PPC_FEATURE2_HAS_ISEL 0x08000000 /* Integer Select */ #define PPC_FEATURE2_HAS_TAR 0x04000000 /* Target Address Register */ +#define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 /* Target supports vector + instruction. */ diff --git a/sysdeps/powerpc/memmove.c b/sysdeps/powerpc/memmove.c deleted file mode 100644 index 3859da8..0000000 --- a/sysdeps/powerpc/memmove.c +++ /dev/null @@ -1,118 +0,0 @@ -/* Copy memory to memory until the specified number of bytes - has been copied. Overlap is handled correctly. - Copyright (C) 1991-2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, see <http://www.gnu.org/licenses/>. */ - -#include <string.h> -#include <memcopy.h> -#include <pagecopy.h> - -/* All this is so that bcopy.c can #include - this file after defining some things. */ -#ifndef a1 -#define a1 dest /* First arg is DEST. */ -#define a1const -#define a2 src /* Second arg is SRC. */ -#define a2const const -#undef memmove -#endif -#if !defined(RETURN) || !defined(rettype) -#define RETURN(s) return (s) /* Return DEST. */ -#define rettype void * -#endif - -#ifndef MEMMOVE -#define MEMMOVE memmove -#endif - -rettype -MEMMOVE (a1, a2, len) - a1const void *a1; - a2const void *a2; - size_t len; -{ - unsigned long int dstp = (long int) dest; - unsigned long int srcp = (long int) src; - - /* If there is no overlap between ranges, call the builtin memcpy. */ - if (dstp >= srcp + len || srcp > dstp + len) - __builtin_memcpy (dest, src, len); - - /* This test makes the forward copying code be used whenever possible. - Reduces the working set. */ - else if (dstp - srcp >= len) /* *Unsigned* compare! */ - { - /* Copy from the beginning to the end. */ - - /* If there not too few bytes to copy, use word copy. */ - if (len >= OP_T_THRES) - { - /* Copy just a few bytes to make DSTP aligned. */ - len -= (-dstp) % OPSIZ; - BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ); - - /* Copy whole pages from SRCP to DSTP by virtual address - manipulation, as much as possible. */ - - PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len); - - /* Copy from SRCP to DSTP taking advantage of the known - alignment of DSTP. Number of bytes remaining is put - in the third argument, i.e. in LEN. This number may - vary from machine to machine. */ - - WORD_COPY_FWD (dstp, srcp, len, len); - - /* Fall out and copy the tail. */ - } - - /* There are just a few bytes to copy. Use byte memory operations. */ - BYTE_COPY_FWD (dstp, srcp, len); - } - else - { - /* Copy from the end to the beginning. */ - srcp += len; - dstp += len; - - /* If there not too few bytes to copy, use word copy. */ - if (len >= OP_T_THRES) - { - /* Copy just a few bytes to make DSTP aligned. */ - len -= dstp % OPSIZ; - BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ); - - /* Copy from SRCP to DSTP taking advantage of the known - alignment of DSTP. Number of bytes remaining is put - in the third argument, i.e. in LEN. This number may - vary from machine to machine. */ - - WORD_COPY_BWD (dstp, srcp, len, len); - - /* Fall out and copy the tail. */ - } - - /* There are just a few bytes to copy. Use byte memory operations. */ - BYTE_COPY_BWD (dstp, srcp, len); - } - - RETURN (dest); -} -#ifndef memmove -libc_hidden_builtin_def (memmove) -#endif diff --git a/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S b/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S index 2734738..05ab40e 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S @@ -53,17 +53,17 @@ ENTRY (__nearbyint) fcmpu cr7,fp1,fp12 /* if (x > 0.0 */ ble cr7,L(lessthanzero) mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ - fadd fp0,fp1,fp13 /* x += TWO52 */ - fsub fp1,fp0,fp13 /* x -= TWO52 */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fsub fp1,fp1,fp13 /* x -= TWO52 */ fabs fp1,fp1 /* if (x == 0.0 */ mtfsb0 4*cr1+eq /* Clear any FE_INEXACT exception */ blr L(lessthanzero): bgelr cr7 mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ - fsub fp0,fp13,fp1 /* x -= TWO52 */ - fsub fp0,fp0,fp13 /* x += TWO52 */ - fneg fp1,fp0 /* if (x == 0.0) */ + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fnabs fp1,fp1 /* if (x == 0.0) */ mtfsb0 4*cr1+eq /* Clear any FE_INEXACT exception */ blr END (__nearbyint) diff --git a/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S b/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S index 11bdc77..7449a5f 100644 --- a/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S +++ b/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S @@ -52,16 +52,17 @@ ENTRY (__nearbyintf) fcmpu cr7,fp1,fp12 /* if (x > 0.0 */ ble cr7,L(lessthanzero) mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ - fadds fp0,fp1,fp13 /* x += TWO23 */ - fsubs fp1,fp0,fp13 /* x -= TWO23 */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fabs fp1,fp1 /* if (x == 0.0) */ mtfsb0 4*cr1+eq /* Clear any FE_INEXACT exception */ blr L(lessthanzero): bgelr cr7 mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ - fsubs fp0,fp13,fp1 /* x -= TWO23 */ - fsubs fp0,fp0,fp13 /* x += TWO23 */ - fneg fp1,fp0 /* if (x == 0.0) */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fnabs fp1,fp1 /* if (x == 0.0) */ mtfsb0 4*cr1+eq /* Clear any FE_INEXACT exception */ blr END (__nearbyintf) diff --git a/sysdeps/powerpc/powerpc32/power4/memcopy.h b/sysdeps/powerpc/powerpc32/power4/memcopy.h index d3752dc..3431084 100644 --- a/sysdeps/powerpc/powerpc32/power4/memcopy.h +++ b/sysdeps/powerpc/powerpc32/power4/memcopy.h @@ -110,3 +110,7 @@ ((byte *) dst_ep)[0] = __x; \ } \ } while (0) + +/* The powerpc memcpy implementation is safe to use for memmove. */ +#undef MEMCPY_OK_FOR_FWD_MEMMOVE +#define MEMCPY_OK_FOR_FWD_MEMMOVE 1 diff --git a/sysdeps/powerpc/powerpc32/power4/memset.S b/sysdeps/powerpc/powerpc32/power4/memset.S index 88110e3..8b746a6 100644 --- a/sysdeps/powerpc/powerpc32/power4/memset.S +++ b/sysdeps/powerpc/powerpc32/power4/memset.S @@ -50,7 +50,7 @@ L(_memset): /* Align to word boundary. */ cmplwi cr5, rLEN, 31 - insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + insrwi rCHR, rCHR, 8, 16 /* Replicate byte to halfword. */ beq+ L(aligned) mtcrf 0x01, rMEMP0 subfic rALIGN, rALIGN, 4 @@ -65,7 +65,7 @@ L(g0): /* Handle the case of size < 31. */ L(aligned): mtcrf 0x01, rLEN - insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + insrwi rCHR, rCHR, 16, 0 /* Replicate halfword to word. */ ble cr5, L(medium) /* Align to 32-byte boundary. */ andi. rALIGN, rMEMP, 0x1C diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile b/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile index a465685..a7d33b9 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile @@ -11,7 +11,7 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ strchr-power7 strchr-ppc32 wcschr-power7 wcschr-power6 \ wcschr-ppc32 wcsrchr-power7 wcsrchr-power6 wcsrchr-ppc32 \ wcscpy-power7 wcscpy-power6 wcscpy-ppc32 wordcopy-power7 \ - wordcopy-power6 wordcopy-ppc32 + wordcopy-power6 wordcopy-ppc32 memmove-power7 memmove-ppc CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S b/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S index 80a2dc5..7a7cca9 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S @@ -19,17 +19,8 @@ #include <sysdep.h> -/* memset ifunc selector is not built for static and memset@local - for shared builds makes the linker point the call to the ifunc - selector. */ -#ifdef SHARED -# define MEMSET __memset_ppc -#else -# define MEMSET memset -#endif - ENTRY (__bzero_ppc) mr r5,r4 li r4,0 - b MEMSET@local + b __memset_ppc@local END (__bzero_ppc) diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c b/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c index baaa6b4..2a6298a 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c @@ -17,7 +17,7 @@ <http://www.gnu.org/licenses/>. */ /* Define multiple versions only for definition in libc. */ -#ifndef NOT_IN_libc +#if defined SHARED && !defined NOT_IN_libc # include <string.h> # include <strings.h> # include "init-arch.h" diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c index 8ba6a80..38bf28e 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c @@ -59,6 +59,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_cell) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c. */ + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, hwcap & PPC_FEATURE_HAS_VSX, + __memmove_power7) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ppc)) + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memset.c. */ IFUNC_IMPL (i, name, memset, IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h b/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h index 51a34f2..72d720d 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h @@ -36,6 +36,7 @@ and fills the previous ones. */ #define INIT_ARCH() \ unsigned long int hwcap = __GLRO(dl_hwcap); \ + unsigned long int __attribute__((unused)) hwcap2 = __GLRO(dl_hwcap2); \ if (hwcap & PPC_FEATURE_ARCH_2_06) \ hwcap |= PPC_FEATURE_ARCH_2_05 | \ PPC_FEATURE_POWER5_PLUS | \ diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c b/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c index 43c5652..f5db4a8 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c @@ -25,8 +25,8 @@ #ifdef SHARED # undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__memchr_ppc, __GI_memchr, __memchr_ppc); +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__memchr_ppc, __GI_memchr, __memchr_ppc); #endif extern __typeof (memchr) __memchr_ppc attribute_hidden; diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c b/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c index ca0f714..94c22ef 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c @@ -17,22 +17,25 @@ <http://www.gnu.org/licenses/>. */ #ifndef NOT_IN_libc +# undef memcpy +/* Redefine memchr so that the compiler won't make the weak_alias point + to internal hidden definition (__GI_memchr), since PPC32 does not + support local IFUNC calls. */ +# define memchr __redirect_memchr # include <string.h> -# include <shlib-compat.h> # include "init-arch.h" -extern __typeof (__memchr) __memchr_ppc attribute_hidden; -extern __typeof (__memchr) __memchr_power7 attribute_hidden; +extern __typeof (__redirect_memchr) __memchr_ppc attribute_hidden; +extern __typeof (__redirect_memchr) __memchr_power7 attribute_hidden; -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -libc_ifunc (__memchr, +extern __typeof (__redirect_memchr) __libc_memchr; + +libc_ifunc (__libc_memchr, (hwcap & PPC_FEATURE_HAS_VSX) ? __memchr_power7 : __memchr_ppc); - -weak_alias (__memchr, memchr) -libc_hidden_builtin_def (memchr) +#undef memchr +weak_alias (__libc_memchr, memchr) #else #include <string/memchr.c> #endif diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c b/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c new file mode 100644 index 0000000..b14c3b1 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c @@ -0,0 +1,30 @@ +/* Power7 multiarch memmove. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_power7; +#define memcpy __memcpy_power7 + +extern __typeof (memmove) __memmove_power7; +#define MEMMOVE __memmove_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/memmove.c> diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c b/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c new file mode 100644 index 0000000..d56b77a --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c @@ -0,0 +1,33 @@ +/* Power7 multiarch memmove. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_ppc; +#define memcpy __memcpy_ppc + +extern __typeof (memmove) __memmove_ppc; +#define MEMMOVE __memmove_ppc + +#if defined SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__memmove_ppc, __GI_memmove, __memmove_ppc); +#endif + +#include <string/memmove.c> diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c b/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c new file mode 100644 index 0000000..1070148 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c @@ -0,0 +1,34 @@ +/* Multiple versions of memmove. PowerPC32 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && !defined NOT_IN_libc +/* Redefine memmove so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# include <string.h> +# include "init-arch.h" + +extern __typeof (memmove) __memmove_ppc attribute_hidden; +extern __typeof (memmove) __memmove_power7 attribute_hidden; + +libc_ifunc (memmove, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memmove_power7 + : __memmove_ppc); +#else +# include <string/memmove.c> +#endif diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S b/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S index 930564c..0b00011 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S @@ -30,7 +30,7 @@ cfi_endproc; \ ASM_SIZE_DIRECTIVE(__strcasecmp_power7) -#undef weak_alias(name, alias) +#undef weak_alias #define weak_alias(name, alias) #undef libc_hidden_builtin_def diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S b/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S index 46733f5..4b3ce51 100644 --- a/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S @@ -30,7 +30,7 @@ cfi_endproc; \ ASM_SIZE_DIRECTIVE(__strcasecmp_l_power7) -#undef weak_alias(name, alias) +#undef weak_alias #define weak_alias(name, alias) #undef libc_hidden_builtin_def diff --git a/sysdeps/powerpc/powerpc32/power6/memset.S b/sysdeps/powerpc/powerpc32/power6/memset.S index 4b18fa7..445fa44 100644 --- a/sysdeps/powerpc/powerpc32/power6/memset.S +++ b/sysdeps/powerpc/powerpc32/power6/memset.S @@ -48,7 +48,7 @@ L(_memset): ble- cr1, L(small) /* Align to word boundary. */ cmplwi cr5, rLEN, 31 - insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + insrwi rCHR, rCHR, 8, 16 /* Replicate byte to halfword. */ beq+ L(aligned) mtcrf 0x01, rMEMP0 subfic rALIGN, rALIGN, 4 @@ -64,7 +64,7 @@ L(g0): /* Handle the case of size < 31. */ L(aligned): mtcrf 0x01, rLEN - insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + insrwi rCHR, rCHR, 16, 0 /* Replicate halfword to word. */ ble cr5, L(medium) /* Align to 32-byte boundary. */ andi. rALIGN, rMEMP, 0x1C diff --git a/sysdeps/powerpc/powerpc32/power6x/fpu/Implies b/sysdeps/powerpc/powerpc32/power6x/fpu/Implies new file mode 100644 index 0000000..d53ce25 --- /dev/null +++ b/sysdeps/powerpc/powerpc32/power6x/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power6/fpu diff --git a/sysdeps/powerpc/powerpc32/power7/memchr.S b/sysdeps/powerpc/powerpc32/power7/memchr.S index 1d6a0d6..ccdd7cf 100644 --- a/sysdeps/powerpc/powerpc32/power7/memchr.S +++ b/sysdeps/powerpc/powerpc32/power7/memchr.S @@ -25,9 +25,9 @@ ENTRY (__memchr) CALL_MCOUNT dcbt 0,r3 clrrwi r8,r3,2 - insrdi r4,r4,8,48 + insrwi r4,r4,8,16 /* Replicate byte to word. */ add r7,r3,r5 /* Calculate the last acceptable address. */ - insrdi r4,r4,16,32 + insrwi r4,r4,16,0 cmplwi r5,16 li r9, -1 rlwinm r6,r3,3,27,28 /* Calculate padding. */ diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S index 52c2a6b..e540fea 100644 --- a/sysdeps/powerpc/powerpc32/power7/memcpy.S +++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S @@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0) ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move code. */ - andi. 11,3,7 /* Check alignment of DST. */ - clrlwi 10,4,29 /* Check alignment of SRC. */ + andi. 11,3,15 /* Check alignment of DST. */ + clrlwi 10,4,28 /* Check alignment of SRC. */ cmplw cr6,10,11 /* SRC and DST alignments match? */ mr 12,4 mr 31,5 diff --git a/sysdeps/powerpc/powerpc32/power7/memrchr.S b/sysdeps/powerpc/powerpc32/power7/memrchr.S index ebfd540..b05bf32 100644 --- a/sysdeps/powerpc/powerpc32/power7/memrchr.S +++ b/sysdeps/powerpc/powerpc32/power7/memrchr.S @@ -32,8 +32,8 @@ ENTRY (__memrchr) dcbt r9,r6,16 /* Stream hint, decreasing addresses. */ /* Replicate BYTE to word. */ - rldimi r4,r4,8,48 - rldimi r4,r4,16,32 + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 li r6,-4 li r9,-1 rlwinm r0,r0,3,27,28 /* Calculate padding. */ diff --git a/sysdeps/powerpc/powerpc32/power7/memset.S b/sysdeps/powerpc/powerpc32/power7/memset.S index ae18761..34fc1ad 100644 --- a/sysdeps/powerpc/powerpc32/power7/memset.S +++ b/sysdeps/powerpc/powerpc32/power7/memset.S @@ -35,8 +35,8 @@ L(_memset): cfi_offset(31,-8) /* Replicate byte to word. */ - insrdi 4,4,8,48 - insrdi 4,4,16,32 + insrwi 4,4,8,16 + insrwi 4,4,16,0 ble cr6,L(small) /* If length <= 8, use short copy code. */ diff --git a/sysdeps/powerpc/powerpc32/power7/rawmemchr.S b/sysdeps/powerpc/powerpc32/power7/rawmemchr.S index dec4db0..8ccf186 100644 --- a/sysdeps/powerpc/powerpc32/power7/rawmemchr.S +++ b/sysdeps/powerpc/powerpc32/power7/rawmemchr.S @@ -27,8 +27,8 @@ ENTRY (__rawmemchr) clrrwi r8,r3,2 /* Align the address to word boundary. */ /* Replicate byte to word. */ - rldimi r4,r4,8,48 - rldimi r4,r4,16,32 + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 /* Now r4 has a word of c bytes. */ diff --git a/sysdeps/powerpc/powerpc32/power7/strchr.S b/sysdeps/powerpc/powerpc32/power7/strchr.S index f7ecb72..d795833 100644 --- a/sysdeps/powerpc/powerpc32/power7/strchr.S +++ b/sysdeps/powerpc/powerpc32/power7/strchr.S @@ -35,8 +35,8 @@ ENTRY (strchr) beq cr7,L(null_match) /* Replicate byte to word. */ - insrdi r4,r4,8,48 - insrdi r4,r4,16,32 + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 /* Now r4 has a word of c bytes and r0 has a word of null bytes. */ diff --git a/sysdeps/powerpc/powerpc32/power7/strchrnul.S b/sysdeps/powerpc/powerpc32/power7/strchrnul.S index ece8237..dcc7620 100644 --- a/sysdeps/powerpc/powerpc32/power7/strchrnul.S +++ b/sysdeps/powerpc/powerpc32/power7/strchrnul.S @@ -27,8 +27,8 @@ ENTRY (__strchrnul) clrrwi r8,r3,2 /* Align the address to word boundary. */ /* Replicate byte to word. */ - insrdi r4,r4,8,48 - insrdi r4,r4,16,32 + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 rlwinm r6,r3,3,27,28 /* Calculate padding. */ lwz r12,0(r8) /* Load word from memory. */ diff --git a/sysdeps/powerpc/powerpc64/entry.h b/sysdeps/powerpc/powerpc64/entry.h index 76ead1d..30553c1 100644 --- a/sysdeps/powerpc/powerpc64/entry.h +++ b/sysdeps/powerpc/powerpc64/entry.h @@ -23,6 +23,7 @@ extern void _start (void); #define ENTRY_POINT _start +#if _CALL_ELF != 2 /* We have to provide a special declaration. */ #define ENTRY_POINT_DECL(class) class void _start (void); @@ -33,3 +34,4 @@ extern void _start (void); #define TEXT_START \ ({ extern unsigned long int _start_as_data[] asm ("_start"); \ _start_as_data[0]; }) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile index ebf957e..0e3eac7 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile @@ -4,7 +4,8 @@ sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ s_copysign-ppc64 s_finite-power7 s_finite-ppc64 \ s_finitef-ppc64 s_isinff-ppc64 s_isinf-power7 \ s_isinf-ppc64 s_modf-power5+ s_modf-ppc64 \ - s_modff-power5+ s_modff-ppc64 + s_modff-power5+ s_modff-ppc64 s_isnan-power8 \ + s_isinf-power8 s_finite-power8 libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ s_isnan-power5 s_isnan-ppc64 s_llround-power6x \ @@ -21,7 +22,9 @@ libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ s_logbl-power7 s_logb-ppc64 s_logbf-ppc64 \ s_logbl-ppc64 s_modf-power5+ s_modf-ppc64 \ s_modff-power5+ s_modff-ppc64 e_hypot-ppc64 \ - e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 + e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \ + s_isnan-power8 s_isinf-power8 s_finite-power8 \ + s_llrint-power8 s_llround-power8 CFLAGS-s_logbf-power7.c = -mcpu=power7 CFLAGS-s_logbl-power7.c = -mcpu=power7 diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypof.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c index 3c418d3..3c418d3 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypof.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S new file mode 100644 index 0000000..3b9071f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __finite __finite_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S> diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c index f79a93e..b9e908d 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c @@ -23,10 +23,13 @@ extern __typeof (__finite) __finite_ppc64 attribute_hidden; extern __typeof (__finite) __finite_power7 attribute_hidden; +extern __typeof (__finite) __finite_power8 attribute_hidden; libc_ifunc (__finite, - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __finite_power7 + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __finite_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __finite_power7 : __finite_ppc64); weak_alias (__finite, finite) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c index a7243b5..30b34bc 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c @@ -23,10 +23,13 @@ extern __typeof (__finitef) __finitef_ppc64 attribute_hidden; /* The double-precision version also works for single-precision. */ extern __typeof (__finitef) __finite_power7 attribute_hidden; +extern __typeof (__finitef) __finite_power8 attribute_hidden; libc_ifunc (__finitef, - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __finite_power7 + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __finite_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __finite_power7 : __finitef_ppc64); weak_alias (__finitef, finitef) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S new file mode 100644 index 0000000..979816e --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S @@ -0,0 +1,33 @@ +/* isinf(). PowerPC64/POWER8 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __isinf __isinf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S> diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c index 1ee230b..e349a06 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c @@ -23,10 +23,13 @@ extern __typeof (__isinf) __isinf_ppc64 attribute_hidden; extern __typeof (__isinf) __isinf_power7 attribute_hidden; +extern __typeof (__isinf) __isinf_power8 attribute_hidden; libc_ifunc (__isinf, - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __isinf_power7 + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isinf_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isinf_power7 : __isinf_ppc64); weak_alias (__isinf, isinf) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c index 1336feb..71da7a3 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c @@ -24,10 +24,13 @@ extern __typeof (__isinff) __isinff_ppc64 attribute_hidden; /* The double-precision version also works for single-precision. */ extern __typeof (__isinff) __isinf_power7 attribute_hidden; +extern __typeof (__isinff) __isinf_power8 attribute_hidden; libc_ifunc (__isinff, - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __isinf_power7 + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isinf_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isinf_power7 : __isinff_ppc64); weak_alias (__isinff, isinff) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S new file mode 100644 index 0000000..c176d5a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S> diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c index 0de833e..65a5ca0 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c @@ -26,16 +26,19 @@ extern __typeof (__isnan) __isnan_power5 attribute_hidden; extern __typeof (__isnan) __isnan_power6 attribute_hidden; extern __typeof (__isnan) __isnan_power6x attribute_hidden; extern __typeof (__isnan) __isnan_power7 attribute_hidden; +extern __typeof (__isnan) __isnan_power8 attribute_hidden; libc_ifunc (__isnan, - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __isnan_power7 : - (hwcap & PPC_FEATURE_POWER6_EXT) + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isnan_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 : + (hwcap & PPC_FEATURE_POWER6_EXT) ? __isnan_power6x : - (hwcap & PPC_FEATURE_ARCH_2_05) - ? __isnan_power6 : - (hwcap & PPC_FEATURE_POWER5) - ? __isnan_power5 + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 : + (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 : __isnan_ppc64); weak_alias (__isnan, isnan) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c index b237455..eb68a50 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c @@ -25,16 +25,19 @@ extern __typeof (__isnanf) __isnan_power5 attribute_hidden; extern __typeof (__isnanf) __isnan_power6 attribute_hidden; extern __typeof (__isnanf) __isnan_power6x attribute_hidden; extern __typeof (__isnanf) __isnan_power7 attribute_hidden; +extern __typeof (__isnanf) __isnan_power8 attribute_hidden; libc_ifunc (__isnanf, - (hwcap & PPC_FEATURE_ARCH_2_06) - ? __isnan_power7 : - (hwcap & PPC_FEATURE_POWER6_EXT) - ? __isnan_power6x : - (hwcap & PPC_FEATURE_ARCH_2_05) - ? __isnan_power6 : - (hwcap & PPC_FEATURE_POWER5) - ? __isnan_power5 + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isnan_power8 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __isnan_power6x : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 : + (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 : __isnan_ppc64); weak_alias (__isnanf, isnanf) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S new file mode 100644 index 0000000..3962b7d --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC64/POWER6X default version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S> diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c index 5818b53..cf1b2e4 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c @@ -30,10 +30,13 @@ extern __typeof (__llrint) __llrint_ppc64 attribute_hidden; extern __typeof (__llrint) __llrint_power6x attribute_hidden; +extern __typeof (__llrint) __llrint_power8 attribute_hidden; libc_ifunc (__llrint, - (hwcap & PPC_FEATURE_POWER6_EXT) - ? __llrint_power6x + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __llrint_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llrint_power6x : __llrint_ppc64); weak_alias (__llrint, llrint) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S new file mode 100644 index 0000000..41c61a1 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S @@ -0,0 +1,31 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __llround __llround_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S> diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c index a4d1bf3..7dba17e 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c @@ -27,12 +27,15 @@ extern __typeof (__llround) __llround_ppc64 attribute_hidden; extern __typeof (__llround) __llround_power5plus attribute_hidden; extern __typeof (__llround) __llround_power6x attribute_hidden; +extern __typeof (__llround) __llround_power8 attribute_hidden; libc_ifunc (__llround, - (hwcap & PPC_FEATURE_POWER6_EXT) - ? __llround_power6x : - (hwcap & PPC_FEATURE_POWER5_PLUS) - ? __llround_power5plus + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __llround_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llround_power6x : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __llround_power5plus : __llround_ppc64); weak_alias (__llround, llround) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S deleted file mode 100644 index 42a73af..0000000 --- a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S +++ /dev/null @@ -1,132 +0,0 @@ -/* s_ceill.S IBM extended format long double version. - Copyright (C) 2004-2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <math_ldbl_opt.h> - - .section ".toc","aw" -.LC0: /* 2**52 */ - .tc FD_43300000_0[TC],0x4330000000000000 - - .section ".text" - -/* long double [fp1,fp2] ceill (long double x [fp1,fp2]) - IEEE 1003.1 ceil function. - - PowerPC64 long double uses the IBM extended format which is - represented two 64-floating point double values. The values are - non-overlapping giving an effective precision of 106 bits. The first - double contains the high order bits of mantissa and is always ceiled - to represent a normal ceiling of long double to double. Since the - long double value is sum of the high and low values, the low double - normally has the opposite sign to compensate for the this ceiling. - - For long double there are two cases: - 1) |x| < 2**52, all the integer bits are in the high double. - ceil the high double and set the low double to -0.0. - 2) |x| >= 2**52, ceiling involves both doubles. - See the comment before label .L2 for details. - */ - -ENTRY (__ceill) - mffs fp11 /* Save current FPU rounding mode. */ - lfd fp13,.LC0@toc(2) - fabs fp0,fp1 - fabs fp9,fp2 - fsub fp12,fp13,fp13 /* generate 0.0 */ - fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ - fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnl- cr7,.L2 - mtfsfi 7,2 /* Set rounding mode toward +inf. */ - fneg fp2,fp12 - ble- cr6,.L1 - fadd fp1,fp1,fp13 /* x+= TWO52; */ - fsub fp1,fp1,fp13 /* x-= TWO52; */ - fabs fp1,fp1 /* if (x == 0.0) */ -.L0: - mtfsf 0x01,fp11 /* restore previous rounding mode. */ - blr /* x = 0.0; */ -.L1: - bge- cr6,.L0 /* if (x < 0.0) */ - fsub fp1,fp1,fp13 /* x-= TWO52; */ - fadd fp1,fp1,fp13 /* x+= TWO52; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ - mtfsf 0x01,fp11 /* restore previous rounding mode. */ - fnabs fp1,fp1 /* if (x == 0.0) */ - blr /* x = -0.0; */ - -/* The high double is > TWO52 so we need to round the low double and - perhaps the high double. In this case we have to round the low - double and handle any adjustment to the high double that may be - caused by rounding (up). This is complicated by the fact that the - high double may already be rounded and the low double may have the - opposite sign to compensate.This gets a bit tricky so we use the - following algorithm: - - tau = floor(x_high/TWO52); - x0 = x_high - tau; - x1 = x_low + tau; - r1 = rint(x1); - y_high = x0 + r1; - y_low = x0 - y_high + r1; - return y; */ -.L2: - fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ - fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ - fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ - bgelr- cr7 /* return x; */ - beqlr- cr0 - mtfsfi 7,2 /* Set rounding mode toward +inf. */ - fdiv fp8,fp1,fp13 /* x_high/TWO52 */ - - bng- cr6,.L6 /* if (x > 0.0) */ - fctidz fp0,fp8 - fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ - bng cr5,.L4 /* if (x_low > 0.0) */ - fmr fp3,fp1 - fmr fp4,fp2 - b .L5 -.L4: /* if (x_low < 0.0) */ - fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ - fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ -.L5: - fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */ - fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ - b .L9 -.L6: /* if (x < 0.0) */ - fctidz fp0,fp8 - fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ - bnl cr5,.L7 /* if (x_low < 0.0) */ - fmr fp3,fp1 - fmr fp4,fp2 - b .L8 -.L7: /* if (x_low > 0.0) */ - fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ - fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ -.L8: - fsub fp5,fp4,fp13 /* r1-= TWO52; */ - fadd fp5,fp5,fp13 /* r1+= TWO52; */ -.L9: - mtfsf 0x01,fp11 /* restore previous rounding mode. */ - fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ - fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ - fadd fp2,fp2,fp5 - blr -END (__ceill) - -long_double_symbol (libm, __ceill, ceill) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S deleted file mode 100644 index acd95da..0000000 --- a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S +++ /dev/null @@ -1,113 +0,0 @@ -/* nearbyint long double. - IBM extended format long double version. - Copyright (C) 2004-2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <math_ldbl_opt.h> - - .section ".toc","aw" -.LC0: /* 2**52 */ - .tc FD_43300000_0[TC],0x4330000000000000 - .section ".text" - -/* long double [fp1,fp2] nearbyintl (long double x [fp1,fp2]) - IEEE 1003.1 nearbyintl function. nearbyintl is similar to the rintl - but does raise the "inexact" exception. This implementation is - based on rintl but explicitly masks the inexact exception on entry - and clears any pending inexact before restoring the exception mask - on exit. - - PowerPC64 long double uses the IBM extended format which is - represented two 64-floating point double values. The values are - non-overlapping giving an effective precision of 106 bits. The first - double contains the high order bits of mantissa and is always rounded - to represent a normal rounding of long double to double. Since the - long double value is sum of the high and low values, the low double - normally has the opposite sign to compensate for the this rounding. - - For long double there are two cases: - 1) |x| < 2**52, all the integer bits are in the high double. - floor the high double and set the low double to -0.0. - 2) |x| >= 2**52, Rounding involves both doubles. - See the comment before label .L2 for details. - */ -ENTRY (__nearbyintl) - mffs fp11 /* Save current FPSCR. */ - lfd fp13,.LC0@toc(2) - fabs fp0,fp1 - mtfsb0 28 /* Disable "inexact" exceptions. */ - fsub fp12,fp13,fp13 /* generate 0.0 */ - fabs fp9,fp2 - fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ - fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnl- cr7,.L2 - fmr fp2,fp12 - bng- cr6,.L4 - fadd fp1,fp1,fp13 /* x+= TWO52; */ - fsub fp1,fp1,fp13 /* x-= TWO52; */ - b .L9 -.L4: - bnl- cr6,.L9 /* if (x < 0.0) */ - fsub fp1,fp13,fp1 /* x = TWO52 - x; */ - fsub fp0,fp1,fp13 /* x = - (x - TWO52); */ - fneg fp1,fp0 -.L9: - mtfsb0 6 /* Clear any pending "inexact" exceptions. */ - mtfsf 0x01,fp11 /* restore exception mask. */ - blr - -/* The high double is > TWO52 so we need to round the low double and - perhaps the high double. This gets a bit tricky so we use the - following algorithm: - - tau = floor(x_high/TWO52); - x0 = x_high - tau; - x1 = x_low + tau; - r1 = nearbyint(x1); - y_high = x0 + r1; - y_low = r1 - tau; - return y; */ -.L2: - fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ - fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ - bge- cr7,.L9 /* return x; */ - beq- cr0,.L9 - fdiv fp8,fp1,fp13 /* x_high/TWO52 */ - fctidz fp0,fp8 - fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ - fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ - fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ - - fcmpu cr6,fp4,fp12 /* if (x1 > 0.0) */ - bng- cr6,.L8 - fadd fp5,fp4,fp13 /* r1 = x1 + TWO52; */ - fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ - b .L6 -.L8: - fmr fp5,fp4 - bge- cr6,.L6 /* if (x1 < 0.0) */ - fsub fp5,fp13,fp4 /* r1 = TWO52 - x1; */ - fsub fp0,fp5,fp13 /* r1 = - (r1 - TWO52); */ - fneg fp5,fp0 -.L6: - fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ - fsub fp2,fp5,fp8 /* y_low = r1 - tau; */ - b .L9 -END (__nearbyintl) - -long_double_symbol (libm, __nearbyintl, nearbyintl) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_roundl.S b/sysdeps/powerpc/powerpc64/fpu/s_roundl.S deleted file mode 100644 index 5362da8..0000000 --- a/sysdeps/powerpc/powerpc64/fpu/s_roundl.S +++ /dev/null @@ -1,132 +0,0 @@ -/* long double round function. - IBM extended format long double version. - Copyright (C) 2004-2014 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <math_ldbl_opt.h> - - .section ".toc","aw" -.LC0: /* 2**52 */ - .tc FD_43300000_0[TC],0x4330000000000000 -.LC1: /* 0.5 */ - .tc FD_3fe00000_0[TC],0x3fe0000000000000 - .section ".text" - -/* long double [fp1,fp2] roundl (long double x [fp1,fp2]) - IEEE 1003.1 round function. IEEE specifies "round to the nearest - integer value, rounding halfway cases away from zero, regardless of - the current rounding mode." However PowerPC Architecture defines - "Round to Nearest" as "Choose the best approximation. In case of a - tie, choose the one that is even (least significant bit o).". - So we can't use the PowerPC "Round to Nearest" mode. Instead we set - "Round toward Zero" mode and round by adding +-0.5 before rounding - to the integer value. */ - -ENTRY (__roundl) - mffs fp11 /* Save current FPU rounding mode. */ - lfd fp13,.LC0@toc(2) - fabs fp0,fp1 - fabs fp9,fp2 - fsub fp12,fp13,fp13 /* generate 0.0 */ - fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ - fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ - bnl- cr7,.L2 - mtfsfi 7,1 /* Set rounding mode toward 0. */ - lfd fp10,.LC1@toc(2) - ble- cr6,.L1 - fneg fp2,fp12 - fadd fp1,fp1,fp10 /* x+= 0.5; */ - fadd fp1,fp1,fp13 /* x+= TWO52; */ - fsub fp1,fp1,fp13 /* x-= TWO52; */ - fabs fp1,fp1 /* if (x == 0.0) x = 0.0; */ -.L0: - mtfsf 0x01,fp11 /* restore previous rounding mode. */ - blr -.L1: - fsub fp9,fp1,fp10 /* x-= 0.5; */ - fneg fp2,fp12 - bge- cr6,.L0 /* if (x < 0.0) */ - fsub fp1,fp9,fp13 /* x-= TWO52; */ - fadd fp1,fp1,fp13 /* x+= TWO52; */ - fnabs fp1,fp1 /* if (x == 0.0) x = -0.0; */ - mtfsf 0x01,fp11 /* restore previous rounding mode. */ - blr - -/* The high double is > TWO52 so we need to round the low double and - perhaps the high double. In this case we have to round the low - double and handle any adjustment to the high double that may be - caused by rounding (up). This is complicated by the fact that the - high double may already be rounded and the low double may have the - opposite sign to compensate.This gets a bit tricky so we use the - following algorithm: - - tau = floor(x_high/TWO52); - x0 = x_high - tau; - x1 = x_low + tau; - r1 = rint(x1); - y_high = x0 + r1; - y_low = x0 - y_high + r1; - return y; */ -.L2: - fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ - fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ - fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ - lfd fp10,.LC1@toc(2) - bgelr- cr7 /* return x; */ - beqlr- cr0 - mtfsfi 7,1 /* Set rounding mode toward 0. */ - fdiv fp8,fp1,fp13 /* x_high/TWO52 */ - - bng- cr6,.L6 /* if (x > 0.0) */ - fctidz fp0,fp8 - fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ - bng cr5,.L4 /* if (x_low > 0.0) */ - fmr fp3,fp1 - fmr fp4,fp2 - b .L5 -.L4: /* if (x_low < 0.0) */ - fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ - fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ -.L5: - fadd fp5,fp4,fp10 /* r1 = x1 + 0.5; */ - fadd fp5,fp5,fp13 /* r1 = r1 + TWO52; */ - fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ - b .L9 -.L6: /* if (x < 0.0) */ - fctidz fp0,fp8 - fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ - bnl cr5,.L7 /* if (x_low < 0.0) */ - fmr fp3,fp1 - fmr fp4,fp2 - b .L8 -.L7: /* if (x_low > 0.0) */ - fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ - fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ -.L8: - fsub fp5,fp4,fp10 /* r1 = x1 - 0.5; */ - fsub fp5,fp5,fp13 /* r1-= TWO52; */ - fadd fp5,fp5,fp13 /* r1+= TWO52; */ -.L9: - mtfsf 0x01,fp11 /* restore previous rounding mode. */ - fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ - fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ - fadd fp2,fp2,fp5 - blr -END (__roundl) - -long_double_symbol (libm, __roundl, roundl) diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index 3c47316..82722fb 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -13,7 +13,13 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ wcschr-power6 wcschr-ppc64 wcsrchr-power7 wcsrchr-power6 \ wcsrchr-ppc64 wcscpy-power7 wcscpy-power6 wcscpy-ppc64 \ wordcopy-power7 wordcopy-power6 wordcopy-ppc64 \ - strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64 + strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64 \ + strrchr-power7 strrchr-ppc64 strncat-power7 strncat-ppc64 \ + strspn-power7 strspn-ppc64 strcspn-power7 strcspn-ppc64 \ + strpbrk-power7 strpbrk-ppc64 strncpy-power7 strncpy-ppc64 \ + stpncpy-power7 stpncpy-ppc64 strcmp-power7 strcmp-ppc64 \ + strcat-power7 strcat-ppc64 memmove-power7 memmove-ppc64 \ + bcopy-ppc64 CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c new file mode 100644 index 0000000..f8d5aa5 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c @@ -0,0 +1,25 @@ +/* PowerPC64 default bcopy. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (bcopy) __bcopy_ppc attribute_hidden; + +#define bcopy __bcopy_ppc + +#include <string/bcopy.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/bcopy.c b/sysdeps/powerpc/powerpc64/multiarch/bcopy.c new file mode 100644 index 0000000..0688907 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/bcopy.c @@ -0,0 +1,29 @@ +/* PowerPC64 multiarch bcopy. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include "init-arch.h" + +extern __typeof (bcopy) __bcopy_ppc attribute_hidden; +/* __bcopy_power7 symbol is implemented at memmove-power7.S */ +extern __typeof (bcopy) __bcopy_power7 attribute_hidden; + +libc_ifunc (bcopy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bcopy_power7 + : __bcopy_ppc); diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index 6bbdd4e..a574487 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -61,6 +61,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_power4) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) + /* Support sysdeps/powerpc/powerpc64/multiarch/memmove.c. */ + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, hwcap & PPC_FEATURE_HAS_VSX, + __memmove_power7) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ppc)) + /* Support sysdeps/powerpc/powerpc64/multiarch/memset.c. */ IFUNC_IMPL (i, name, memset, IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, @@ -136,6 +142,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __bzero_power4) IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ppc)) + /* Support sysdeps/powerpc/powerpc64/multiarch/bcopy.c. */ + IFUNC_IMPL (i, name, bcopy, + IFUNC_IMPL_ADD (array, i, bcopy, hwcap & PPC_FEATURE_HAS_VSX, + __bcopy_power7) + IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ppc)) + /* Support sysdeps/powerpc/powerpc64/multiarch/mempcpy.c. */ IFUNC_IMPL (i, name, mempcpy, IFUNC_IMPL_ADD (array, i, mempcpy, @@ -238,5 +250,77 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_ppc)) + /* Support sysdeps/powerpc/powerpc64/multiarch/strrchr.c. */ + IFUNC_IMPL (i, name, strrchr, + IFUNC_IMPL_ADD (array, i, strrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strrchr_power7) + IFUNC_IMPL_ADD (array, i, strrchr, 1, + __strrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncat.c. */ + IFUNC_IMPL (i, name, strncat, + IFUNC_IMPL_ADD (array, i, strncat, + hwcap & PPC_FEATURE_HAS_VSX, + __strncat_power7) + IFUNC_IMPL_ADD (array, i, strncat, 1, + __strncat_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strspn.c. */ + IFUNC_IMPL (i, name, strspn, + IFUNC_IMPL_ADD (array, i, strspn, + hwcap & PPC_FEATURE_HAS_VSX, + __strspn_power7) + IFUNC_IMPL_ADD (array, i, strspn, 1, + __strspn_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */ + IFUNC_IMPL (i, name, strcspn, + IFUNC_IMPL_ADD (array, i, strcspn, + hwcap & PPC_FEATURE_HAS_VSX, + __strcspn_power7) + IFUNC_IMPL_ADD (array, i, strcspn, 1, + __strcspn_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strpbrk.c. */ + IFUNC_IMPL (i, name, strpbrk, + IFUNC_IMPL_ADD (array, i, strpbrk, + hwcap & PPC_FEATURE_HAS_VSX, + __strpbrk_power7) + IFUNC_IMPL_ADD (array, i, strpbrk, 1, + __strpbrk_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncpy.c. */ + IFUNC_IMPL (i, name, strncpy, + IFUNC_IMPL_ADD (array, i, strncpy, + hwcap & PPC_FEATURE_HAS_VSX, + __strncpy_power7) + IFUNC_IMPL_ADD (array, i, strncpy, 1, + __strncpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/stpncpy.c. */ + IFUNC_IMPL (i, name, stpncpy, + IFUNC_IMPL_ADD (array, i, stpncpy, + hwcap & PPC_FEATURE_HAS_VSX, + __stpncpy_power7) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, + __stpncpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c. */ + IFUNC_IMPL (i, name, strcmp, + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcmp_power7) + IFUNC_IMPL_ADD (array, i, strcmp, 1, + __strcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcat.c. */ + IFUNC_IMPL (i, name, strcat, + IFUNC_IMPL_ADD (array, i, strcat, + hwcap & PPC_FEATURE_HAS_VSX, + __strcat_power7) + IFUNC_IMPL_ADD (array, i, strcat, 1, + __strcat_ppc)) + return i; } diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S index a09d760..c630654 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S +++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S @@ -36,8 +36,7 @@ END_2(__memcpy_ppc) # undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - .globl __GI_memcpy; __GI_memcpy = __memcpy_ppc +# define libc_hidden_builtin_def(name) #endif #include <sysdeps/powerpc/powerpc64/memcpy.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c index 6a91630..305e963 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c @@ -20,20 +20,23 @@ DSO. In static binaries we need memcpy before the initialization happened. */ #if defined SHARED && !defined NOT_IN_libc +/* Redefine memcpy so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memcpy +# define memcpy __redirect_memcpy # include <string.h> -# include <shlib-compat.h> # include "init-arch.h" -extern __typeof (memcpy) __memcpy_ppc attribute_hidden; -extern __typeof (memcpy) __memcpy_power4 attribute_hidden; -extern __typeof (memcpy) __memcpy_cell attribute_hidden; -extern __typeof (memcpy) __memcpy_power6 attribute_hidden; -extern __typeof (memcpy) __memcpy_a2 attribute_hidden; -extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +extern __typeof (__redirect_memcpy) __libc_memcpy; -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -libc_ifunc (memcpy, +extern __typeof (__redirect_memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power4 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_cell attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_a2 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power7 attribute_hidden; + +libc_ifunc (__libc_memcpy, (hwcap & PPC_FEATURE_HAS_VSX) ? __memcpy_power7 : (hwcap & PPC_FEATURE_ARCH_2_06) @@ -45,4 +48,8 @@ libc_ifunc (memcpy, (hwcap & PPC_FEATURE_POWER4) ? __memcpy_power4 : __memcpy_ppc); + +#undef memcpy +strong_alias (__libc_memcpy, memcpy); +libc_hidden_ver (__libc_memcpy, memcpy); #endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S b/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S new file mode 100644 index 0000000..667e7bc --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S @@ -0,0 +1,43 @@ +/* Optimized memmove implementation for PowerPC64/POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__memmove_power7) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__memmove_power7): \ + cfi_startproc; \ + LOCALENTRY(__memmove_power7) + +#undef END_GEN_TB +#define END_GEN_TB(name, mask) \ + cfi_endproc; \ + TRACEBACK_MASK(__memmove_power7,mask) \ + END_2(__memmove_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef bcopy +#define bcopy __bcopy_power7 + +#include <sysdeps/powerpc/powerpc64/power7/memmove.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c new file mode 100644 index 0000000..ff78fe6 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c @@ -0,0 +1,28 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define MEMMOVE __memmove_ppc +#if !defined(NOT_IN_libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +extern __typeof (memmove) __memmove_ppc attribute_hidden; + +#include <string/memmove.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memmove.c b/sysdeps/powerpc/powerpc64/multiarch/memmove.c new file mode 100644 index 0000000..9a1ce8f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/memmove.c @@ -0,0 +1,45 @@ +/* Multiple versions of memmove. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memmove before the initialization + happened. */ +#if defined SHARED && !defined NOT_IN_libc +/* Redefine memmove so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memmove +# define memmove __redirect_memmove +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memmove) __libc_memmove; + +extern __typeof (__redirect_memmove) __memmove_ppc attribute_hidden; +extern __typeof (__redirect_memmove) __memmove_power7 attribute_hidden; + +libc_ifunc (__libc_memmove, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memmove_power7 + : __memmove_ppc); + +#undef memmove +strong_alias (__libc_memmove, memmove); +libc_hidden_ver (__libc_memmove, memmove); +#else +# include <string/memmove.c> +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S index 65b3afe..3601a77 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S +++ b/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S @@ -47,8 +47,7 @@ END_GEN_TB (__bzero_ppc,TB_TOCLESS) END_2(__memset_ppc) # undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - .globl __GI_memset; __GI_memset = __memset_ppc +# define libc_hidden_builtin_def(name) #endif /* Do not implement __bzero at powerpc64/memset.S. */ diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset.c b/sysdeps/powerpc/powerpc64/multiarch/memset.c index 829d127..aa2ae70 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/memset.c +++ b/sysdeps/powerpc/powerpc64/multiarch/memset.c @@ -18,18 +18,24 @@ /* Define multiple versions only for definition in libc. */ #if defined SHARED && !defined NOT_IN_libc +/* Redefine memset so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memset +# define memset __redirect_memset # include <string.h> # include <shlib-compat.h> # include "init-arch.h" -extern __typeof (memset) __memset_ppc attribute_hidden; -extern __typeof (memset) __memset_power4 attribute_hidden; -extern __typeof (memset) __memset_power6 attribute_hidden; -extern __typeof (memset) __memset_power7 attribute_hidden; +extern __typeof (__redirect_memset) __libc_memset; + +extern __typeof (__redirect_memset) __memset_ppc attribute_hidden; +extern __typeof (__redirect_memset) __memset_power4 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power6 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power7 attribute_hidden; /* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle ifunc symbol properly. */ -libc_ifunc (memset, +libc_ifunc (__libc_memset, (hwcap & PPC_FEATURE_HAS_VSX) ? __memset_power7 : (hwcap & PPC_FEATURE_ARCH_2_05) @@ -37,4 +43,8 @@ libc_ifunc (memset, (hwcap & PPC_FEATURE_POWER4) ? __memset_power4 : __memset_ppc); + +#undef memset +strong_alias (__libc_memset, memset); +libc_hidden_ver (__libc_memset, memset); #endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S b/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S new file mode 100644 index 0000000..e29674f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S @@ -0,0 +1,44 @@ +/* Optimized stpncpy implementation for POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define USE_AS_STPNCPY + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__stpncpy_power7) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__stpncpy_power7): \ + cfi_startproc; \ + LOCALENTRY(__stpncpy_power7) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__stpncpy_power7) \ + END_2(__stpncpy_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define MEMSET __memset_power7 + +#include <sysdeps/powerpc/powerpc64/power7/stpncpy.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c new file mode 100644 index 0000000..74f47a7 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c @@ -0,0 +1,26 @@ +/* Default stpncpy implementation for PowerPC64. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STPNCPY __stpncpy_ppc +#ifdef SHARED +#undef libc_hidden_def +#define libc_hidden_def(name) \ + __hidden_ver1 (__stpncpy_ppc, __GI___stpncpy, __stpncpy_ppc); +#endif + +#include <string/stpncpy.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c b/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c new file mode 100644 index 0000000..dbf8521 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c @@ -0,0 +1,33 @@ +/* Multiple versions of stpncpy. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__stpncpy) __stpncpy_ppc attribute_hidden; +extern __typeof (__stpncpy) __stpncpy_power7 attribute_hidden; + +libc_ifunc (__stpncpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __stpncpy_power7 + : __stpncpy_ppc); + +weak_alias (__stpncpy, stpncpy) +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c new file mode 100644 index 0000000..628f8bd --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRCAT __strcat_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +extern typeof (strcpy) __strcpy_power7; +extern typeof (strlen) __strlen_power7; + +#define strcpy __strcpy_power7 +#define strlen __strlen_power7 +#include <sysdeps/powerpc/strcat.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c new file mode 100644 index 0000000..1245764 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRCAT __strcat_ppc +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcat_ppc, __GI_strcat, __strcat_ppc); +#endif + +extern __typeof (strcat) __strcat_ppc attribute_hidden; + +#include <sysdeps/powerpc/strcat.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcat.c b/sysdeps/powerpc/powerpc64/multiarch/strcat.c new file mode 100644 index 0000000..847a62d --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcat.c @@ -0,0 +1,31 @@ +/* Multiple versions of strcat. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcat) __strcat_ppc attribute_hidden; +extern __typeof (strcat) __strcat_power7 attribute_hidden; + +libc_ifunc (strcat, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcat_power7 + : __strcat_ppc); +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S new file mode 100644 index 0000000..790ce8d --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S @@ -0,0 +1,40 @@ +/* Optimized strcmp implementation for POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__strcmp_power7) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__strcmp_power7): \ + cfi_startproc; \ + LOCALENTRY(__strcmp_power7) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strcmp_power7) \ + END_2(__strcmp_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strcmp.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S new file mode 100644 index 0000000..93d1277 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S @@ -0,0 +1,43 @@ +/* Default strcmp implementation for PowerPC64. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && !defined NOT_IN_libc +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__strcmp_ppc) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__strcmp_ppc): \ + cfi_startproc; \ + LOCALENTRY(__strcmp_ppc) + +# undef END +# define END(name) \ + cfi_endproc; \ + TRACEBACK(__strcmp_ppc) \ + END_2(__strcmp_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strcmp; __GI_strcmp = __strcmp_ppc +#endif + +#include <sysdeps/powerpc/powerpc64/strcmp.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c new file mode 100644 index 0000000..2013301 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c @@ -0,0 +1,31 @@ +/* Multiple versions of strcmp. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && !defined NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcmp) __strcmp_ppc attribute_hidden; +extern __typeof (strcmp) __strcmp_power7 attribute_hidden; + +libc_ifunc (strcmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcmp_power7 + : __strcmp_ppc); +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S new file mode 100644 index 0000000..02ffcc8 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S @@ -0,0 +1,40 @@ +/* Optimized strcspn implementation for POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__strcspn_power7) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__strcspn_power7): \ + cfi_startproc; \ + LOCALENTRY(__strcspn_power7) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strcspn_power7) \ + END_2(__strcspn_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strcspn.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c new file mode 100644 index 0000000..5f8b610 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRCSPN __strcspn_ppc +#ifdef SHARED + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcspn_ppc, __GI_strcspn, __strcspn_ppc); +#endif + +extern __typeof (strcspn) __strcspn_ppc attribute_hidden; + +#include <string/strcspn.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c new file mode 100644 index 0000000..3609d93 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c @@ -0,0 +1,31 @@ +/* Multiple versions of strcspn. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcspn) __strcspn_ppc attribute_hidden; +extern __typeof (strcspn) __strcspn_power7 attribute_hidden; + +libc_ifunc (strcspn, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcspn_power7 + : __strcspn_ppc); +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S index efcc212..a195e9a 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S +++ b/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S @@ -35,8 +35,7 @@ END_2(__strlen_ppc) # undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - .globl __GI_strlen; __GI_strlen = __strlen_ppc +# define libc_hidden_builtin_def(name) #endif #include <sysdeps/powerpc/powerpc64/strlen.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strlen.c b/sysdeps/powerpc/powerpc64/multiarch/strlen.c index 6574696..d2c26e9 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strlen.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strlen.c @@ -17,15 +17,25 @@ <http://www.gnu.org/licenses/>. */ #if defined SHARED && !defined NOT_IN_libc +/* Redefine strlen so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strlen +# define strlen __redirect_strlen # include <string.h> # include <shlib-compat.h> # include "init-arch.h" -extern __typeof (strlen) __strlen_ppc attribute_hidden; -extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (__redirect_strlen) __libc_strlen; -libc_ifunc (strlen, +extern __typeof (__redirect_strlen) __strlen_ppc attribute_hidden; +extern __typeof (__redirect_strlen) __strlen_power7 attribute_hidden; + +libc_ifunc (__libc_strlen, (hwcap & PPC_FEATURE_HAS_VSX) ? __strlen_power7 : __strlen_ppc); + +#undef strlen +strong_alias (__libc_strlen, strlen) +libc_hidden_ver (__libc_strlen, strlen) #endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.S new file mode 100644 index 0000000..ead4a9a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.S @@ -0,0 +1,42 @@ +/* Optimized strncat implementation for POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__strncat_power7) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__strncat_power7): \ + cfi_startproc; \ + LOCALENTRY(__strncat_power7) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strncat_power7) \ + END_2(__strncat_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define STRLEN __strlen_power7 + +#include <sysdeps/powerpc/powerpc64/power7/strncat.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c new file mode 100644 index 0000000..3058c0a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +#include <string.h> + +#define STRNCAT __strncat_ppc +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strncat_ppc, __GI_strncat, __strncat_ppc); +#endif + +extern __typeof (strncat) __strncat_ppc attribute_hidden; + +#include <string/strncat.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat.c b/sysdeps/powerpc/powerpc64/multiarch/strncat.c new file mode 100644 index 0000000..db98ec1 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncat.c @@ -0,0 +1,31 @@ +/* Multiple versions of strncat. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncat) __strncat_ppc attribute_hidden; +extern __typeof (strncat) __strncat_power7 attribute_hidden; + +libc_ifunc (strncat, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncat_power7 + : __strncat_ppc); +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S new file mode 100644 index 0000000..be349f9 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S @@ -0,0 +1,42 @@ +/* Optimized strncpy implementation for POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__strncpy_power7) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__strncpy_power7): \ + cfi_startproc; \ + LOCALENTRY(__strncpy_power7) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strncpy_power7) \ + END_2(__strncpy_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define MEMSET __memset_power7 + +#include <sysdeps/powerpc/powerpc64/power7/strncpy.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c new file mode 100644 index 0000000..e3111d2 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRNCPY __strncpy_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__strncpy_ppc) aliasname \ + __attribute__ ((weak, alias ("__strncpy_ppc"))); +#if !defined(NOT_IN_libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strncpy_ppc, __GI_strncpy, __strncpy_ppc); +#endif + +extern __typeof (strncpy) __strncpy_ppc attribute_hidden; + +#include <string/strncpy.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncpy.c b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c new file mode 100644 index 0000000..8fd5e4b --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncpy.c @@ -0,0 +1,35 @@ +/* Multiple versions of strncpy. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +/* Define multiple versions only for definition in libc. */ +#ifndef NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncpy) __strncpy_ppc attribute_hidden; +extern __typeof (strncpy) __strncpy_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (strncpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncpy_power7 + : __strncpy_ppc); + +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strpbrk-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strpbrk-power7.S new file mode 100644 index 0000000..663ca36 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strpbrk-power7.S @@ -0,0 +1,40 @@ +/* Optimized strpbrk implementation for POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__strpbrk_power7) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__strpbrk_power7): \ + cfi_startproc; \ + LOCALENTRY(__strpbrk_power7) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strpbrk_power7) \ + END_2(__strpbrk_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strpbrk.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strpbrk-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strpbrk-ppc64.c new file mode 100644 index 0000000..8dea70e --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strpbrk-ppc64.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRPBRK __strpbrk_ppc +#ifdef SHARED + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strpbrk_ppc, __GI_strpbrk, __strpbrk_ppc); +#endif + +extern __typeof (strpbrk) __strpbrk_ppc attribute_hidden; + +#include <string/strpbrk.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strpbrk.c b/sysdeps/powerpc/powerpc64/multiarch/strpbrk.c new file mode 100644 index 0000000..8b05536 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strpbrk.c @@ -0,0 +1,31 @@ +/* Multiple versions of strpbrk. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strpbrk) __strpbrk_ppc attribute_hidden; +extern __typeof (strpbrk) __strpbrk_power7 attribute_hidden; + +libc_ifunc (strpbrk, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strpbrk_power7 + : __strpbrk_ppc); +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S new file mode 100644 index 0000000..78e15e3 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S @@ -0,0 +1,39 @@ +/* Optimized strrchr implementation for POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .section ".text"; \ + ENTRY_2(__strrchr_power7) \ + .align ALIGNARG(2); \ + BODY_LABEL(__strrchr_power7): \ + cfi_startproc; \ + LOCALENTRY(__strrchr_power7) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strrchr_power7) \ + END_2(__strrchr_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strrchr.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c new file mode 100644 index 0000000..5633a9f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRRCHR __strrchr_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__strrchr_ppc) aliasname \ + __attribute__ ((weak, alias ("__strrchr_ppc"))); +#if !defined(NOT_IN_libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strrchr_ppc, __GI_strrchr, __strrchr_ppc); +#endif + +extern __typeof (strrchr) __strrchr_ppc attribute_hidden; + +#include <string/strrchr.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strrchr.c b/sysdeps/powerpc/powerpc64/multiarch/strrchr.c new file mode 100644 index 0000000..046162f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strrchr.c @@ -0,0 +1,35 @@ +/* Multiple versions of strrchr. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#ifndef NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strrchr) __strrchr_ppc attribute_hidden; +extern __typeof (strrchr) __strrchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (strrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strrchr_power7 + : __strrchr_ppc); +weak_alias (strrchr, rindex) +#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S new file mode 100644 index 0000000..889dfee --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strspn-power7.S @@ -0,0 +1,40 @@ +/* Optimized strspn implementation for POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(__strspn_power7) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + BODY_LABEL(__strspn_power7): \ + cfi_startproc; \ + LOCALENTRY(__strspn_power7) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strspn_power7) \ + END_2(__strspn_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strspn.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c new file mode 100644 index 0000000..d543772 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRSPN __strspn_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__strspn_ppc) aliasname \ + __attribute__ ((weak, alias ("__strspn_ppc"))); +#if !defined(NOT_IN_libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strspn_ppc, __GI_strspn, __strspn_ppc); +#endif + +extern __typeof (strspn) __strspn_ppc attribute_hidden; + +#include <string/strspn.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strspn.c b/sysdeps/powerpc/powerpc64/multiarch/strspn.c new file mode 100644 index 0000000..bf8c877 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strspn.c @@ -0,0 +1,31 @@ +/* Multiple versions of strspn. PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef NOT_IN_libc +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strspn) __strspn_ppc attribute_hidden; +extern __typeof (strspn) __strspn_power7 attribute_hidden; + +libc_ifunc (strspn, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strspn_power7 + : __strspn_ppc); +#endif diff --git a/sysdeps/powerpc/powerpc64/power5+/fpu/Implies b/sysdeps/powerpc/powerpc64/power5+/fpu/Implies index c0e6784..f00c50f 100644 --- a/sysdeps/powerpc/powerpc64/power5+/fpu/Implies +++ b/sysdeps/powerpc/powerpc64/power5+/fpu/Implies @@ -1 +1 @@ -powerpc/powerpc64/power5/fpu/multiarch +powerpc/powerpc64/power5/fpu diff --git a/sysdeps/powerpc/powerpc64/power5/fpu/Implies b/sysdeps/powerpc/powerpc64/power5/fpu/Implies index 3740d05..6b8c23e 100644 --- a/sysdeps/powerpc/powerpc64/power5/fpu/Implies +++ b/sysdeps/powerpc/powerpc64/power5/fpu/Implies @@ -1 +1 @@ -powerpc/powerpc64/power4/fpu/multiarch +powerpc/powerpc64/power4/fpu/ diff --git a/sysdeps/powerpc/powerpc64/power6x/fpu/Implies b/sysdeps/powerpc/powerpc64/power6x/fpu/Implies new file mode 100644 index 0000000..30fa176 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power6x/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu diff --git a/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies index f54ff23..410d289 100644 --- a/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies +++ b/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies @@ -1 +1 @@ -sysdeps/powerpc/powerpc64/power6/fpu/multiarch +powerpc/powerpc64/power6/fpu/multiarch diff --git a/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies b/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies new file mode 100644 index 0000000..bf5d617 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/multiarch diff --git a/sysdeps/powerpc/powerpc64/power7/bcopy.c b/sysdeps/powerpc/powerpc64/power7/bcopy.c new file mode 100644 index 0000000..4a6a400 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/bcopy.c @@ -0,0 +1 @@ +/* Implemented at memmove.S */ diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/Implies b/sysdeps/powerpc/powerpc64/power7/fpu/Implies index 410d289..30fa176 100644 --- a/sysdeps/powerpc/powerpc64/power7/fpu/Implies +++ b/sysdeps/powerpc/powerpc64/power7/fpu/Implies @@ -1 +1 @@ -powerpc/powerpc64/power6/fpu/multiarch +powerpc/powerpc64/power6/fpu diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S index bbfd381..58d9b12 100644 --- a/sysdeps/powerpc/powerpc64/power7/memcpy.S +++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S @@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0) ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move code. */ -#ifdef __LITTLE_ENDIAN__ -/* In little-endian mode, power7 takes an alignment trap on any lxvd2x - or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy - loop is only used for quadword aligned copies. */ +/* Align copies using VSX instructions to quadword. It is to avoid alignment + traps when memcpy is used on non-cacheable memory (for instance, memory + mapped I/O). */ andi. 10,3,15 clrldi 11,4,60 -#else - andi. 10,3,7 /* Check alignment of DST. */ - clrldi 11,4,61 /* Check alignment of SRC. */ -#endif cmpld cr6,10,11 /* SRC and DST alignments match? */ mr dst,3 @@ -53,13 +48,9 @@ EALIGN (memcpy, 5, 0) beq L(aligned_copy) mtocrf 0x01,0 -#ifdef __LITTLE_ENDIAN__ clrldi 0,0,60 -#else - clrldi 0,0,61 -#endif -/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */ +/* Get the DST and SRC aligned to 16 bytes. */ 1: bf 31,2f lbz 6,0(src) @@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0) stw 6,0(dst) addi dst,dst,4 8: -#ifdef __LITTLE_ENDIAN__ bf 28,16f ld 6,0(src) addi src,src,8 std 6,0(dst) addi dst,dst,8 16: -#endif subf cnt,0,cnt /* Main aligned copy loop. Copies 128 bytes at a time. */ @@ -298,9 +287,6 @@ L(copy_LE_8): .align 4 L(copy_GE_32_unaligned): clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */ -#ifndef __LITTLE_ENDIAN__ - andi. 10,3,15 /* Check alignment of DST (against quadwords). */ -#endif srdi 9,cnt,4 /* Number of full quadwords remaining. */ beq L(copy_GE_32_unaligned_cont) diff --git a/sysdeps/powerpc/powerpc64/power7/memmove.S b/sysdeps/powerpc/powerpc64/power7/memmove.S new file mode 100644 index 0000000..8663dfb --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/memmove.S @@ -0,0 +1,831 @@ +/* Optimized memmove implementation for PowerPC64/POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* void* [r3] memmove (void *dest [r3], const void *src [r4], size_t len [r5]) + + This optimization check if memory 'dest' overlaps with 'src'. If it does + not then it calls an optimized memcpy call (similar to memcpy for POWER7, + embedded here to gain some cycles). + If source and destiny overlaps, a optimized backwards memcpy is used + instead. */ + + .machine power7 +EALIGN (memmove, 5, 0) + CALL_MCOUNT 3 + +L(_memmove): + subf r9,r4,r3 + cmpld cr7,r9,r5 + blt cr7,L(memmove_bwd) + + cmpldi cr1,r5,31 + neg 0,3 + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 10,3,15 + clrldi 11,4,60 + cmpld cr6,10,11 /* SRC and DST alignments match? */ + + mr r11,3 + bne cr6,L(copy_GE_32_unaligned) + beq L(aligned_copy) + + mtocrf 0x01,0 + clrldi 0,0,60 + +/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */ +1: + bf 31,2f + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 +2: + bf 30,4f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +4: + bf 29,8f + lwz 6,0(r4) + addi r4,r4,4 + stw 6,0(r11) + addi r11,r11,4 +8: + bf 28,16f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +16: + subf r5,0,r5 + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy): + li 6,16 + li 7,32 + li 8,48 + mtocrf 0x02,r5 + srdi 12,r5,7 + cmpdi 12,0 + beq L(aligned_tail) + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + mtctr 12 + b L(aligned_128loop) + + .align 4 +L(aligned_128head): + /* for the 2nd + iteration of this loop. */ + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 +L(aligned_128loop): + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + stxvd2x 6,0,r11 + addi r4,r4,64 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + addi r11,r11,64 + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + addi r4,r4,64 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + addi r11,r11,64 + bdnz L(aligned_128head) + +L(aligned_tail): + mtocrf 0x01,r5 + bf 25,32f + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + addi r4,r4,64 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + addi r11,r11,64 +32: + bf 26,16f + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + addi r4,r4,32 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + addi r11,r11,32 +16: + bf 27,8f + lxvd2x 6,0,r4 + addi r4,r4,16 + stxvd2x 6,0,r11 + addi r11,r11,16 +8: + bf 28,4f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + mr r11,3 + cmpldi cr6,r5,8 + mtocrf 0x01,r5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + andi. 0,8,3 + cmpldi cr1,r5,16 + beq L(copy_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf r5,0,r5 +2: + bf 30,1f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +1: + bf 31,L(end_4bytes_alignment) + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(r4) + lwz 7,4(r4) + stw 6,0(r11) + lwz 8,8(r4) + stw 7,4(r11) + lwz 6,12(r4) + addi r4,r4,16 + stw 8,8(r11) + stw 6,12(r11) + addi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + lwz 6,0(r4) + lwz 7,4(r4) + addi r4,r4,8 + stw 6,0(r11) + stw 7,4(r11) + addi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz 6,0(r4) + sth 6,0(r11) + bflr 31 + lbz 7,2(r4) + stb 7,2(r11) + blr + + .align 4 +L(tail5): + bflr 31 + lbz 6,4(r4) + stb 6,4(r11) + blr + + .align 4 +1: + bflr 31 + lbz 6,0(r4) + stb 6,0(r11) + /* Return original DST pointer. */ + blr + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(r4) + lwz 7,4(r4) + stw 6,0(r11) + stw 7,4(r11) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st r11 quadword. */ + srdi 9,r5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtocrf 0x01,0 + subf r5,0,r5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 +2: + bf 30,4f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +4: + bf 29,8f + lwz 6,0(r4) + addi r4,r4,4 + stw 6,0(r11) + addi r11,r11,4 +8: + bf 28,0f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +0: + srdi 9,r5,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 10,r5,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi 8,r5,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,r4 +#else + lvsl 5,0,r4 +#endif + lvx 3,0,r4 + li 0,0 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx 4,r4,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi r4,r4,16 + stvx 6,0,r11 + addi r11,r11,16 + vor 3,4,4 + clrrdi 0,r4,60 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,r4,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,r4,7 +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi r4,r4,32 + stvx 6,0,r11 + stvx 10,r11,6 + addi r11,r11,32 + bdnz L(unaligned_loop) + + clrrdi 0,r4,60 + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + mtocrf 0x01,r5 + beqlr cr1 + + add r4,r4,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz 6,0(r4) + lwz 7,4(r4) + addi r4,r4,8 + stw 6,0(r11) + stw 7,4(r11) + addi r11,r11,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + + /* Start to memcpy backward implementation: the algorith first check if + src and dest have the same alignment and if it does align both to 16 + bytes and copy using VSX instructions. + If does not, align dest to 16 bytes and use VMX (altivec) instruction + to read two 16 bytes at time, shift/permute the bytes read and write + aligned to dest. */ +L(memmove_bwd): + cmpldi cr1,r5,31 + /* Copy is done backwards: update the pointers and check alignment. */ + add r11,r3,r5 + add r4,r4,r5 + mr r0,r11 + ble cr1, L(copy_LT_32_bwd) /* If move < 32 bytes use short move + code. */ + + andi. r10,r11,15 /* Check if r11 is aligned to 16 bytes */ + clrldi r9,r4,60 /* Check if r4 is aligned to 16 bytes */ + cmpld cr6,r10,r9 /* SRC and DST alignments match? */ + + bne cr6,L(copy_GE_32_unaligned_bwd) + beq L(aligned_copy_bwd) + + mtocrf 0x01,r0 + clrldi r0,r0,60 + +/* Get the DST and SRC aligned to 16 bytes. */ +1: + bf 31,2f + lbz r6,-1(r4) + subi r4,r4,1 + stb r6,-1(r11) + subi r11,r11,1 +2: + bf 30,4f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +4: + bf 29,8f + lwz r6,-4(r4) + subi r4,r4,4 + stw r6,-4(r11) + subi r11,r11,4 +8: + bf 28,16f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +16: + subf r5,0,r5 + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy_bwd): + li r6,-16 + li r7,-32 + li r8,-48 + li r9,-64 + mtocrf 0x02,r5 + srdi r12,r5,7 + cmpdi r12,0 + beq L(aligned_tail_bwd) + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + mtctr 12 + b L(aligned_128loop_bwd) + + .align 4 +L(aligned_128head_bwd): + /* for the 2nd + iteration of this loop. */ + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 +L(aligned_128loop_bwd): + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + stxvd2x v6,r11,r6 + subi r4,r4,64 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + lxvd2x v6,r4,r6 + lxvd2x v7,r4,7 + subi r11,r11,64 + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + subi r4,r4,64 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + subi r11,r11,64 + bdnz L(aligned_128head_bwd) + +L(aligned_tail_bwd): + mtocrf 0x01,r5 + bf 25,32f + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + subi r4,r4,64 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + subi r11,r11,64 +32: + bf 26,16f + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + subi r4,r4,32 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + subi r11,r11,32 +16: + bf 27,8f + lxvd2x v6,r4,r6 + subi r4,r4,16 + stxvd2x v6,r11,r6 + subi r11,r11,16 +8: + bf 28,4f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2_bwd) + lwz r6,-4(r4) + stw r6,-4(r11) + bf 30,L(tail5_bwd) + lhz r7,-6(r4) + sth r7,-6(r11) + bflr 31 + lbz r8,-7(r4) + stb r8,-7(r11) + /* Return original DST pointer. */ + blr + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32_bwd): + cmpldi cr6,r5,8 + mtocrf 0x01,r5 + ble cr6,L(copy_LE_8_bwd) + + /* At least 9 bytes to go. */ + neg r8,r4 + andi. r0,r8,3 + cmpldi cr1,r5,16 + beq L(copy_LT_32_aligned_bwd) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf r5,0,r5 +2: + bf 30,1f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +1: + bf 31,L(end_4bytes_alignment_bwd) + lbz 6,-1(r4) + subi r4,r4,1 + stb 6,-1(r11) + subi r11,r11,1 + + .align 4 +L(end_4bytes_alignment_bwd): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(copy_LT_32_aligned_bwd): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz r6,-4(r4) + lwz r7,-8(r4) + stw r6,-4(r11) + lwz r8,-12(r4) + stw r7,-8(r11) + lwz r6,-16(r4) + subi r4,r4,16 + stw r8,-12(r11) + stw r6,-16(r11) + subi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4_bwd) + lwz r6,-4(r4) + lwz r7,-8(r4) + subi r4,r4,8 + stw r6,-4(r11) + stw r7,-8(r11) + subi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4_bwd): + bf 29,L(tail2_bwd) + lwz 6,-4(r4) + stw 6,-4(r11) + bf 30,L(tail5_bwd) + lhz 7,-6(r4) + sth 7,-6(r11) + bflr 31 + lbz 8,-7(r4) + stb 8,-7(r11) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2_bwd): + bf 30,1f + lhz 6,-2(r4) + sth 6,-2(r11) + bflr 31 + lbz 7,-3(r4) + stb 7,-3(r11) + blr + + .align 4 +L(tail5_bwd): + bflr 31 + lbz 6,-5(r4) + stb 6,-5(r11) + blr + + .align 4 +1: + bflr 31 + lbz 6,-1(r4) + stb 6,-1(r11) + /* Return original DST pointer. */ + blr + + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8_bwd): + bne cr6,L(tail4_bwd) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + lwz 6,-8(r4) + lwz 7,-4(r4) + stw 6,-8(r11) + stw 7,-4(r11) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned_bwd): + andi. r10,r11,15 /* Check alignment of DST against 16 bytes.. */ + srdi r9,r5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont_bwd) + + /* DST is not quadword aligned and r10 holds the address masked to + compare alignments. */ + mtocrf 0x01,r10 + subf r5,r10,r5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz r6,-1(r4) + subi r4,r4,1 + stb r6,-1(r11) + subi r11,r11,1 +2: + bf 30,4f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +4: + bf 29,8f + lwz r6,-4(r4) + subi r4,r4,4 + stw r6,-4(r11) + subi r11,r11,4 +8: + bf 28,0f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +0: + srdi r9,r5,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont_bwd): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi r10,r5,60 + li r6,-16 /* Index for 16-bytes offsets. */ + li r7,-32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi r8,r5,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,r9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr v5,r0,r4 +#else + lvsl v5,r0,r4 +#endif + lvx v3,0,r4 + li r0,0 + bf 31,L(setup_unaligned_loop_bwd) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx v4,r4,r6 +#ifdef __LITTLE_ENDIAN__ + vperm v6,v3,v4,v5 +#else + vperm v6,v4,v3,v5 +#endif + subi r4,r4,16 + stvx v6,r11,r6 + subi r11,r11,16 + vor v3,v4,v4 + clrrdi r0,r4,60 + +L(setup_unaligned_loop_bwd): + mtctr r8 + ble cr6,L(end_unaligned_loop_bwd) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop_bwd): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx v4,r4,r6 +#ifdef __LITTLE_ENDIAN__ + vperm v6,v3,v4,v5 +#else + vperm v6,v4,v3,v5 +#endif + lvx v3,r4,r7 +#ifdef __LITTLE_ENDIAN__ + vperm v10,v4,v3,v5 +#else + vperm v10,v3,v4,v5 +#endif + subi r4,r4,32 + stvx v6,r11,r6 + stvx v10,r11,r7 + subi r11,r11,32 + bdnz L(unaligned_loop_bwd) + + clrrdi r0,r4,60 + + .align 4 +L(end_unaligned_loop_bwd): + + /* Check for tail bytes. */ + mtocrf 0x01,r5 + beqlr cr1 + + add r4,r4,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz r6,-4(r4) + lwz r7,-8(r4) + subi r4,r4,8 + stw r6,-4(r11) + stw r7,-8(r11) + subi r11,r11,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2_bwd) + lwz r6,-4(r4) + stw r6,-4(r11) + bf 30,L(tail5_bwd) + lhz r7,-6(r4) + sth r7,-6(r11) + bflr 31 + lbz r8,-7(r4) + stb r8,-7(r11) + /* Return original DST pointer. */ + blr +END_GEN_TB (memmove, TB_TOCLESS) +libc_hidden_builtin_def (memmove) + + +/* void bcopy(const void *src [r3], void *dest [r4], size_t n [r5]) + Implemented in this file to avoid linker create a stub function call + in the branch to '_memmove'. */ +ENTRY (bcopy) + mr r6,r3 + mr r3,r4 + mr r4,r6 + b L(_memmove) +END (bcopy) diff --git a/sysdeps/powerpc/powerpc64/power7/memrchr.S b/sysdeps/powerpc/powerpc64/power7/memrchr.S index 40e436f..0c01ca2 100644 --- a/sysdeps/powerpc/powerpc64/power7/memrchr.S +++ b/sysdeps/powerpc/powerpc64/power7/memrchr.S @@ -29,7 +29,7 @@ ENTRY (__memrchr) mr r10,r3 clrrdi r6,r7,7 li r9,3<<5 - dcbt r9,r6,16 /* Stream hint, decreasing addresses. */ + dcbt r9,r6,8 /* Stream hint, decreasing addresses. */ /* Replicate BYTE to doubleword. */ insrdi r4,r4,8,48 diff --git a/sysdeps/powerpc/powerpc64/power7/stpncpy.S b/sysdeps/powerpc/powerpc64/power7/stpncpy.S new file mode 100644 index 0000000..a539093 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/stpncpy.S @@ -0,0 +1,24 @@ +/* Optimized stpncpy implementation for PowerPC64/POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STPNCPY +#include <sysdeps/powerpc/powerpc64/power7/strncpy.S> + +weak_alias (__stpncpy, stpncpy) +libc_hidden_def (__stpncpy) +libc_hidden_builtin_def (stpncpy) diff --git a/sysdeps/powerpc/powerpc64/power7/strcmp.S b/sysdeps/powerpc/powerpc64/power7/strcmp.S new file mode 100644 index 0000000..f16a9d8 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strcmp.S @@ -0,0 +1,195 @@ +/* Optimized strcmp implementation for Power7 using 'cmpb' instruction + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The optimization is achieved here through cmpb instruction. + 8byte aligned strings are processed with double word comparision + and unaligned strings are handled effectively with loop unrolling + technique */ + +#include <sysdep.h> + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + +EALIGN (strcmp, 4, 0) + CALL_MCOUNT 2 + + or r9, r3, r4 + rldicl. r10, r9, 0, 61 /* are s1 and s2 8 byte aligned..? */ + bne cr0, L(process_unaligned_bytes) + +/* process input parameters on double word aligned boundary */ + ld r9, 0(r4) /* load s2 at offset=0 */ + li r10, 0 /* load mask=0 */ + cmpb r10, r9, r10 /* compare bytes at s2 with mask */ + cmpdi cr7, r10, 0 /* is NULL found ..? is end of string HIT */ + bne cr7, L(process_unaligned_bytes) /* process byte by byte */ + + ld r10, 0(r3) /* load s1 at offset=0 */ + li r8, 0 /* load mask=0 */ + cmpb r8, r10, r8 /* compare bytes at s1 with mask */ + cmpdi cr7, r8, 0 /* is NULL found ..? is end of string HIT */ + bne cr7, L(process_unaligned_bytes) /* process byte by byte */ + +/*s1 and s2 does not contain NULL now , so compare all 8 bytes in a GO */ + cmpb r9, r10, r9 /* compare s1 and s2 */ + cmpdi cr7, r9, -1 /* compare result with 0xFFFFFFFFFFFFFFFF */ + bne cr7, L(process_unaligned_bytes) /* s1,s2 mismatch found */ + + addi r5, r3, 8 /* save next offset of s2 */ + addi r11, r4, 8 /* save next offset of s1 */ + ld r8, 8(r4) /* load s2 at offset=8 */ + li r9, 0 /* load mask=0 */ + cmpb r9, r8, r9 /* compare bytes at s2 with mask */ + cmpdi cr7, r9, 0 /* NULL found ..? */ + bne cr7, L(processBytes)/* update input and process bytes one by one */ + + mr r9, r4 /* save s2 */ + li r10, 0 /* load mask=0 */ + + ld r7, 8(r3) /* load s1 at offset=8 */ + cmpb r6, r7, r10 /* compare bytes at s1 with mask */ + cmpdi cr7, r6, 0 /* is NULL found */ + bne cr7, L(processBytes)/* mismatch, so process one by one */ + +L(unrollDword): + cmpb r8, r7, r8 /* compare s1 and s2 */ + cmpdi cr7, r8, -1 /* compare result with 0xFFFFFFFFFFFFFFFF */ + bne cr7, L(processBytes)/* mismatch with s1 and s2 */ + + addi r5, r3, 16 /* save offset=16 of s1 */ + addi r4, r9, 16 /* save offset=16 of s2 */ + ld r8, 16(r9) /* load s2 at offset=16 */ + cmpb r7, r8, r10 /* compare bytes at s2 with mask */ + cmpdi cr7, r7, 0 /* NULL found ..? */ + bne cr7, L(update2processBytes) + + ld r7, 16(r3) /* load s1 at offset=16 */ + cmpb r6, r7, r10 /* check s1 for end of string */ + cmpdi cr7, r6, 0 /* end of s1 ?,then handle byte by byte */ + bne 7,L(update2processBytes) + + cmpb r8, r7, r8 /* compare s1 and s2 double words */ + cmpdi cr7, r8, -1 /* compare results with 0xFFFFFFFFFFFFFFFF */ + bne cr7,L(update2processBytes) + + addi r5, r3, 24 /* update s1 to offset=24 */ + addi r4, r9, 24 /* update s2 to offset=24 */ + + ld r8, 24(r9) /* load s2 */ + cmpb r7, r8, r10 /* compare s2 for NULL */ + cmpdi cr7, r7, 0 /* verify if s2 is ending now */ + bne cr7,L(update2processBytes) + + ld r7, 24(r3) /* load s1 at offset=24 */ + cmpb r6, r7, r10 /* verify for NULL */ + cmpdi cr7, r6, 0 /* is NULL found */ + bne cr7, L(update2processBytes) + + cmpb r8, r7, r8 /* compare s1 and s2 */ + cmpdi cr7, r8, -1 /* are s1 and s2 same ..? */ + bne cr7, L(update2processBytes) + + addi r7, r9, 32 /* update s2 to next double word */ + addi r3, r3, 32 /* update s1 to next double word */ + + ld r8, 32(r9) /* load s2 */ + mr r4, r7 /* save s2 */ + cmpb r6, r8, r10 /* compare s2 with NULL */ + cmpdi cr7, r6, 0 /* end of s2 ..? */ + bne cr7, L(process_unaligned_bytes) + + ld r6, 0(r3) /* load and compare s1 for NULL */ + cmpb r5, r6, r10 + cmpdi cr7, r5, 0 + bne cr7, L(process_unaligned_bytes) + + cmpb r8, r6, r8 /* compare s1 and s2 */ + cmpdi cr7, r8, -1 + bne cr7, L(process_unaligned_bytes) + + addi r5, r3, 8 /* increment s1 and d2 here */ + addi r11, r9, 40 + + ld r8, 40(r9) /* process s2 now */ + cmpb r9, r8, r10 + cmpdi cr7, r9, 0 + bne cr7, L(processBytes) + + mr r9, r7 + ld r7, 8(r3) /* process s1 now */ + cmpb r6, r7, r10 + cmpdi cr7, r6, 0 + beq cr7, L(unrollDword) /* unroll to compare s1 and s2 */ + +L(processBytes): + mr r4, r11 /* update input params */ + mr r3, r5 + + .p2align 4 +L(process_unaligned_bytes): + lbz r9, 0(r3) /* load byte from s1 */ + lbz r10, 0(r4) /* load byte from s2 */ + cmpdi cr7, r9, 0 /* compare *s1 with NULL */ + beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ + cmplw cr7, r9, r10 /* compare *s1 and *s2 */ + bne cr7, L(ComputeDiff) /* branch to compute difference and return */ + + lbz r9, 1(r3) /* load next byte from s1 */ + lbz r10, 1(r4) /* load next byte from s2 */ + cmpdi cr7, r9, 0 /* compare *s1 with NULL */ + beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ + cmplw cr7, r9, r10 /* compare *s1 and *s2 */ + bne cr7, L(ComputeDiff) /* branch to compute difference and return */ + + lbz r9, 2(r3) /* unroll 3rd byte here */ + lbz r10, 2(r4) + cmpdi cr7, r9, 0 + beq cr7, L(diffOfNULL) + cmplw cr7, r9, r10 + bne 7, L(ComputeDiff) + + lbz r9, 3(r3) /* unroll 4th byte now */ + lbz r10, 3(r4) + addi r3, r3, 4 /* increment s1 by unroll factor */ + cmpdi cr7, r9, 0 + cmplw cr6, 9, r10 + beq cr7, L(diffOfNULL) + addi r4, r4, 4 /* increment s2 by unroll factor */ + beq cr6, L(process_unaligned_bytes) /* unroll byte processing */ + + .p2align 4 +L(ComputeDiff): + extsw r9, r9 + subf r10, r10, r9 /* compute s1 - s2 */ + extsw r3, r10 + blr /* return */ + + .p2align 4 +L(diffOfNULL): + li r9, 0 + subf r10, r10, r9 /* compute s1 - s2 */ + extsw r3, r10 /* sign extend result */ + blr /* return */ + + .p2align 4 +L(update2processBytes): + mr r3, r5 /* update and proceed */ + b L(process_unaligned_bytes) + +END (strcmp) +libc_hidden_builtin_def (strcmp) diff --git a/sysdeps/powerpc/powerpc64/power7/strcspn.S b/sysdeps/powerpc/powerpc64/power7/strcspn.S new file mode 100644 index 0000000..3f6aa0a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strcspn.S @@ -0,0 +1,139 @@ +/* Optimized strcspn implementation for PowerPC64. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* size_t [r3] strcspn (const char [r4] *s, const char [r5] *reject) */ + + .machine power7 +EALIGN (strcspn, 4, 0) + CALL_MCOUNT 3 + + /* The idea to speed up the algorithm is to create a lookup table + for fast check if input character should be considered. For ASCII + or ISO-8859-X character sets it has 256 positions. */ + lbz r10,0(r4) + + /* First the table should be cleared and to avoid unaligned accesses + when using the VSX stores the table address is aligned to 16 + bytes. */ + xxlxor v0,v0,v0 + + /* PPC64 ELF ABI stack is aligned to 16 bytes. */ + addi r9,r1,-256 + + li r8,48 + li r5,16 + li r6,32 + cmpdi cr7,r10,0 /* reject[0] == '\0' ? */ + addi r12,r9,64 + /* Clear the table with 0 values */ + stxvw4x v0,r0,r9 + addi r11,r9,128 + addi r7,r9,192 + stxvw4x v0,r9,r5 + stxvw4x v0,r9,r6 + stxvw4x v0,r9,r8 + stxvw4x v0,r0,r12 + stxvw4x v0,r12,r5 + stxvw4x v0,r12,r6 + stxvw4x v0,r12,r8 + stxvw4x v0,r0,r11 + stxvw4x v0,r11,r5 + stxvw4x v0,r11,r6 + stxvw4x v0,r11,r8 + stxvw4x v0,r0,r7 + stxvw4x v0,r7,r5 + stxvw4x v0,r7,r6 + stxvw4x v0,r7,r8 + li r8,1 + beq cr7,L(finish_table) /* If reject[0] == '\0' skip */ + + /* Initialize the table as: + for (i=0; reject[i]; i++ + table[reject[i]]] = 1 */ + .p2align 4,,15 +L(init_table): + stbx r8,r9,r10 + lbzu r10,1(r4) + cmpdi cr7,r10,0 /* If reject[0] == '\0' finish */ + bne cr7,L(init_table) +L(finish_table): + /* set table[0] = 1 */ + li r10,1 + stb r10,0(r9) + li r10,0 + b L(mainloop) + + /* Unrool the loop 4 times and check using the table as: + i = 0; + while (1) + { + if (table[input[i++]] == 1) + return i - 1; + if (table[input[i++]] == 1) + return i - 1; + if (table[input[i++]] == 1) + return i - 1; + if (table[input[i++]] == 1) + return i - 1; + } */ + .p2align 4,,15 +L(unroll): + lbz r8,1(r3) + addi r10,r10,4 + lbzx r8,r9,r8 + cmpwi r7,r8,1 + beq cr7,L(end) + lbz r8,2(r3) + addi r3,r3,4 + lbzx r8,r9,r8 + cmpwi cr7,r8,1 + beq cr7,L(end2) + lbz r8,3(r7) + lbzx r8,r9,r8 + cmpwi cr7,r8,1 + beq cr7,L(end3) +L(mainloop): + lbz r8,0(r3) + mr r7,r3 + addi r6,r10,1 + addi r4,r10,2 + addi r5,r10,3 + lbzx r8,r9,8 + cmpwi cr7,r8,1 + bne cr7,L(unroll) + mr r3,r10 + blr + + .p2align 4,,15 +L(end): + mr r3,r6 + blr + + .p2align 4,,15 +L(end2): + mr r3,r4 + blr + + .p2align 4,,15 +L(end3): + mr r3,r5 + blr +END (strcspn) +libc_hidden_builtin_def (strcspn) diff --git a/sysdeps/powerpc/powerpc64/power7/strncat.S b/sysdeps/powerpc/powerpc64/power7/strncat.S new file mode 100644 index 0000000..f5ea52d --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strncat.S @@ -0,0 +1,228 @@ +/* Optimized strncat implementation for PowerPC64/POWER7. + + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The algorithm is as follows for aligned memory access : + + if address of s2 is divisible by 0x7UL, + perform aligned doubleword catenation + else + perform unaligned catenation + + The aligned comparison are made using cmpb instructions. */ + +/* char* [r3] strncat (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +#include <sysdep.h> + +#ifndef STRNCAT +# undef strncat +# define STRNCAT strncat +#endif + +#ifndef STRLEN +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRLEN __GI_strlen +# else +# define STRLEN strlen +# endif +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+32) + + .machine power7 +EALIGN(STRNCAT, 4, 0) + CALL_MCOUNT 3 + + mflr r0 /* Load link register LR to r0. */ + +/* We shall use r29, r30 and r31 non volatile register for retention. + Save all the callee registers in the GPR save area. */ + std r29, -24(r1) /* Save callers register r29. */ + std r30, -16(r1) /* Save callers register r30. */ + std r31, -8(r1) /* Save callers register r31. */ + + std r0, 16(r1) /* Store the link register. */ + stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ + +/* Improve performance with CPU pre-fetch. */ + dcbt 0, r3 /* Pre-fetch str to avoid cache + miss. */ + dcbt 0, r4 /* Pre-fetch accept to avoid cache + miss. */ + + mr. r29, r5 /* Save "n" in r29. */ + mr r30, r3 /* Save "s1" in r30 from r3. */ + beq cr0,L(done) + + mr r31, r4 /* Save "s2" in r31 from r4. */ + bl STRLEN /* Call optimized strlen on s1; goto + end of s1. */ + nop + cmpldi cr7, r29, 7 /* If s2 is <=7 process + byte-by-byte. */ + add r3, r30, r3 /* Grab the last character of s1. */ + bgt cr7,L(alignment) /* Process by aligned strings. */ + + cmpldi cr7, r29, 3 /* If n is >= 4, we can + byte-unroll. */ + addi r9, r3, -1 /* Make "s1" point before next + character, increment when read. */ + bgt cr7, L(bytes_unroll) /* Process each byte. */ + +L(byte_by_byte): + lbz r10, 0(r31) + addi r8, r9, 1 + cmpdi cr7, r10, 0 /* Check for NULL in "s2". */ + stb r10, 1(r9) + beq cr7, L(done) + add r9, r9, r29 + subf r9, r8, r9 + addi r9, r9, 1 + mtctr r9 + b L(branch2) + .p2align 4 +L(branch1): + lbzu r10, 1(r31) + cmpdi cr7, r10, 0 + stbu r10, 1(r8) + beq cr7,L(done) +L(branch2): + mr r9, r8 + bdnz L(branch1) + beq cr7,L(done) +L(nullTerminate): + li r10, 0 /* Load NULL for termination. */ + stb r10, 1(r9) /* Append or terminate s1 with + NULL. */ + .p2align 4 /* A small section here. */ +L(done): /* We return now. */ + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + mr r3, r30 /* Set the return value length of + string. */ + ld r0, 16(r1) /* Read the saved link register. */ + ld r29, -24(r1) /* Restore save register r29. */ + ld r30, -16(r1) /* Restore save register r30. */ + ld r31, -8(r1) /* Restore save register r31. */ + mtlr r0 /* Restore link register. */ + blr /* Branch to link register. */ + + .p2align 4 +L(alignment): + rldicl. r9, r31, 0, 61 /* Check if s2 is 8byte aligned */ + beq cr0,L(dwordAligned) + + .p2align 4 +/* Unaligned bytes in string, so process byte by byte. + POWER7 has performance gains over loop unroll. */ +L(bytes_unroll): + addi r9, r3, -1 + srdi r10, r29, 2 + mtctr r10 + b L(L10) + .p2align 4 +L(L44): + lbz r10, 1(r31) /* Load byte. */ + cmpdi cr7, r10, 0 /* Compare ; if byte not zero, + continue. */ + stb r10, 2(r9) /* Store byte */ + beq cr7, L(done) + addi r31, r31, 4 + + lbz r10, -2(r31) /* Perform loop unroll here on byte + load and store. */ + cmpdi cr7, r10, 0 + stb r10, 3(r9) + beq cr7, L(done) + + lbz r10, -1(r31) /* Loop unroll here. */ + cmpdi cr7, r10, 0 + stbu r10, 4(r9) + beq cr7, L(done) + + bdz L(leftNbytes) + +L(L10): + lbz r10, 0(r31) /* Loop unroll here. */ + cmpdi cr7, r10, 0 + stb r10, 1(r9) + bne cr7,L(L44) + b L(done) + .p2align 4 +/* If s2 is double word aligned, we load and store double word. */ +L(dwordAligned): +/* read, write 8 bytes at a time */ + srdi r8, r29, 3 /* Compute count for CTR to loop; + count = n/8. */ + li r7, 0 /* Load r7 with NULL. */ + li r10, 0 /* Load r10 with MASK '0'. */ + + mtctr r8 /* Move count to CTR. */ +L(loop8): + ld r9, 0(r31) /* Read double word from s2. */ + cmpb r6, r9, r10 /* Compare bytes in s2 we read + just now. */ + cmpdi r6, 0 /* If cmpb returned NULL, + we continue. */ + bne+ L(a8) + std r9, 0(r3) /* Append double word from s2 + with s1. */ + addi r3, r3, 8 /* Increment s1. */ + addi r31, r31, 8 /* Increment s2. */ + subi r29, r29, 8 /* Decrement count by 8. */ + bdnz L(loop8) /* Continue until "count" is + non zero. */ + +L(a8): + cmpdi r29, 0 /* If "n" is already zero, we skip. */ + beq+ L(align8align) + + mtctr r29 /* Process left over bytes in "n". */ +L(unaligned0): + lbz r9, 0(r31) /* Read a byte from s2. */ + cmpw r9, r7 /* If byte is NULL, we stop here . */ + beq+ L(align8align) /* Skip processing further if NULL. */ + stb r9, 0(r3) /* If not NULL, store byte into s1. */ + addi r3, r3, 1 /* Increment s1 by 1. */ + addi r31, r31, 1 /* Increment s2 by 1. */ + bdnz L(unaligned0) /* Decrement counter "n" and loop + until non zero. */ +L(align8align): + stb r7, 0(r3) /* Terminate s1 with NULL. */ + + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + mr r3, r30 /* Set the return value, length of + string. */ + ld r0, 16(r1) /* Read the saved link register. */ + ld r29, -24(r1) /* Restore save register r29. */ + ld r30, -16(r1) /* Restore save register r30. */ + ld r31, -8(r1) /* Restore save register r31. */ + mtlr r0 /* Restore link register. */ + blr /* Branch to link register */ + + .p2align 4 +L(leftNbytes): + rldicl. r29, r29, 0, 62 /* Check if n>0 and n < 4 bytes. */ + bne cr0,L(byte_by_byte) /* Process bytes one by one. */ + b L(nullTerminate) /* Now, finish catenation with + NULL termination. */ +END(STRNCAT) diff --git a/sysdeps/powerpc/powerpc64/power7/strncpy.S b/sysdeps/powerpc/powerpc64/power7/strncpy.S new file mode 100644 index 0000000..51860df --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strncpy.S @@ -0,0 +1,338 @@ +/* Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Implements the functions + + char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5]) + + AND + + char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5]) + + The algorithm is as follows: + > if src and dest are 8 byte aligned, perform double word copy + else + > copy byte by byte on unaligned addresses. + + The aligned comparison are made using cmpb instructions. */ + +/* The focus on optimization for performance improvements are as follows: + 1. data alignment [gain from aligned memory access on read/write] + 2. POWER7 gains performance with loop unrolling/unwinding + [gain by reduction of branch penalty]. + 3. The final pad with null bytes is done by calling an optimized + memset. */ + +#ifdef USE_AS_STPNCPY +# define FUNC_NAME __stpncpy +#else +# define FUNC_NAME strncpy +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+32) + +#ifndef MEMSET +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define MEMSET __GI_memset +# else +# define MEMSET memset +# endif +#endif + + .machine power7 +EALIGN(FUNC_NAME, 4, 0) + CALL_MCOUNT 3 + + mflr r0 /* load link register LR to r0 */ + or r10, r3, r4 /* to verify source and destination */ + rldicl. r8, r10, 0, 61 /* is double word aligned .. ? */ + + std r19, -8(r1) /* save callers register , r19 */ + std r18, -16(r1) /* save callers register , r18 */ + std r0, 16(r1) /* store the link register */ + stdu r1, -FRAMESIZE(r1) /* create the stack frame */ + + mr r9, r3 /* save r3 into r9 for use */ + mr r18, r3 /* save r3 for retCode of strncpy */ + bne 0, L(byte_by_byte) + + + srdi r11, r5, 3 /* compute count for CTR ; count = n/8 */ + cmpldi cr7, r11, 3 /* if count > 4 ; perform unrolling 4 times */ + ble 7, L(update1) + + ld r10, 0(r4) /* load doubleWord from src */ + cmpb r8, r10, r8 /* compare src with NULL ,we read just now */ + cmpdi cr7, r8, 0 /* if cmpb returned NULL ; we continue */ + bne cr7, L(update3) + + std r10, 0(r3) /* copy doubleword at offset=0 */ + ld r10, 8(r4) /* load next doubleword from offset=8 */ + cmpb r8, r10, r8 /* compare src with NULL , we read just now */ + cmpdi cr7, r8, 0 /* if cmpb returned NULL ; we continue */ + bne 7,L(HopBy8) + + addi r8, r11, -4 + mr r7, r3 + srdi r8, r8, 2 + mr r6, r4 + addi r8, r8, 1 + li r12, 0 + mtctr r8 + b L(dwordCopy) + + .p2align 4 +L(dWordUnroll): + std r8, 16(r9) + ld r8, 24(r4) /* load dword,perform loop unrolling again */ + cmpb r10, r8, r10 + cmpdi cr7, r10, 0 + bne cr7, L(HopBy24) + + std r8, 24(r7) /* copy dword at offset=24 */ + addi r9, r9, 32 + addi r4, r4, 32 + bdz L(leftDwords) /* continue with loop on counter */ + + ld r3, 32(r6) + cmpb r8, r3, r10 + cmpdi cr7, r8, 0 + bne cr7, L(update2) + + std r3, 32(r7) + ld r10, 40(r6) + cmpb r8, r10, r8 + cmpdi cr7, r8, 0 + bne cr7, L(HopBy40) + + mr r6, r4 /* update values */ + mr r7, r9 + mr r11, r0 + mr r5, r19 + +L(dwordCopy): + std r10, 8(r9) /* copy dword at offset=8 */ + addi r19, r5, -32 + addi r0, r11, -4 + ld r8, 16(r4) + cmpb r10, r8, r12 + cmpdi cr7, r10, 0 + beq cr7, L(dWordUnroll) + + addi r9, r9, 16 /* increment dst by 16 */ + addi r4, r4, 16 /* increment src by 16 */ + addi r5, r5, -16 /* decrement length 'n' by 16 */ + addi r0, r11, -2 /* decrement loop counter */ + +L(dWordUnrollOFF): + ld r10, 0(r4) /* load first dword */ + li r8, 0 /* load mask */ + cmpb r8, r10, r8 + cmpdi cr7, r8, 0 + bne cr7, L(byte_by_byte) + mtctr r0 + li r7, 0 + b L(CopyDword) + + .p2align 4 +L(loadDWordandCompare): + ld r10, 0(r4) + cmpb r8, r10, r7 + cmpdi cr7, r8, 0 + bne cr7, L(byte_by_byte) + +L(CopyDword): + addi r9, r9, 8 + std r10, -8(r9) + addi r4, r4, 8 + addi r5, r5, -8 + bdnz L(loadDWordandCompare) + +L(byte_by_byte): + cmpldi cr7, r5, 3 + ble cr7, L(verifyByte) + srdi r10, r5, 2 + mr r19, r9 + mtctr r10 + b L(firstByteUnroll) + + .p2align 4 +L(bytes_unroll): + lbz r10, 1(r4) /* load byte from src */ + cmpdi cr7, r10, 0 /* compare for NULL */ + stb r10, 1(r19) /* store byte to dst */ + beq cr7, L(updtDestComputeN2ndByte) + + addi r4, r4, 4 /* advance src */ + + lbz r10, -2(r4) /* perform loop unrolling for byte r/w */ + cmpdi cr7, r10, 0 + stb r10, 2(r19) + beq cr7, L(updtDestComputeN3rdByte) + + lbz r10, -1(r4) /* perform loop unrolling for byte r/w */ + addi r19, r19, 4 + cmpdi cr7, r10, 0 + stb r10, -1(r19) + beq cr7, L(ComputeNByte) + + bdz L(update0) + +L(firstByteUnroll): + lbz r10, 0(r4) /* perform loop unrolling for byte r/w */ + cmpdi cr7, 10, 0 + stb r10, 0(r19) + bne cr7, L(bytes_unroll) + addi r19, r19, 1 + +L(ComputeNByte): + subf r9, r19, r9 /* compute 'n'n bytes to fill */ + add r8, r9, r5 + +L(zeroFill): + cmpdi cr7, r8, 0 /* compare if length is zero */ + beq cr7, L(update3return) + + mr r3, r19 /* fill buffer with */ + li r4, 0 /* zero fill buffer */ + mr r5, r8 /* how many bytes to fill buffer with */ + bl MEMSET /* call optimized memset */ + nop + +L(update3return): +#ifdef USE_AS_STPNCPY + addi r3, r19, -1 /* update return value */ +#endif + +L(hop2return): +#ifndef USE_AS_STPNCPY + mr r3, r18 /* set return value */ +#endif + addi r1, r1, FRAMESIZE /* restore stack pointer */ + ld r0, 16(r1) /* read the saved link register */ + ld r18, -16(r1) /* restore callers save register, r18 */ + ld r19, -8(r1) /* restore callers save register, r19 */ + mtlr r0 /* branch to link register */ + blr /* return */ + + .p2align 4 +L(update0): + mr r9, r19 + + .p2align 4 +L(verifyByte): + rldicl. r8, r5, 0, 62 +#ifdef USE_AS_STPNCPY + mr r3, r9 +#endif + beq cr0, L(hop2return) + mtctr r8 + addi r4, r4, -1 + mr r19, r9 + b L(oneBYone) + + .p2align 4 +L(proceed): + bdz L(done) + +L(oneBYone): + lbzu r10, 1(r4) /* copy byte */ + addi r19, r19, 1 + addi r8, r8, -1 + cmpdi cr7, r10, 0 + stb r10, -1(r19) + bne cr7, L(proceed) + b L(zeroFill) + + .p2align 4 +L(done): + addi r1, r1, FRAMESIZE /* restore stack pointer */ +#ifdef USE_AS_STPNCPY + mr r3, r19 /* set the return value */ +#else + mr r3, r18 /* set the return value */ +#endif + ld r0, 16(r1) /* read the saved link register */ + ld r18, -16(r1) /* restore callers save register, r18 */ + ld r19, -8(r1) /* restore callers save register, r19 */ + mtlr r0 /* branch to link register */ + blr /* return */ + +L(update1): + mr r0, r11 + mr r19, r5 + + .p2align 4 +L(leftDwords): + cmpdi cr7, r0, 0 + mr r5, r19 + bne cr7, L(dWordUnrollOFF) + b L(byte_by_byte) + + .p2align 4 +L(updtDestComputeN2ndByte): + addi r19, r19, 2 /* update dst by 2 */ + subf r9, r19, r9 /* compute distance covered */ + add r8, r9, r5 + b L(zeroFill) + + .p2align 4 +L(updtDestComputeN3rdByte): + addi r19, r19, 3 /* update dst by 3 */ + subf r9, r19, r9 /* compute distance covered */ + add r8, r9, r5 + b L(zeroFill) + + .p2align 4 +L(HopBy24): + addi r9, r9, 24 /* increment dst by 24 */ + addi r4, r4, 24 /* increment src by 24 */ + addi r5, r5, -24 /* decrement length 'n' by 24 */ + addi r0, r11, -3 /* decrement loop counter */ + b L(dWordUnrollOFF) + + .p2align 4 +L(update2): + mr r5, r19 + b L(dWordUnrollOFF) + + .p2align 4 +L(HopBy40): + addi r9, r7, 40 /* increment dst by 40 */ + addi r4, r6, 40 /* increment src by 40 */ + addi r5, r5, -40 /* decrement length 'n' by 40 */ + addi r0, r11, -5 /* decrement loop counter */ + b L(dWordUnrollOFF) + +L(update3): + mr r0, r11 + b L(dWordUnrollOFF) + +L(HopBy8): + addi r9, r3, 8 /* increment dst by 8 */ + addi r4, r4, 8 /* increment src by 8 */ + addi r5, r5, -8 /* decrement length 'n' by 8 */ + addi r0, r11, -1 /* decrement loop counter */ + b L(dWordUnrollOFF) +END(FUNC_NAME) +#ifndef USE_AS_STPNCPY +libc_hidden_builtin_def (strncpy) +#endif diff --git a/sysdeps/powerpc/powerpc64/power7/strpbrk.S b/sysdeps/powerpc/powerpc64/power7/strpbrk.S new file mode 100644 index 0000000..d6204a7 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strpbrk.S @@ -0,0 +1,148 @@ +/* Optimized strpbrk implementation for PowerPC64/POWER7. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* char [r3] *strpbrk(const char [r4] *s, const char [r5] *accept) */ + + .machine power7 +EALIGN (strpbrk, 4, 0) + CALL_MCOUNT 3 + + lbz r10,0(r4) + cmpdi cr7,r10,0 /* accept[0] == '\0' ? */ + beq cr7,L(nullfound) + + /* The idea to speed up the algorithm is to create a lookup table + for fast check if input character should be considered. For ASCII + or ISO-8859-X character sets it has 256 positions. */ + + /* First the table should be cleared and to avoid unaligned accesses + when using the VSX stores the table address is aligned to 16 + bytes. */ + xxlxor v0,v0,v0 + + /* PPC64 ELF ABI stack is aligned to 16 bytes */ + addi r9,r1,-256 + + li r5,16 + li r6,32 + li r8,48 + addi r12,r9,64 + /* Clear the table with 0 values */ + stxvw4x v0,r0,r9 + addi r11,r9,128 + addi r7,r9,192 + stxvw4x v0,r9,r5 + li r0,1 + stxvw4x v0,r9,r6 + stxvw4x v0,r9,r8 + stxvw4x v0,r0,r12 + stxvw4x v0,r12,r5 + stxvw4x v0,r12,r6 + stxvw4x v0,r12,r8 + stxvw4x v0,r0,r11 + stxvw4x v0,r11,r5 + stxvw4x v0,r11,r6 + stxvw4x v0,r11,r8 + stxvw4x v0,r0,r7 + stxvw4x v0,r7,r5 + stxvw4x v0,r7,r6 + stxvw4x v0,r7,r8 + + /* Initialize the table as: + for (i=0; accept[i]; i++ + table[accept[i]]] = 1 */ + .p2align 4,,15 +L(init_table): + stbx r0,r9,r10 + lbzu r10,1(r4) + cmpdi r0,r10,0 + bne cr0,L(init_table) +L(finish_table): + /* set table[0] = 1 */ + li r4,1 + stb r4,0(r9) + b L(mainloop) + + /* Unrool the loop 4 times and check using the table as: + i = 0; + while (1) + { + if (table[input[i++]] == 1) + return (s[i -1] ? s + i - 1: NULL); + if (table[input[i++]] == 1) + return (s[i -1] ? s + i - 1: NULL); + if (table[input[i++]] == 1) + return (s[i -1] ? s + i - 1: NULL); + if (table[input[i++]] == 1) + return (s[i -1] ? s + i - 1: NULL); + } */ + .p2align 4 +L(unroll): + lbz r0,1(r3) + lbzx r8,r9,r0 + cmpwi cr6,r8,1 + beq cr6,L(checkend2) + lbz r10,2(r3) + lbzx r4,r9,r10 + cmpwi cr7,r4,1 + beq cr7,L(checkend3) + lbz r12,3(r3) + addi r3,r3,4 + lbzx r11,r9,r12 + cmpwi cr0,r11,1 + beq cr0,L(checkend) +L(mainloop): + lbz r12,0(r3) + addi r11,r3,1 + addi r5,r3,2 + addi r7,r3,3 + lbzx r6,r9,r12 + cmpwi cr1,r6,1 + bne cr1,L(unroll) + cmpdi cr0,r12,0 + beq cr0,L(nullfound) +L(end): + blr + + .p2align 4 +L(checkend): + cmpdi cr1,r12,0 + mr r3,r7 + bne cr1,L(end) +L(nullfound): + /* return NULL */ + li 3,0 + blr + + .p2align 4 +L(checkend2): + cmpdi cr7,r0,0 + mr r3,r11 + beq cr7,L(nullfound) + blr + + .p2align 4 +L(checkend3): + cmpdi cr6,r10,0 + mr r3,r5 + beq cr6,L(nullfound) + blr +END (strpbrk) +libc_hidden_builtin_def (strpbrk) diff --git a/sysdeps/powerpc/powerpc64/power7/strrchr.S b/sysdeps/powerpc/powerpc64/power7/strrchr.S new file mode 100644 index 0000000..e4a76c8 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strrchr.S @@ -0,0 +1,255 @@ +/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strrchr (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (strrchr) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r9,0 /* used to store last occurence */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* r4 is changed now ,if its passed as more chars + check for null again */ + cmpdi cr7,r4,0 + beq cr7,L(null_match) + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + +L(align): + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r7,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r7,r4 + cmpb r7,r7,r0 + or r12,r10,r11 + or r5,r6,r7 + or r5,r12,r5 + cmpdi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + +L(done): + /* if there are more than one 0xff in r11, find the first pos of ff + in r11 and fill r10 with 0 from that position */ + cmpdi cr7,r11,0 + beq cr7,L(no_null) +#ifdef __LITTLE_ENDIAN__ + addi r3,r11,-1 + andc r3,r3,r11 + popcntd r0,r3 +#else + cntlzd r0,r11 +#endif + subfic r0,r0,63 + li r6,-1 +#ifdef __LITTLE_ENDIAN__ + srd r0,r6,r0 +#else + sld r0,r6,r0 +#endif + and r10,r0,r10 +L(no_null): +#ifdef __LITTLE_ENDIAN__ + cntlzd r0,r10 /* Count leading zeros before c matches. */ + addi r3,r10,-1 + andc r3,r3,r10 + addi r10,r11,-1 + andc r10,r10,r11 + cmpld cr7,r3,r10 + bgt cr7,L(no_match) +#else + addi r3,r10,-1 /* Count trailing zeros before c matches. */ + andc r3,r3,r10 + popcntd r0,r3 + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + subfic r0,r0,7 + add r9,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + li r0,0 + cmpdi cr7,r11,0 /* If r11 == 0, no null's have been found. */ + beq cr7,L(align) + + .align 4 +L(no_match): + mr r3,r9 + blr + +/* We are here because strrchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +END (strrchr) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/sysdeps/powerpc/powerpc64/power7/strspn.S b/sysdeps/powerpc/powerpc64/power7/strspn.S new file mode 100644 index 0000000..d587a67 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power7/strspn.S @@ -0,0 +1,165 @@ +/* Optimized strspn implementation for PowerPC64/POWER7. + + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* size_t [r3] strspn (const char *string [r3], + const char *needleAccept [r4] */ + +/* Performance gains are grabbed through following techniques: + + > hashing of needle. + > hashing avoids scanning of duplicate entries in needle + across the string. + > initializing the hash table with Vector instructions + by quadword access. + > unrolling when scanning for character in string + across hash table. */ + +/* Algorithm is as below: + 1. A empty hash table/dictionary is created comprising of + 256 ascii character set + 2. When hash entry is found in needle , the hash index + is initialized to 1 + 3. The string is scanned until end and for every character, + its corresponding hash index is compared. + 4. initial length of string (count) until first hit of + accept needle to be found is set to 0 + 4. If hash index is set to 1 for the index of string, + count is returned. + 5. Otherwise count is incremented and scanning continues + until end of string. */ + +#include <sysdep.h> + +#undef strspn + + .machine power7 +EALIGN(strspn, 4, 0) + CALL_MCOUNT 2 + + lbz r10, 0(r4) /* load r10 with needle (r4) */ + addi r9, r1, -256 /* r9 is a hash of 256 bytes */ + + li r5, 16 /* set r5 = 16 as offset */ + li r6, 32 /* set r6 = 32 as offset */ + li r8, 48 /* set r8 = 48 as offset */ + +/*Iniatliaze hash table with Zeroes in double indexed quadword accesses */ + xxlxor v0, v0, v0 /* prepare for initializing hash */ + + stxvd2x v0, r0, r9 /* initialize 1st quadword */ + stxvd2x v0, r9, r5 + stxvd2x v0, r9, r6 + stxvd2x v0, r9, r8 /* initialize 4th quadword */ + + addi r11, r9, 64 /* r11 is index to hash */ + + stxvd2x v0, r0, r11 /* initialize 5th quadword */ + stxvd2x v0, r11, r5 + stxvd2x v0, r11, r6 + stxvd2x v0, r11, r8 /* initialize 8th quadword */ + + addi r11, r9, 128 /* r11 is index to hash */ + + stxvd2x v0, r0, r11 /* initialize 9th quadword */ + stxvd2x v0, r11, r5 + stxvd2x v0, r11, r6 + stxvd2x v0, r11, r8 /* initialize 12th quadword */ + + addi r11, r9, 192 /* r11 is index to hash */ + + stxvd2x v0, r0, r11 /* initialize 13th quadword */ + stxvd2x v0, r11, r5 + stxvd2x v0, r11, r6 + stxvd2x v0, r11, r8 /* initialize 16th quadword */ + + li r8, 1 /* r8=1, marker into hash if found in + needle */ + + cmpdi cr7, r10, 0 /* accept needle is NULL */ + beq cr7, L(skipHashing) /* if needle is NULL, skip hashing */ + + .p2align 4 /* align section to 16 byte boundary */ +L(hashing): + stbx r8, r9, r10 /* update hash with marker for the pivot of + the needle */ + lbzu r10, 1(r4) /* load needle into r10 and update to next */ + cmpdi cr7, r10, 0 /* if needle is has reached NULL, continue */ + bne cr7, L(hashing) /* loop to hash the needle */ + +L(skipHashing): + li r10, 0 /* load counter = 0 */ + b L(beginScan) + + .p2align 4 /* align section to 16 byte boundary */ +L(scanUnroll): + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + beq cr7, L(ret1stIndex) /* we have hit accept needle, return the + count */ + + lbz r8, 1(r3) /* load string[1] into r8 */ + addi r10, r10, 4 /* increment counter */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + beq cr7, L(ret2ndIndex) /* we have hit accept needle, return the + count */ + + lbz r8, 2(r3) /* load string[2] into r8 */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + beq cr7, L(ret3rdIndex) /* we have hit accept needle, return the + count */ + + lbz r8, 3(r3) /* load string[3] into r8 */ + lbzx r8, r9, r8 /* load r8 with hash value at index */ + addi r3, r3, 4 /* unroll factor , increment string by 4 */ + cmpwi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + beq cr7,L(ret4thIndex) /* we have hit accept needle, return the + count */ + +L(beginScan): + lbz r8, 0(r3) /* load string[0] into r8 */ + addi r6, r10, 1 /* place holder for counter + 1 */ + addi r5, r10, 2 /* place holder for counter + 2 */ + addi r4, r10, 3 /* place holder for counter + 3 */ + cmpdi cr7, r8, 0 /* if we hit marker in hash, we have found + accept needle */ + bne cr7, L(scanUnroll) /* continue scanning */ + +L(ret1stIndex): + mr r3, r10 /* update r3 for return */ + blr /* return */ + +L(ret2ndIndex): + mr r3, r6 /* update r3 for return */ + blr /* return */ + +L(ret3rdIndex): + mr r3, r5 /* update r3 for return */ + blr /* return */ + +L(ret4thIndex): + mr r3, r4 /* update r3 for return */ + blr /* done */ +END(strspn) +libc_hidden_builtin_def (strspn) diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/Implies b/sysdeps/powerpc/powerpc64/power8/fpu/Implies index 7fd86fd..1187cdf 100644 --- a/sysdeps/powerpc/powerpc64/power8/fpu/Implies +++ b/sysdeps/powerpc/powerpc64/power8/fpu/Implies @@ -1 +1 @@ -powerpc/powerpc64/power7/fpu/multiarch +powerpc/powerpc64/power7/fpu/ diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S new file mode 100644 index 0000000..2b27e7b --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S @@ -0,0 +1,61 @@ +/* isfinite(). PowerPC64/POWER8 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <endian.h> +#include <math_ldbl_opt.h> + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define MFVSRD_R3_V1 .byte 0x66,0x00,0x23,0x7c /* mfvsrd r3,vs1 */ +#else +#define MFVSRD_R3_V1 .byte 0x7c,0x23,0x00,0x66 /* mfvsrd r3,vs1 */ +#endif + +/* int [r3] __finite ([fp1] x) */ + +EALIGN (__finite, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x8010 + clrldi r3,r3,1 /* r3 = r3 & 0x8000000000000000 */ + rldicr r9,r9,32,31 /* r9 = (r9 << 32) & 0xffffffff */ + add r3,r3,r9 + rldicl r3,r3,1,63 + blr +END (__finite) + +hidden_def (__finite) +weak_alias (__finite, finite) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__finite, __finitef) +hidden_def (__finitef) +weak_alias (__finitef, finitef) + +#ifdef IS_IN_libm +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, __finite, __finitel, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S new file mode 100644 index 0000000..54bd941 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S @@ -0,0 +1 @@ +/* This function uses the same code as s_finite.S. */ diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S new file mode 100644 index 0000000..d09b7fc --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S @@ -0,0 +1,66 @@ +/* isinf(). PowerPC64/POWER8 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <endian.h> +#include <math_ldbl_opt.h> + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define MFVSRD_R3_V1 .byte 0x66,0x00,0x23,0x7c /* mfvsrd r3,vs1 */ +#else +#define MFVSRD_R3_V1 .byte 0x7c,0x23,0x00,0x66 /* mfvsrd r3,vs1 */ +#endif + +/* int [r3] __isinf([fp1] x) */ + +EALIGN (__isinf, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x7ff0 /* r9 = 0x7ff0 */ + rldicl r10,r3,0,1 /* r10 = r3 & (0x8000000000000000) */ + sldi r9,r9,32 /* r9 = r9 << 52 */ + cmpd cr7,r10,r9 /* fp1 & 0x7ff0000000000000 ? */ + beq cr7,L(inf) + li r3,0 /* Not inf */ + blr +L(inf): + sradi r3,r3,63 /* r3 = r3 >> 63 */ + ori r3,r3,1 /* r3 = r3 | 0x1 */ + blr +END (__isinf) + +hidden_def (__isinf) +weak_alias (__isinf, isinf) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isinf, __isinff) +hidden_def (__isinff) +weak_alias (__isinff, isinff) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#ifndef IS_IN_libm +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S new file mode 100644 index 0000000..be759e0 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isinf.S. */ diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S new file mode 100644 index 0000000..cf119e5 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S @@ -0,0 +1,61 @@ +/* isnan(). PowerPC64/POWER8 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <endian.h> +#include <math_ldbl_opt.h> + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define MFVSRD_R3_V1 .byte 0x66,0x00,0x23,0x7c /* mfvsrd r3,vs1 */ +#else +#define MFVSRD_R3_V1 .byte 0x7c,0x23,0x00,0x66 /* mfvsrd r3,vs1 */ +#endif + +/* int [r3] __isnan([f1] x) */ + +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + MFVSRD_R3_V1 + lis r9,0x7ff0 + clrldi r3,r3,1 /* r3 = r3 & 0x8000000000000000 */ + rldicr r9,r9,32,31 /* r9 = (r9 << 32) & 0xffffffff */ + subf r3,r3,r9 + rldicl r3,r3,1,63 + blr +END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#ifndef IS_IN_libm +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S new file mode 100644 index 0000000..b48c85e --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isnan.S. */ diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S new file mode 100644 index 0000000..9a55d938 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S @@ -0,0 +1,50 @@ +/* Round double to long int. POWER8 PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <endian.h> +#include <math_ldbl_opt.h> + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define MFVSRD_R3_V1 .byte 0x66,0x00,0x23,0x7c /* mfvsrd r3,vs1 */ +#else +#define MFVSRD_R3_V1 .byte 0x7c,0x23,0x00,0x66 /* mfvsrd r3,vs1 */ +#endif + +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp1,fp1 + MFVSRD_R3_V1 + blr +END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S new file mode 100644 index 0000000..f10c06a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S @@ -0,0 +1,52 @@ +/* llround function. POWER8 PowerPC64 version. + Copyright (C) 2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <endian.h> +#include <math_ldbl_opt.h> + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define MFVSRD_R3_V1 .byte 0x66,0x00,0x23,0x7c /* mfvsrd r3,vs1 */ +#else +#define MFVSRD_R3_V1 .byte 0x7c,0x23,0x00,0x66 /* mfvsrd r3,vs1 */ +#endif + +/* long long [r3] llround (float x [fp1]) */ + +ENTRY (__llround) + CALL_MCOUNT 0 + frin fp1,fp1 /* Round to nearest +-0.5. */ + fctidz fp1,fp1 /* Convert To Integer DW round toward 0. */ + MFVSRD_R3_V1 + blr +END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/sysdeps/powerpc/powerpc64/start.S b/sysdeps/powerpc/powerpc64/start.S index 15e29d9..934c558 100644 --- a/sysdeps/powerpc/powerpc64/start.S +++ b/sysdeps/powerpc/powerpc64/start.S @@ -74,7 +74,7 @@ ENTRY(_start) /* put the address of start_addresses in r8... ** ** PPC64 ABI uses R13 for thread local, so we leave it alone */ - ld r8,.L01(r2) + ld r8,.L01@toc(r2) /* and continue in libc-start, in glibc. */ b JUMPTARGET(__libc_start_main) diff --git a/sysdeps/powerpc/strcat.c b/sysdeps/powerpc/strcat.c index 06ceca7..4ff37ea 100644 --- a/sysdeps/powerpc/strcat.c +++ b/sysdeps/powerpc/strcat.c @@ -18,13 +18,16 @@ #include <string.h> -#undef strcat +#ifndef STRCAT +# undef strcat +# define STRCAT strcat +#endif /* Append SRC on the end of DEST. */ char * -strcat (char *dest, const char *src) +STRCAT(char *dest, const char *src) { strcpy (dest + strlen (dest), src); return dest; } -libc_hidden_builtin_def (strcat) +libc_hidden_builtin_def (STRCAT) |