diff options
43 files changed, 167 insertions, 121 deletions
@@ -1,3 +1,49 @@ +2013-01-07 Anton Blanchard <anton@samba.org> + + * sysdeps/powerpc/fpu/feholdexcpt.c: Fixed spelling errors. + * sysdeps/powerpc/fpu/feupdateenv.c: Likewise. + * sysdeps/powerpc/fpu/math_ldbl.h: Likewise. + * sysdeps/powerpc/powerpc32/bits/atomic.h: Likewise. + * sysdeps/powerpc/powerpc32/cell/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc32/dl-machine.c: Likewise. + * sysdeps/powerpc/powerpc32/dl-start.S: Likewise. + * sysdeps/powerpc/powerpc32/memset.S: Likewise. + * sysdeps/powerpc/powerpc32/power4/fpu/mpa.c: Likewise. + * sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c: Likewise. + * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S: Likewise. + * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S: Likewise. + * sysdeps/powerpc/powerpc32/power4/hp-timing.h: Likewise. + * sysdeps/powerpc/powerpc32/power4/memcmp.S: Likewise. + * sysdeps/powerpc/powerpc32/power4/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S: Likewise. + * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S: Likewise. + * sysdeps/powerpc/powerpc32/power6/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/memchr.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/memcmp.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/memrchr.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/strcasecmp.S: Likewise. + * sysdeps/powerpc/powerpc32/power7/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc32/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc64/bits/atomic.h: Likewise. + * sysdeps/powerpc/powerpc64/cell/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc64/dl-machine.h: Likewise. + * sysdeps/powerpc/powerpc64/fpu/s_ceill.S: Likewise. + * sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S: Likewise. + * sysdeps/powerpc/powerpc64/hp-timing.h: Likewise. + * sysdeps/powerpc/powerpc64/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc64/power4/fpu/mpa.c: Likewise. + * sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c: Likewise. + * sysdeps/powerpc/powerpc64/power4/memcmp.S: Likewise. 
+ * sysdeps/powerpc/powerpc64/power4/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc64/power4/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc64/power6/memcpy.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/memchr.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/memcmp.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/memrchr.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/strcasecmp.S: Likewise. + * sysdeps/powerpc/powerpc64/power7/strncmp.S: Likewise. + * sysdeps/powerpc/powerpc64/strncmp.S: Likewise. + 2013-01-07 Joseph Myers <joseph@codesourcery.com> * malloc/malloc.h (__MALLOC_P): Remove all definitions. diff --git a/sysdeps/powerpc/fpu/feholdexcpt.c b/sysdeps/powerpc/fpu/feholdexcpt.c index c916455..671724b 100644 --- a/sysdeps/powerpc/fpu/feholdexcpt.c +++ b/sysdeps/powerpc/fpu/feholdexcpt.c @@ -33,7 +33,7 @@ feholdexcept (fenv_t *envp) new.l[1] = old.l[1] & 7; new.l[0] = old.l[0]; - /* If the old env had any eabled exceptions, then mask SIGFPE in the + /* If the old env had any enabled exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the FPU to run faster because it always takes the default action and can not generate SIGFPE. */ if ((old.l[1] & _FPU_MASK_ALL) != 0) diff --git a/sysdeps/powerpc/fpu/feupdateenv.c b/sysdeps/powerpc/fpu/feupdateenv.c index 30f8a6b..66f2826 100644 --- a/sysdeps/powerpc/fpu/feupdateenv.c +++ b/sysdeps/powerpc/fpu/feupdateenv.c @@ -37,14 +37,14 @@ __feupdateenv (const fenv_t *envp) unchanged. */ new.l[1] = (old.l[1] & 0x1FFFFF00) | (new.l[1] & 0x1FF80FFF); - /* If the old env has no eabled exceptions and the new env has any enabled + /* If the old env has no enabled exceptions and the new env has any enabled exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the hardware into "precise mode" and may cause the FPU to run slower on some hardware. 
*/ if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0) (void)__fe_nomask_env (); - /* If the old env had any eabled exceptions and the new env has no enabled + /* If the old env had any enabled exceptions and the new env has no enabled exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the FPU to run faster because it always takes the default action and can not generate SIGFPE. */ diff --git a/sysdeps/powerpc/fpu/math_ldbl.h b/sysdeps/powerpc/fpu/math_ldbl.h index 6cd6d0b..20224e6 100644 --- a/sysdeps/powerpc/fpu/math_ldbl.h +++ b/sysdeps/powerpc/fpu/math_ldbl.h @@ -27,7 +27,7 @@ ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x) lo |= (1ULL << 52); lo = lo << 7; /* pre-shift lo to match ieee854. */ /* The lower double is normalized separately from the upper. We - may need to adjust the lower manitissa to reflect this. */ + may need to adjust the lower mantissa to reflect this. */ ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; if (ediff > 53) lo = lo >> (ediff-53); diff --git a/sysdeps/powerpc/powerpc32/bits/atomic.h b/sysdeps/powerpc/powerpc32/bits/atomic.h index 2f441ed..3e3a1ef 100644 --- a/sysdeps/powerpc/powerpc32/bits/atomic.h +++ b/sysdeps/powerpc/powerpc32/bits/atomic.h @@ -21,7 +21,7 @@ This is a hint to the hardware to expect additional updates adjacent to the lock word or not. If we are acquiring a Mutex, the hint should be true. Otherwise we releasing a Mutex or doing a simple - atomic operation. In that case we don't expect addtional updates + atomic operation. In that case we don't expect additional updates adjacent to the lock word after the Store Conditional and the hint should be false. */ @@ -35,7 +35,7 @@ /* * The 32-bit exchange_bool is different on powerpc64 because the subf - * does signed 64-bit arthmatic while the lwarx is 32-bit unsigned + * does signed 64-bit arithmetic while the lwarx is 32-bit unsigned * (a load word and zero (high 32) form). 
So powerpc64 has a slightly * different version in sysdeps/powerpc/powerpc64/bits/atomic.h. */ diff --git a/sysdeps/powerpc/powerpc32/cell/memcpy.S b/sysdeps/powerpc/powerpc32/cell/memcpy.S index 5fbdab1..6d7d4ce 100644 --- a/sysdeps/powerpc/powerpc32/cell/memcpy.S +++ b/sysdeps/powerpc/powerpc32/cell/memcpy.S @@ -34,7 +34,7 @@ * latency to memory is >400 clocks * To improve copy performance we need to prefetch source data * far ahead to hide this latency - * For best performance instructionforms ending in "." like "andi." + * For best performance instruction forms ending in "." like "andi." * should be avoided as the are implemented in microcode on CELL. * The below code is loop unrolled for the CELL cache line of 128 bytes */ @@ -146,7 +146,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) lfd fp9, 0x08(r4) dcbz r11,r6 lfd fp10, 0x10(r4) /* 4 register stride copy is optimal */ - lfd fp11, 0x18(r4) /* to hide 1st level cache lantency. */ + lfd fp11, 0x18(r4) /* to hide 1st level cache latency. */ lfd fp12, 0x20(r4) stfd fp9, 0x08(r6) stfd fp10, 0x10(r6) diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c index f9f2a5d..bd42fdf 100644 --- a/sysdeps/powerpc/powerpc32/dl-machine.c +++ b/sysdeps/powerpc/powerpc32/dl-machine.c @@ -113,7 +113,7 @@ __elf_preferred_address (struct link_map *loader, size_t maplength, /* Otherwise, quickly look for a suitable gap between 0x3FFFF and 0x70000000. 0x3FFFF is so that references off NULL pointers will cause a segfault, 0x70000000 is just paranoia (it should always - be superceded by the program's load address). */ + be superseded by the program's load address). 
*/ low = 0x0003FFFF; high = 0x70000000; for (nsid = 0; nsid < DL_NNS; ++nsid) diff --git a/sysdeps/powerpc/powerpc32/dl-start.S b/sysdeps/powerpc/powerpc32/dl-start.S index 01484e8..fa9c9bc 100644 --- a/sysdeps/powerpc/powerpc32/dl-start.S +++ b/sysdeps/powerpc/powerpc32/dl-start.S @@ -74,7 +74,7 @@ _dl_start_user: slwi r5,r3,2 add r6,r4,r5 addi r5,r6,4 -/* pass the auxilary vector in r6. This is passed to us just after _envp. */ +/* pass the auxiliary vector in r6. This is passed to us just after _envp. */ 2: lwzu r0,4(r6) cmpwi r0,0 bne 2b diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S index 2e86d1c..45c79d8 100644 --- a/sysdeps/powerpc/powerpc32/memset.S +++ b/sysdeps/powerpc/powerpc32/memset.S @@ -275,7 +275,7 @@ L(checklinesize): beq cr1,L(nondcbz) /* If the cache line size is 32 bytes then goto to L(zloopstart), - which is coded specificly for 32-byte lines (and 601). */ + which is coded specifically for 32-byte lines (and 601). */ cmplwi cr1,rCLS,32 beq cr1,L(zloopstart) diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c index f167969..b6f8341 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c @@ -409,9 +409,9 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { if (k > p2) {i1=k-p2; i2=p2+1; } else {i1=1; i2=k; } #if 1 - /* rearange this inner loop to allow the fmadd instructions to be + /* rearrange this inner loop to allow the fmadd instructions to be independent and execute in parallel on processors that have - dual symetrical FP pipelines. */ + dual symmetrical FP pipelines. */ if (i1 < (i2-1)) { /* make sure we have at least 2 iterations */ @@ -437,7 +437,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { zk += x->d[i1]*y->d[i1]; } #else - /* The orginal code. */ + /* The original code. 
*/ for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j]; #endif diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c b/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c index 098e19a..7c97d95 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c +++ b/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c @@ -59,7 +59,7 @@ __slowpow (double x, double y, double z) res1 = (double) (ldpp - ldeps); if (res != res1) /* if result still not accurate enough */ - { /* use mpa for higher persision. */ + { /* use mpa for higher precision. */ mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1; static const mp_no eps = { -3, {1.0, 4.0} }; int p; diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S index cb55816..4f1c176 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S +++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S @@ -22,7 +22,7 @@ /* double [fp1] sqrt (double x [fp1]) Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). The fsqrt instruction generates the correct value for all inputs and - sets the appropriate floating point exceptions. Extented checking is + sets the appropriate floating point exceptions. Extended checking is only needed to set errno (via __kernel_standard) if the input value is negative. diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S index a13a846..0da5b7a 100644 --- a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S +++ b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S @@ -22,7 +22,7 @@ /* float [fp1] sqrts (float x [fp1]) Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). The fsqrts instruction generates the correct value for all inputs and - sets the appropriate floating point exceptions. Extented checking is + sets the appropriate floating point exceptions. Extended checking is only needed to set errno (via __kernel_standard) if the input value is negative. 
diff --git a/sysdeps/powerpc/powerpc32/power4/hp-timing.h b/sysdeps/powerpc/powerpc32/power4/hp-timing.h index 4742d76..7d6c96e 100644 --- a/sysdeps/powerpc/powerpc32/power4/hp-timing.h +++ b/sysdeps/powerpc/powerpc32/power4/hp-timing.h @@ -82,7 +82,7 @@ typedef unsigned long long int hp_timing_t; /* That's quite simple. Use the `mftb' instruction. Note that the value might not be 100% accurate since there might be some more instructions running in this moment. This could be changed by using a barrier like - 'lwsync' right before the `mftb' instruciton. But we are not interested + 'lwsync' right before the `mftb' instruction. But we are not interested in accurate clock cycles here so we don't do this. */ #define HP_TIMING_NOW(Var) \ diff --git a/sysdeps/powerpc/powerpc32/power4/memcmp.S b/sysdeps/powerpc/powerpc32/power4/memcmp.S index 65a0d80..bbee6f4 100644 --- a/sysdeps/powerpc/powerpc32/power4/memcmp.S +++ b/sysdeps/powerpc/powerpc32/power4/memcmp.S @@ -69,7 +69,7 @@ EALIGN (BP_SYM(memcmp), 4, 0) Otherwise we know the two strings have the same alignment (but not yet word aligned). So we force the string addresses to the next lower word boundary and special case this first word using shift left to - eliminate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (word aligned) compare loop, starting at the second word, we need to adjust the length (rN) and special case the loop versioning for the first word. This insures that the loop count is @@ -517,7 +517,7 @@ L(zeroLength): Otherwise we know that rSTR1 is not aready word aligned yet. So we can force the string addresses to the next lower word boundary and special case this first word using shift left to - eliminate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. 
Since we want to join the normal (Wualigned) compare loop, starting at the second word, we need to adjust the length (rN) and special case the loop versioning for the first W. This insures that the loop count is diff --git a/sysdeps/powerpc/powerpc32/power4/strncmp.S b/sysdeps/powerpc/powerpc32/power4/strncmp.S index ba12632..50d79dc 100644 --- a/sysdeps/powerpc/powerpc32/power4/strncmp.S +++ b/sysdeps/powerpc/powerpc32/power4/strncmp.S @@ -51,7 +51,7 @@ EALIGN (BP_SYM(strncmp), 4, 0) cmplwi cr1, rN, 0 lis rFEFE, -0x101 bne L(unaligned) -/* We are word alligned so set up for two loops. first a word +/* We are word aligned so set up for two loops. first a word loop, then fall into the byte loop if any residual. */ srwi. rTMP, rN, 2 clrlwi rN, rN, 30 diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S index aab4e56..23559aa 100644 --- a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S +++ b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S @@ -22,7 +22,7 @@ /* double [fp1] sqrt (double x [fp1]) Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). The fsqrt instruction generates the correct value for all inputs and - sets the appropriate floating point exceptions. Extented checking is + sets the appropriate floating point exceptions. Extended checking is only needed to set errno (via __kernel_standard) if the input value is negative. diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S index 6d80ad9..590c24c 100644 --- a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S +++ b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S @@ -22,7 +22,7 @@ /* float [fp1] sqrts (float x [fp1]) Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). The fsqrts instruction generates the correct value for all inputs and - sets the appropriate floating point exceptions. Extented checking is + sets the appropriate floating point exceptions. 
Extended checking is only needed to set errno (via __kernel_standard) if the input value is negative. diff --git a/sysdeps/powerpc/powerpc32/power6/memcpy.S b/sysdeps/powerpc/powerpc32/power6/memcpy.S index c1dd74d..203c979 100644 --- a/sysdeps/powerpc/powerpc32/power6/memcpy.S +++ b/sysdeps/powerpc/powerpc32/power6/memcpy.S @@ -411,31 +411,31 @@ L(wdu): not. For power4, power5 and power6 machines there is penalty for unaligned loads (src) that cross 32-byte, cacheline, or page boundaries. So we want to use simple (unaligned) loads where - posible but avoid them where we know the load would span a 32-byte + possible but avoid them where we know the load would span a 32-byte boundary. At this point we know we have at least 29 (32-3) bytes to copy the src is unaligned. and we may cross at least one 32-byte - boundary. Also we have the following regester values: + boundary. Also we have the following register values: r3 == adjusted dst, word aligned r4 == unadjusted src r5 == unadjusted len r9 == adjusted Word length r10 == src alignment (1-3) - r12 == adjuested src, not aligned + r12 == adjusted src, not aligned r31 == adjusted len - First we need to copy word upto but not crossing the next 32-byte + First we need to copy word up to but not crossing the next 32-byte boundary. Then perform aligned loads just before and just after - the boundary and use shifts and or to gernerate the next aligned + the boundary and use shifts and or to generate the next aligned word for dst. If more then 32 bytes remain we copy (unaligned src) the next 7 words and repeat the loop until less then 32-bytes - remaim. + remain. Then if more then 4 bytes remain we again use aligned loads, shifts and or to generate the next dst word. We then process the remaining words using unaligned loads as needed. Finally we check - if there more then 0 bytes (1-3) bytes remainting and use + if there are more than 0 bytes (1-3 bytes) remaining and use halfword and or byte load/stores to complete the copy.
*/ mr 4,12 /* restore unaligned adjusted src ptr */ @@ -512,7 +512,7 @@ L(wdu_h32_4): addi 3,3,4 .align 4 L(wdu_h32_0): -/* set up for 32-byte boundry crossing word move and possibly 32-byte +/* set up for 32-byte boundary crossing word move and possibly 32-byte move loop. */ clrrwi 12,4,2 cmplwi cr5,31,32 diff --git a/sysdeps/powerpc/powerpc32/power7/memchr.S b/sysdeps/powerpc/powerpc32/power7/memchr.S index 1412061..3d8389e 100644 --- a/sysdeps/powerpc/powerpc32/power7/memchr.S +++ b/sysdeps/powerpc/powerpc32/power7/memchr.S @@ -44,7 +44,7 @@ L(proceed): rlwinm r6,r3,3,27,28 /* Calculate padding. */ cmpli cr6,r6,0 /* cr6 == Do we have padding? */ lwz r12,0(r8) /* Load word from memory. */ - cmpb r10,r12,r4 /* Check for BYTE's in WORD1. */ + cmpb r10,r12,r4 /* Check for BYTEs in WORD1. */ beq cr6,L(proceed_no_padding) slw r10,r10,r6 srw r10,r10,r6 diff --git a/sysdeps/powerpc/powerpc32/power7/memcmp.S b/sysdeps/powerpc/powerpc32/power7/memcmp.S index f2cb1df..815e3c3 100644 --- a/sysdeps/powerpc/powerpc32/power7/memcmp.S +++ b/sysdeps/powerpc/powerpc32/power7/memcmp.S @@ -73,7 +73,7 @@ EALIGN (BP_SYM(memcmp),4,0) Otherwise we know the two strings have the same alignment (but not yet word aligned). So we force the string addresses to the next lower word boundary and special case this first word using shift left to - eliminate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (word aligned) compare loop, starting at the second word, we need to adjust the length (rN) and special case the loop versioning for the first word. This insures that the loop count is @@ -520,7 +520,7 @@ L(zeroLength): Otherwise we know that rSTR1 is not aready word aligned yet. So we can force the string addresses to the next lower word boundary and special case this first word using shift left to - eliminate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. 
Since we want to join the normal (Wualigned) compare loop, starting at the second word, we need to adjust the length (rN) and special case the loop versioning for the first W. This insures that the loop count is diff --git a/sysdeps/powerpc/powerpc32/power7/memrchr.S b/sysdeps/powerpc/powerpc32/power7/memrchr.S index a6f4955..9ff8d66 100644 --- a/sysdeps/powerpc/powerpc32/power7/memrchr.S +++ b/sysdeps/powerpc/powerpc32/power7/memrchr.S @@ -51,7 +51,7 @@ L(proceed): cmpb r10,r12,r4 /* Check for BYTE in WORD1. */ slw r10,r10,r0 srw r10,r10,r0 - cmplwi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */ + cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */ bne cr7,L(done) /* Are we done already? */ diff --git a/sysdeps/powerpc/powerpc32/power7/strcasecmp.S b/sysdeps/powerpc/powerpc32/power7/strcasecmp.S index 2fcca03..52d73d9 100644 --- a/sysdeps/powerpc/powerpc32/power7/strcasecmp.S +++ b/sysdeps/powerpc/powerpc32/power7/strcasecmp.S @@ -39,8 +39,8 @@ ENTRY (BP_SYM (__STRCMP)) #define rSTR1 r5 /* 1st string */ #define rSTR2 r4 /* 2nd string */ #define rLOCARG r5 /* 3rd argument: locale_t */ -#define rCHAR1 r6 /* Byte readed from 1st string */ -#define rCHAR2 r7 /* Byte readed from 2nd string */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ #define rADDR1 r8 /* Address of tolower(rCHAR1) */ #define rADDR2 r12 /* Address of tolower(rCHAR2) */ #define rLWR1 r8 /* Byte tolower(rCHAR1) */ diff --git a/sysdeps/powerpc/powerpc32/power7/strncmp.S b/sysdeps/powerpc/powerpc32/power7/strncmp.S index 2016519..3629783 100644 --- a/sysdeps/powerpc/powerpc32/power7/strncmp.S +++ b/sysdeps/powerpc/powerpc32/power7/strncmp.S @@ -55,7 +55,7 @@ EALIGN (BP_SYM(strncmp),5,0) cmplwi cr1,rN,0 lis rFEFE,-0x101 bne L(unaligned) -/* We are word alligned so set up for two loops. first a word +/* We are word aligned so set up for two loops. first a word loop, then fall into the byte loop if any residual. */ srwi. 
rTMP,rN,2 clrlwi rN,rN,30 diff --git a/sysdeps/powerpc/powerpc32/strncmp.S b/sysdeps/powerpc/powerpc32/strncmp.S index 149e51a..d9e274b 100644 --- a/sysdeps/powerpc/powerpc32/strncmp.S +++ b/sysdeps/powerpc/powerpc32/strncmp.S @@ -49,7 +49,7 @@ EALIGN (BP_SYM(strncmp), 4, 0) cmplwi cr1, rN, 0 lis rFEFE, -0x101 bne L(unaligned) -/* We are word alligned so set up for two loops. first a word +/* We are word aligned so set up for two loops. first a word loop, then fall into the byte loop if any residual. */ srwi. rTMP, rN, 2 clrlwi rN, rN, 30 diff --git a/sysdeps/powerpc/powerpc64/bits/atomic.h b/sysdeps/powerpc/powerpc64/bits/atomic.h index f1e42a9..84a1447 100644 --- a/sysdeps/powerpc/powerpc64/bits/atomic.h +++ b/sysdeps/powerpc/powerpc64/bits/atomic.h @@ -21,7 +21,7 @@ This is a hint to the hardware to expect additional updates adjacent to the lock word or not. If we are acquiring a Mutex, the hint should be true. Otherwise we releasing a Mutex or doing a simple - atomic operation. In that case we don't expect addtional updates + atomic operation. In that case we don't expect additional updates adjacent to the lock word after the Store Conditional and the hint should be false. */ @@ -34,7 +34,7 @@ #endif /* The 32-bit exchange_bool is different on powerpc64 because the subf - does signed 64-bit arthmatic while the lwarx is 32-bit unsigned + does signed 64-bit arithmetic while the lwarx is 32-bit unsigned (a load word and zero (high 32) form) load. In powerpc64 register values are 64-bit by default, including oldval. 
The value in old val unknown sign extension, lwarx loads the 32-bit diff --git a/sysdeps/powerpc/powerpc64/cell/memcpy.S b/sysdeps/powerpc/powerpc64/cell/memcpy.S index 3ec7630..5ba4ebf 100644 --- a/sysdeps/powerpc/powerpc64/cell/memcpy.S +++ b/sysdeps/powerpc/powerpc64/cell/memcpy.S @@ -34,7 +34,7 @@ * latency to memory is >400 clocks * To improve copy performance we need to prefetch source data * far ahead to hide this latency - * For best performance instructionforms ending in "." like "andi." + * For best performance instruction forms ending in "." like "andi." * should be avoided as the are implemented in microcode on CELL. * The below code is loop unrolled for the CELL cache line of 128 bytes */ @@ -146,7 +146,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) ld r9, 0x08(r4) dcbz r11,r6 ld r7, 0x10(r4) /* 4 register stride copy is optimal */ - ld r8, 0x18(r4) /* to hide 1st level cache lantency. */ + ld r8, 0x18(r4) /* to hide 1st level cache latency. */ ld r0, 0x20(r4) std r9, 0x08(r6) std r7, 0x10(r6) diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h index 7b3e4dd..14ade21 100644 --- a/sysdeps/powerpc/powerpc64/dl-machine.h +++ b/sysdeps/powerpc/powerpc64/dl-machine.h @@ -202,7 +202,7 @@ BODY_PREFIX "_dl_start_user:\n" \ " sldi 5,3,3\n" \ " add 6,4,5\n" \ " addi 5,6,8\n" \ -/* Pass the auxilary vector in r6. This is passed to us just after \ +/* Pass the auxiliary vector in r6. This is passed to us just after \ _envp. */ \ "2: ldu 0,8(6)\n" \ " cmpdi 0,0\n" \ @@ -322,13 +322,13 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) /* Relocate the DT_PPC64_GLINK entry in the _DYNAMIC section. elf_get_dynamic_info takes care of the standard entries but doesn't know exactly what to do with processor specific - entires. */ + entries. 
*/ if (info[DT_PPC64(GLINK)] != NULL) info[DT_PPC64(GLINK)]->d_un.d_ptr += l_addr; if (lazy) { - /* The function descriptor of the appropriate trampline + /* The function descriptor of the appropriate trampoline routine is used to set the 1st and 2nd doubleword of the plt_reserve. */ Elf64_FuncDesc *resolve_fd; diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S index 24fd521..bffac39 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S @@ -31,7 +31,7 @@ PowerPC64 long double uses the IBM extended format which is represented two 64-floating point double values. The values are non-overlapping giving an effective precision of 106 bits. The first - double contains the high order bits of mantisa and is always ceiled + double contains the high order bits of mantissa and is always ceiled to represent a normal ceiling of long double to double. Since the long double value is sum of the high and low values, the low double normally has the opposite sign to compensate for the this ceiling. @@ -40,7 +40,7 @@ 1) |x| < 2**52, all the integer bits are in the high double. ceil the high double and set the low double to -0.0. 2) |x| >= 2**52, ceiling involves both doubles. - See the comment before lable .L2 for details. + See the comment before label .L2 for details. */ ENTRY (__ceill) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S index 9cf03cc..b235d9b 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S @@ -26,16 +26,16 @@ .section ".text" /* long double [fp1,fp2] nearbyintl (long double x [fp1,fp2]) - IEEE 1003.1 nearbyintl function. nearbyintl is simular to the rintl + IEEE 1003.1 nearbyintl function. nearbyintl is similar to the rintl but does raise the "inexact" exception. 
This implementation is - based on rintl but explicitly maskes the inexact exception on entry + based on rintl but explicitly masks the inexact exception on entry and clears any pending inexact before restoring the exception mask on exit. PowerPC64 long double uses the IBM extended format which is represented two 64-floating point double values. The values are non-overlapping giving an effective precision of 106 bits. The first - double contains the high order bits of mantisa and is always rounded + double contains the high order bits of mantissa and is always rounded to represent a normal rounding of long double to double. Since the long double value is sum of the high and low values, the low double normally has the opposite sign to compensate for the this rounding. @@ -44,7 +44,7 @@ 1) |x| < 2**52, all the integer bits are in the high double. floor the high double and set the low double to -0.0. 2) |x| >= 2**52, Rounding involves both doubles. - See the comment before lable .L2 for details. + See the comment before label .L2 for details. */ ENTRY (__nearbyintl) mffs fp11 /* Save current FPSCR. */ diff --git a/sysdeps/powerpc/powerpc64/hp-timing.h b/sysdeps/powerpc/powerpc64/hp-timing.h index 12053a4..e73ad5a 100644 --- a/sysdeps/powerpc/powerpc64/hp-timing.h +++ b/sysdeps/powerpc/powerpc64/hp-timing.h @@ -82,7 +82,7 @@ typedef unsigned long long int hp_timing_t; /* That's quite simple. Use the `mftb' instruction. Note that the value might not be 100% accurate since there might be some more instructions running in this moment. This could be changed by using a barrier like - 'lwsync' right before the `mftb' instruciton. But we are not interested + 'lwsync' right before the `mftb' instruction. But we are not interested in accurate clock cycles here so we don't do this. 
*/ #ifdef _ARCH_PWR4 #define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mfspr %0,268" : "=r" (Var)) diff --git a/sysdeps/powerpc/powerpc64/memcpy.S b/sysdeps/powerpc/powerpc64/memcpy.S index 82a40f3..7c1b656 100644 --- a/sysdeps/powerpc/powerpc64/memcpy.S +++ b/sysdeps/powerpc/powerpc64/memcpy.S @@ -28,11 +28,11 @@ with the appropriate combination of byte and halfword load/stores. There is minimal effort to optimize the alignment of short moves. The 64-bit implementations of POWER3 and POWER4 do a reasonable job - of handling unligned load/stores that do not cross 32-byte boundries. + of handling unaligned load/stores that do not cross 32-byte boundaries. Longer moves (>= 32-bytes) justify the effort to get at least the destination doubleword (8-byte) aligned. Further optimization is - posible when both source and destination are doubleword aligned. + possible when both source and destination are doubleword aligned. Each case has a optimized unrolled loop. */ EALIGN (BP_SYM (memcpy), 5, 0) @@ -43,9 +43,9 @@ EALIGN (BP_SYM (memcpy), 5, 0) std 3,-16(1) std 31,-8(1) cfi_offset(31,-8) - andi. 11,3,7 /* check alignement of dst. */ + andi. 11,3,7 /* check alignment of dst. */ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ - clrldi 10,4,61 /* check alignement of src. */ + clrldi 10,4,61 /* check alignment of src. */ cmpldi cr6,5,8 ble- cr1,.L2 /* If move < 32 bytes use short move code. */ cmpld cr6,10,11 @@ -56,7 +56,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) beq .L0 subf 31,0,5 - /* Move 0-7 bytes as needed to get the destination doubleword alligned. */ + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ 1: bf 31,2f lbz 6,0(12) addi 12,12,1 @@ -73,10 +73,10 @@ EALIGN (BP_SYM (memcpy), 5, 0) stw 6,0(3) addi 3,3,4 0: - clrldi 10,12,61 /* check alignement of src again. */ + clrldi 10,12,61 /* check alignment of src again. */ srdi 9,31,3 /* Number of full double words remaining. 
*/ - /* Copy doublewords from source to destination, assumpting the + /* Copy doublewords from source to destination, assuming the destination is aligned on a doubleword boundary. At this point we know there are at least 25 bytes left (32-7) to copy. @@ -152,7 +152,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) add 12,12,0 /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. */ 4: bf 29,2f lwz 6,0(12) addi 12,12,4 @@ -282,7 +282,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) bne cr6,4f /* Would have liked to use use ld/std here but the 630 processors are slow for load/store doubles that are not at least word aligned. - Unaligned Load/Store word execute with only a 1 cycle penaltity. */ + Unaligned Load/Store word execute with only a 1 cycle penalty. */ lwz 6,0(4) lwz 7,4(4) stw 6,0(3) diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c index f167969..b6f8341 100644 --- a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c +++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c @@ -409,9 +409,9 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { if (k > p2) {i1=k-p2; i2=p2+1; } else {i1=1; i2=k; } #if 1 - /* rearange this inner loop to allow the fmadd instructions to be + /* rearrange this inner loop to allow the fmadd instructions to be independent and execute in parallel on processors that have - dual symetrical FP pipelines. */ + dual symmetrical FP pipelines. */ if (i1 < (i2-1)) { /* make sure we have at least 2 iterations */ @@ -437,7 +437,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { zk += x->d[i1]*y->d[i1]; } #else - /* The orginal code. */ + /* The original code. 
*/ for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j]; #endif diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c b/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c index 098e19a..7c97d95 100644 --- a/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c +++ b/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c @@ -59,7 +59,7 @@ __slowpow (double x, double y, double z) res1 = (double) (ldpp - ldeps); if (res != res1) /* if result still not accurate enough */ - { /* use mpa for higher persision. */ + { /* use mpa for higher precision. */ mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1; static const mp_no eps = { -3, {1.0, 4.0} }; int p; diff --git a/sysdeps/powerpc/powerpc64/power4/memcmp.S b/sysdeps/powerpc/powerpc64/power4/memcmp.S index 6592281..7df52f8 100644 --- a/sysdeps/powerpc/powerpc64/power4/memcmp.S +++ b/sysdeps/powerpc/powerpc64/power4/memcmp.S @@ -53,7 +53,7 @@ EALIGN (BP_SYM(memcmp), 4, 0) beq- cr6, L(zeroLength) dcbt 0,rSTR1 dcbt 0,rSTR2 -/* If less than 8 bytes or not aligned, use the unalligned +/* If less than 8 bytes or not aligned, use the unaligned byte loop. */ blt cr1, L(bytealigned) std rWORD8,-8(r1) @@ -62,7 +62,7 @@ EALIGN (BP_SYM(memcmp), 4, 0) cfi_offset(rWORD7,-16) bne L(unaligned) /* At this point we know both strings have the same alignment and the - compare length is at least 8 bytes. rBITDIF containes the low order + compare length is at least 8 bytes. rBITDIF contains the low order 3 bits of rSTR1 and cr5 contains the result of the logical compare of rBITDIF to 0. If rBITDIF == 0 then we are already double word aligned and can perform the DWaligned loop. @@ -70,7 +70,7 @@ EALIGN (BP_SYM(memcmp), 4, 0) Otherwise we know the two strings have the same alignment (but not yet DW). So we can force the string addresses to the next lower DW boundary and special case this first DW word using shift left to - ellimiate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. 
Since we want to join the normal (DWaligned) compare loop, starting at the second double word, we need to adjust the length (rN) and special case the loop versioning for the first DW. This insures that the loop count is @@ -152,8 +152,8 @@ L(DWaligned): L(dP1): mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ /* Normally we'd use rWORD7/rWORD8 here, but since we might exit early - (8-15 byte compare), we want to use only volitile registers. This - means we can avoid restoring non-volitile registers since we did not + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not change any on the early exit path. The key here is the non-early exit path only cares about the condition code (cr5), not about which register pair was used. */ @@ -215,7 +215,7 @@ L(dP2e): bne cr5, L(dLcr5) b L(dLoop2) /* Again we are on a early exit path (16-23 byte compare), we want to - only use volitile registers and avoid restoring non-volitile + only use volatile registers and avoid restoring non-volatile registers. */ .align 4 L(dP2x): @@ -256,7 +256,7 @@ L(dP3e): bne cr6, L(dLcr6) b L(dLoop1) /* Again we are on a early exit path (24-31 byte compare), we want to - only use volitile registers and avoid restoring non-volitile + only use volatile registers and avoid restoring non-volatile registers. */ .align 4 L(dP3x): @@ -340,7 +340,7 @@ L(d04): beq L(zeroLength) /* At this point we have a remainder of 1 to 7 bytes to compare. Since we are aligned it is safe to load the whole double word, and use - shift right double to elliminate bits beyond the compare length. */ + shift right double to eliminate bits beyond the compare length. */ L(d00): ld rWORD1, 8(rSTR1) ld rWORD2, 8(rSTR2) @@ -496,15 +496,15 @@ L(zeroLength): .align 4 /* At this point we know the strings have different alignment and the - compare length is at least 8 bytes. rBITDIF containes the low order + compare length is at least 8 bytes. 
rBITDIF contains the low order 3 bits of rSTR1 and cr5 contains the result of the logical compare of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word aligned and can perform the DWunaligned loop. - Otherwise we know that rSTR1 is not aready DW aligned yet. + Otherwise we know that rSTR1 is not already DW aligned yet. So we can force the string addresses to the next lower DW boundary and special case this first DW word using shift left to - ellimiate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (DWaligned) compare loop, starting at the second double word, we need to adjust the length (rN) and special case the loop versioning for the first DW. This insures that the loop count is @@ -537,7 +537,7 @@ L(unaligned): clrrdi rSTR2, rSTR2, 3 std r26,-48(r1) cfi_offset(r26,-48) -/* Compute the leaft/right shift counts for the unalign rSTR2, +/* Compute the left/right shift counts for the unalign rSTR2, compensating for the logical (DW aligned) start of rSTR1. */ clrldi rSHL, r27, 61 clrrdi rSTR1, rSTR1, 3 @@ -876,7 +876,7 @@ L(du14): sldi. rN, rN, 3 bne cr5, L(duLcr5) /* At this point we have a remainder of 1 to 7 bytes to compare. We use - shift right double to elliminate bits beyond the compare length. + shift right double to eliminate bits beyond the compare length. This allows the use of double word subtract to compute the final result. diff --git a/sysdeps/powerpc/powerpc64/power4/memcpy.S b/sysdeps/powerpc/powerpc64/power4/memcpy.S index 9d0b478..734434a 100644 --- a/sysdeps/powerpc/powerpc64/power4/memcpy.S +++ b/sysdeps/powerpc/powerpc64/power4/memcpy.S @@ -28,11 +28,11 @@ with the appropriate combination of byte and halfword load/stores. There is minimal effort to optimize the alignment of short moves. The 64-bit implementations of POWER3 and POWER4 do a reasonable job - of handling unligned load/stores that do not cross 32-byte boundries. 
+ of handling unaligned load/stores that do not cross 32-byte boundaries. Longer moves (>= 32-bytes) justify the effort to get at least the destination doubleword (8-byte) aligned. Further optimization is - posible when both source and destination are doubleword aligned. + possible when both source and destination are doubleword aligned. Each case has a optimized unrolled loop. */ .machine power4 @@ -44,9 +44,9 @@ EALIGN (BP_SYM (memcpy), 5, 0) std 3,-16(1) std 31,-8(1) cfi_offset(31,-8) - andi. 11,3,7 /* check alignement of dst. */ + andi. 11,3,7 /* check alignment of dst. */ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ - clrldi 10,4,61 /* check alignement of src. */ + clrldi 10,4,61 /* check alignment of src. */ cmpldi cr6,5,8 ble- cr1,.L2 /* If move < 32 bytes use short move code. */ cmpld cr6,10,11 @@ -57,7 +57,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) beq .L0 subf 31,0,5 - /* Move 0-7 bytes as needed to get the destination doubleword alligned. */ + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ 1: bf 31,2f lbz 6,0(12) addi 12,12,1 @@ -74,10 +74,10 @@ EALIGN (BP_SYM (memcpy), 5, 0) stw 6,0(3) addi 3,3,4 0: - clrldi 10,12,61 /* check alignement of src again. */ + clrldi 10,12,61 /* check alignment of src again. */ srdi 9,31,3 /* Number of full double words remaining. */ - /* Copy doublewords from source to destination, assumpting the + /* Copy doublewords from source to destination, assuming the destination is aligned on a doubleword boundary. At this point we know there are at least 25 bytes left (32-7) to copy. @@ -154,7 +154,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) add 12,12,0 /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. 
*/ 4: bf 29,2f lwz 6,0(12) addi 12,12,4 @@ -284,7 +284,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) bne cr6,4f /* Would have liked to use use ld/std here but the 630 processors are slow for load/store doubles that are not at least word aligned. - Unaligned Load/Store word execute with only a 1 cycle penaltity. */ + Unaligned Load/Store word execute with only a 1 cycle penalty. */ lwz 6,0(4) lwz 7,4(4) stw 6,0(3) diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S index 0940571..19877fa 100644 --- a/sysdeps/powerpc/powerpc64/power4/strncmp.S +++ b/sysdeps/powerpc/powerpc64/power4/strncmp.S @@ -52,7 +52,7 @@ EALIGN (BP_SYM(strncmp), 4, 0) cmpldi cr1, rN, 0 lis rFEFE, -0x101 bne L(unaligned) -/* We are doubleword alligned so set up for two loops. first a double word +/* We are doubleword aligned so set up for two loops. first a double word loop, then fall into the byte loop if any residual. */ srdi. rTMP, rN, 3 clrldi rN, rN, 61 diff --git a/sysdeps/powerpc/powerpc64/power6/memcpy.S b/sysdeps/powerpc/powerpc64/power6/memcpy.S index 57f4d06..64f5b2f 100644 --- a/sysdeps/powerpc/powerpc64/power6/memcpy.S +++ b/sysdeps/powerpc/powerpc64/power6/memcpy.S @@ -28,16 +28,16 @@ with the appropriate combination of byte and halfword load/stores. There is minimal effort to optimize the alignment of short moves. The 64-bit implementations of POWER3 and POWER4 do a reasonable job - of handling unligned load/stores that do not cross 32-byte boundries. + of handling unaligned load/stores that do not cross 32-byte boundaries. Longer moves (>= 32-bytes) justify the effort to get at least the destination doubleword (8-byte) aligned. Further optimization is - posible when both source and destination are doubleword aligned. + possible when both source and destination are doubleword aligned. Each case has a optimized unrolled loop. 
- For POWER6 unaligned loads will take a 20+ cycle hicup for any + For POWER6 unaligned loads will take a 20+ cycle hiccup for any L1 cache miss that crosses a 32- or 128-byte boundary. Store - is more forgiving and does not take a hicup until page or + is more forgiving and does not take a hiccup until page or segment boundaries. So we require doubleword alignment for the source but may take a risk and only require word alignment for the destination. */ @@ -50,9 +50,9 @@ EALIGN (BP_SYM (memcpy), 7, 0) neg 0,3 std 3,-16(1) std 31,-8(1) - andi. 11,3,7 /* check alignement of dst. */ + andi. 11,3,7 /* check alignment of dst. */ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ - clrldi 10,4,61 /* check alignement of src. */ + clrldi 10,4,61 /* check alignment of src. */ cmpldi cr6,5,8 ble- cr1,.L2 /* If move < 32 bytes use short move code. */ mtcrf 0x01,0 @@ -61,8 +61,8 @@ EALIGN (BP_SYM (memcpy), 7, 0) beq .L0 subf 5,0,5 - /* Move 0-7 bytes as needed to get the destination doubleword alligned. - Duplicate some code to maximize fall-throught and minimize agen delays. */ + /* Move 0-7 bytes as needed to get the destination doubleword aligned. + Duplicate some code to maximize fall-through and minimize agen delays. */ 1: bf 31,2f lbz 6,0(4) stb 6,0(3) @@ -95,10 +95,10 @@ EALIGN (BP_SYM (memcpy), 7, 0) add 4,4,0 add 3,3,0 - clrldi 10,4,61 /* check alignement of src again. */ + clrldi 10,4,61 /* check alignment of src again. */ srdi 9,5,3 /* Number of full double words remaining. */ - /* Copy doublewords from source to destination, assumpting the + /* Copy doublewords from source to destination, assuming the destination is aligned on a doubleword boundary. At this point we know there are at least 25 bytes left (32-7) to copy. @@ -130,7 +130,7 @@ EALIGN (BP_SYM (memcpy), 7, 0) load, load, store, store every 2 cycles. The following code is sensitive to cache line alignment. 
Do not - make any change with out first making sure thay don't result in + make any change with out first making sure they don't result in splitting ld/std pairs across a cache line. */ mtcrf 0x02,5 @@ -329,7 +329,7 @@ L(das_tail): L(das_tail2): /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. */ 4: bf 29,2f lwz 6,0(4) stw 6,0(3) @@ -537,7 +537,7 @@ L(dus_tailX): .LE8: mr 12,4 bne cr6,L(dus_4) -/* Exactly 8 bytes. We may cross a 32-/128-byte boundry and take a ~20 +/* Exactly 8 bytes. We may cross a 32-/128-byte boundary and take a ~20 cycle delay. This case should be rare and any attempt to avoid this would take most of 20 cycles any way. */ ld 6,0(4) @@ -1146,7 +1146,7 @@ L(du_done): add 3,3,0 add 12,12,0 /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. */ 4: bf 29,2f lwz 6,0(12) addi 12,12,4 diff --git a/sysdeps/powerpc/powerpc64/power7/memcmp.S b/sysdeps/powerpc/powerpc64/power7/memcmp.S index 80687f8..a7caa48 100644 --- a/sysdeps/powerpc/powerpc64/power7/memcmp.S +++ b/sysdeps/powerpc/powerpc64/power7/memcmp.S @@ -55,7 +55,7 @@ EALIGN (BP_SYM(memcmp),4,0) beq- cr6,L(zeroLength) dcbt 0,rSTR1 dcbt 0,rSTR2 -/* If less than 8 bytes or not aligned, use the unalligned +/* If less than 8 bytes or not aligned, use the unaligned byte loop. */ blt cr1,L(bytealigned) std rWORD8,-8(r1) @@ -64,7 +64,7 @@ EALIGN (BP_SYM(memcmp),4,0) cfi_offset(rWORD7,-16) bne L(unaligned) /* At this point we know both strings have the same alignment and the - compare length is at least 8 bytes. rBITDIF containes the low order + compare length is at least 8 bytes. rBITDIF contains the low order 3 bits of rSTR1 and cr5 contains the result of the logical compare of rBITDIF to 0. If rBITDIF == 0 then we are already double word aligned and can perform the DWaligned loop. 
@@ -72,7 +72,7 @@ EALIGN (BP_SYM(memcmp),4,0) Otherwise we know the two strings have the same alignment (but not yet DW). So we can force the string addresses to the next lower DW boundary and special case this first DW word using shift left to - ellimiate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (DWaligned) compare loop, starting at the second double word, we need to adjust the length (rN) and special case the loop versioning for the first DW. This insures that the loop count is @@ -154,8 +154,8 @@ L(DWaligned): L(dP1): mtctr rTMP /* Normally we'd use rWORD7/rWORD8 here, but since we might exit early - (8-15 byte compare), we want to use only volitile registers. This - means we can avoid restoring non-volitile registers since we did not + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not change any on the early exit path. The key here is the non-early exit path only cares about the condition code (cr5), not about which register pair was used. */ @@ -217,7 +217,7 @@ L(dP2e): bne cr5,L(dLcr5) b L(dLoop2) /* Again we are on a early exit path (16-23 byte compare), we want to - only use volitile registers and avoid restoring non-volitile + only use volatile registers and avoid restoring non-volatile registers. */ .align 4 L(dP2x): @@ -258,7 +258,7 @@ L(dP3e): bne cr6,L(dLcr6) b L(dLoop1) /* Again we are on a early exit path (24-31 byte compare), we want to - only use volitile registers and avoid restoring non-volitile + only use volatile registers and avoid restoring non-volatile registers. */ .align 4 L(dP3x): @@ -342,7 +342,7 @@ L(d04): beq L(zeroLength) /* At this point we have a remainder of 1 to 7 bytes to compare. Since we are aligned it is safe to load the whole double word, and use - shift right double to elliminate bits beyond the compare length. 
*/ + shift right double to eliminate bits beyond the compare length. */ L(d00): ld rWORD1,8(rSTR1) ld rWORD2,8(rSTR2) @@ -498,15 +498,15 @@ L(zeroLength): .align 4 /* At this point we know the strings have different alignment and the - compare length is at least 8 bytes. rBITDIF containes the low order + compare length is at least 8 bytes. rBITDIF contains the low order 3 bits of rSTR1 and cr5 contains the result of the logical compare of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word aligned and can perform the DWunaligned loop. - Otherwise we know that rSTR1 is not aready DW aligned yet. + Otherwise we know that rSTR1 is not already DW aligned yet. So we can force the string addresses to the next lower DW boundary and special case this first DW word using shift left to - ellimiate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (DWaligned) compare loop, starting at the second double word, we need to adjust the length (rN) and special case the loop versioning for the first DW. This insures that the loop count is @@ -539,7 +539,7 @@ L(unaligned): clrrdi rSTR2,rSTR2,3 std r26,-48(r1) cfi_offset(r26,-48) -/* Compute the leaft/right shift counts for the unalign rSTR2, +/* Compute the left/right shift counts for the unaligned rSTR2, compensating for the logical (DW aligned) start of rSTR1. */ clrldi rSHL,r27,61 clrrdi rSTR1,rSTR1,3 @@ -878,7 +878,7 @@ L(du14): sldi. rN,rN,3 bne cr5,L(duLcr5) /* At this point we have a remainder of 1 to 7 bytes to compare. We use - shift right double to elliminate bits beyond the compare length. + shift right double to eliminate bits beyond the compare length. This allows the use of double word subtract to compute the final result. 
diff --git a/sysdeps/powerpc/powerpc64/power7/memrchr.S b/sysdeps/powerpc/powerpc64/power7/memrchr.S index 624d74f..d3ffe4c 100644 --- a/sysdeps/powerpc/powerpc64/power7/memrchr.S +++ b/sysdeps/powerpc/powerpc64/power7/memrchr.S @@ -52,7 +52,7 @@ L(proceed): cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */ sld r10,r10,r0 srd r10,r10,r0 - cmpldi cr7,r10,0 /* If r10 == 0, no BYTE's have been found. */ + cmpldi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */ bne cr7,L(done) /* Are we done already? */ diff --git a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S index da24588..6323154 100644 --- a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S +++ b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S @@ -40,8 +40,8 @@ ENTRY (BP_SYM (__STRCMP)) #define rSTR1 r5 /* 1st string */ #define rSTR2 r4 /* 2nd string */ #define rLOCARG r5 /* 3rd argument: locale_t */ -#define rCHAR1 r6 /* Byte readed from 1st string */ -#define rCHAR2 r7 /* Byte readed from 2nd string */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ #define rADDR1 r8 /* Address of tolower(rCHAR1) */ #define rADDR2 r12 /* Address of tolower(rCHAR2) */ #define rLWR1 r8 /* Word tolower(rCHAR1) */ diff --git a/sysdeps/powerpc/powerpc64/power7/strncmp.S b/sysdeps/powerpc/powerpc64/power7/strncmp.S index 7aaad4f..25a6baf 100644 --- a/sysdeps/powerpc/powerpc64/power7/strncmp.S +++ b/sysdeps/powerpc/powerpc64/power7/strncmp.S @@ -56,7 +56,7 @@ EALIGN (BP_SYM(strncmp),5,0) cmpldi cr1,rN,0 lis rFEFE,-0x101 bne L(unaligned) -/* We are doubleword alligned so set up for two loops. first a double word +/* We are doubleword aligned so set up for two loops. first a double word loop, then fall into the byte loop if any residual. */ srdi. 
rTMP,rN,3 clrldi rN,rN,61 diff --git a/sysdeps/powerpc/powerpc64/strncmp.S b/sysdeps/powerpc/powerpc64/strncmp.S index 4c1938e..89a3246 100644 --- a/sysdeps/powerpc/powerpc64/strncmp.S +++ b/sysdeps/powerpc/powerpc64/strncmp.S @@ -50,7 +50,7 @@ EALIGN (BP_SYM(strncmp), 4, 0) cmpldi cr1, rN, 0 lis rFEFE, -0x101 bne L(unaligned) -/* We are doubleword alligned so set up for two loops. first a double word +/* We are doubleword aligned so set up for two loops. first a double word loop, then fall into the byte loop if any residual. */ srdi. rTMP, rN, 3 clrldi rN, rN, 61 |