about summary refs log tree commit diff
diff options
context:
space:
mode:
authorGreg McGary <greg@mcgary.org>2000-06-06 22:37:40 +0000
committerGreg McGary <greg@mcgary.org>2000-06-06 22:37:40 +0000
commit1d280d9f1e4d4c889d8123663a81b49256cf6fd6 (patch)
treed75c35fc1e267377ac0957f4b521ae6bdfbab680
parent019357d23488c773cdef1dd077cc8915b6012d52 (diff)
downloadglibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.zip
glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar.gz
glibc-1d280d9f1e4d4c889d8123663a81b49256cf6fd6.tar.bz2
* sysdeps/powerpc/memset.S: Define & use symbolic register names.
Use C comments throughout. Line up operands column with tabs.
* sysdeps/powerpc/strchr.S: Likewise.
* sysdeps/powerpc/strcmp.S: Likewise.
* sysdeps/powerpc/strcpy.S: Likewise.
* sysdeps/powerpc/strlen.S: Likewise.
-rw-r--r--ChangeLog7
-rw-r--r--sysdeps/powerpc/memset.S273
-rw-r--r--sysdeps/powerpc/strchr.S138
-rw-r--r--sysdeps/powerpc/strcmp.S132
-rw-r--r--sysdeps/powerpc/strcpy.S110
-rw-r--r--sysdeps/powerpc/strlen.S126
6 files changed, 408 insertions, 378 deletions
diff --git a/ChangeLog b/ChangeLog
index 874913e..6a2ad80 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2000-06-06 Greg McGary <greg@mcgary.org>
+ * sysdeps/powerpc/memset.S: Define & use symbolic register names.
+ Use C comments throughout. Line up operands column with tabs.
+ * sysdeps/powerpc/strchr.S: Likewise.
+ * sysdeps/powerpc/strcmp.S: Likewise.
+ * sysdeps/powerpc/strcpy.S: Likewise.
+ * sysdeps/powerpc/strlen.S: Likewise.
+
* sysdeps/unix/sysv/linux/powerpc/brk.S [!PIC]:
Get low part of &__curbrk with @l.
diff --git a/sysdeps/powerpc/memset.S b/sysdeps/powerpc/memset.S
index 1b95bc7..c48c0af 100644
--- a/sysdeps/powerpc/memset.S
+++ b/sysdeps/powerpc/memset.S
@@ -19,181 +19,192 @@
#include <sysdep.h>
-EALIGN(memset,5,1)
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
Returns 's'.
The memset is done in three sizes: byte (8 bits), word (32 bits),
cache line (256 bits). There is a special case for setting cache lines
- to 0, to take advantage of the dcbz instruction.
- r6: current address we are storing at
- r7: number of bytes we are setting now (when aligning) */
+ to 0, to take advantage of the dcbz instruction. */
+
+EALIGN (memset, 5, 1)
+
+#define rTMP r0
+#define rRTN r3 /* initial value of 1st argument */
+#define rCHR r4 /* char to set in each byte */
+#define rLEN r5 /* length of region to set */
+#define rMEMP r6 /* address at which we are storing */
+#define rALIGN r7 /* number of bytes we are setting now (when aligning) */
+#define rMEMP2 r8
+
+#define rPOS32 r7 /* constant +32 for clearing with dcbz */
+#define rNEG64 r8 /* constant -64 for clearing with dcbz */
+#define rNEG32 r9 /* constant -32 for clearing with dcbz */
/* take care of case for size <= 4 */
- cmplwi cr1,r5,4
- andi. r7,r3,3
- mr r6,r3
- ble- cr1,L(small)
+ cmplwi cr1, rLEN, 4
+ andi. rALIGN, rRTN, 3
+ mr rMEMP, rRTN
+ ble- cr1, L(small)
/* align to word boundary */
- cmplwi cr5,r5,31
- rlwimi r4,r4,8,16,23
- beq+ L(aligned) # 8th instruction from .align
- mtcrf 0x01,r3
- subfic r7,r7,4
- add r6,r6,r7
- sub r5,r5,r7
- bf+ 31,L(g0)
- stb r4,0(r3)
- bt 30,L(aligned)
-L(g0): sth r4,-2(r6) # 16th instruction from .align
+ cmplwi cr5, rLEN, 31
+ rlwimi rCHR, rCHR, 8, 16, 23
+ beq+ L(aligned) /* 8th instruction from .align */
+ mtcrf 0x01, rRTN
+ subfic rALIGN, rALIGN, 4
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ bf+ 31, L(g0)
+ stb rCHR, 0(rRTN)
+ bt 30, L(aligned)
+L(g0): sth rCHR, -2(rMEMP) /* 16th instruction from .align */
/* take care of case for size < 31 */
L(aligned):
- mtcrf 0x01,r5
- rlwimi r4,r4,16,0,15
- ble cr5,L(medium)
+ mtcrf 0x01, rLEN
+ rlwimi rCHR, rCHR, 16, 0, 15
+ ble cr5, L(medium)
/* align to cache line boundary... */
- andi. r7,r6,0x1C
- subfic r7,r7,0x20
- beq L(caligned)
- mtcrf 0x01,r7
- add r6,r6,r7
- sub r5,r5,r7
- cmplwi cr1,r7,0x10
- mr r8,r6
- bf 28,L(a1)
- stw r4,-4(r8)
- stwu r4,-8(r8)
-L(a1): blt cr1,L(a2)
- stw r4,-4(r8) # 32nd instruction from .align
- stw r4,-8(r8)
- stw r4,-12(r8)
- stwu r4,-16(r8)
-L(a2): bf 29,L(caligned)
- stw r4,-4(r8)
+ andi. rALIGN, rMEMP, 0x1C
+ subfic rALIGN, rALIGN, 0x20
+ beq L(caligned)
+ mtcrf 0x01, rALIGN
+ add rMEMP, rMEMP, rALIGN
+ sub rLEN, rLEN, rALIGN
+ cmplwi cr1, rALIGN, 0x10
+ mr rMEMP2, rMEMP
+ bf 28, L(a1)
+ stw rCHR, -4(rMEMP2)
+ stwu rCHR, -8(rMEMP2)
+L(a1): blt cr1, L(a2)
+ stw rCHR, -4(rMEMP2) /* 32nd instruction from .align */
+ stw rCHR, -8(rMEMP2)
+ stw rCHR, -12(rMEMP2)
+ stwu rCHR, -16(rMEMP2)
+L(a2): bf 29, L(caligned)
+ stw rCHR, -4(rMEMP2)
/* now aligned to a cache line. */
L(caligned):
- cmplwi cr1,r4,0
- clrrwi. r7,r5,5
- mtcrf 0x01,r5 # 40th instruction from .align
- beq cr1,L(zloopstart) # special case for clearing memory using dcbz
- srwi r0,r7,5
- mtctr r0
- beq L(medium) # we may not actually get to do a full line
- clrlwi. r5,r5,27
- add r6,r6,r7
- li r8,-0x40
- bdz L(cloopdone) # 48th instruction from .align
+ cmplwi cr1, rCHR, 0
+ clrrwi. rALIGN, rLEN, 5
+ mtcrf 0x01, rLEN /* 40th instruction from .align */
+ beq cr1, L(zloopstart) /* special case for clearing memory using dcbz */
+ srwi rTMP, rALIGN, 5
+ mtctr rTMP
+ beq L(medium) /* we may not actually get to do a full line */
+ clrlwi. rLEN, rLEN, 27
+ add rMEMP, rMEMP, rALIGN
+ li rNEG64, -0x40
+ bdz L(cloopdone) /* 48th instruction from .align */
-L(c3): dcbz r8,r6
- stw r4,-4(r6)
- stw r4,-8(r6)
- stw r4,-12(r6)
- stw r4,-16(r6)
- nop # let 601 fetch last 4 instructions of loop
- stw r4,-20(r6)
- stw r4,-24(r6) # 56th instruction from .align
- nop # let 601 fetch first 8 instructions of loop
- stw r4,-28(r6)
- stwu r4,-32(r6)
- bdnz L(c3)
+L(c3): dcbz rNEG64, rMEMP
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stw rCHR, -16(rMEMP)
+ nop /* let 601 fetch last 4 instructions of loop */
+ stw rCHR, -20(rMEMP)
+ stw rCHR, -24(rMEMP) /* 56th instruction from .align */
+ nop /* let 601 fetch first 8 instructions of loop */
+ stw rCHR, -28(rMEMP)
+ stwu rCHR, -32(rMEMP)
+ bdnz L(c3)
L(cloopdone):
- stw r4,-4(r6)
- stw r4,-8(r6)
- stw r4,-12(r6)
- stw r4,-16(r6) # 64th instruction from .align
- stw r4,-20(r6)
- cmplwi cr1,r5,16
- stw r4,-24(r6)
- stw r4,-28(r6)
- stwu r4,-32(r6)
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stw rCHR, -16(rMEMP) /* 64th instruction from .align */
+ stw rCHR, -20(rMEMP)
+ cmplwi cr1, rLEN, 16
+ stw rCHR, -24(rMEMP)
+ stw rCHR, -28(rMEMP)
+ stwu rCHR, -32(rMEMP)
beqlr
- add r6,r6,r7
- b L(medium_tail2) # 72nd instruction from .align
+ add rMEMP, rMEMP, rALIGN
+ b L(medium_tail2) /* 72nd instruction from .align */
.align 5
nop
/* Clear lines of memory in 128-byte chunks. */
L(zloopstart):
- clrlwi r5,r5,27
- mtcrf 0x02,r7
- srwi. r0,r7,7
- mtctr r0
- li r7,0x20
- li r8,-0x40
- cmplwi cr1,r5,16 # 8
- bf 26,L(z0)
- dcbz 0,r6
- addi r6,r6,0x20
-L(z0): li r9,-0x20
- bf 25,L(z1)
- dcbz 0,r6
- dcbz r7,r6
- addi r6,r6,0x40 # 16
-L(z1): cmplwi cr5,r5,0
- beq L(medium)
+ clrlwi rLEN, rLEN, 27
+ mtcrf 0x02, rALIGN
+ srwi. rTMP, rALIGN, 7
+ mtctr rTMP
+ li rPOS32, 0x20
+ li rNEG64, -0x40
+ cmplwi cr1, rLEN, 16 /* 8 */
+ bf 26, L(z0)
+ dcbz 0, rMEMP
+ addi rMEMP, rMEMP, 0x20
+L(z0): li rNEG32, -0x20
+ bf 25, L(z1)
+ dcbz 0, rMEMP
+ dcbz rPOS32, rMEMP
+ addi rMEMP, rMEMP, 0x40 /* 16 */
+L(z1): cmplwi cr5, rLEN, 0
+ beq L(medium)
L(zloop):
- dcbz 0,r6
- dcbz r7,r6
- addi r6,r6,0x80
- dcbz r8,r6
- dcbz r9,r6
- bdnz L(zloop)
- beqlr cr5
- b L(medium_tail2)
+ dcbz 0, rMEMP
+ dcbz rPOS32, rMEMP
+ addi rMEMP, rMEMP, 0x80
+ dcbz rNEG64, rMEMP
+ dcbz rNEG32, rMEMP
+ bdnz L(zloop)
+ beqlr cr5
+ b L(medium_tail2)
.align 5
L(small):
/* Memset of 4 bytes or less. */
- cmplwi cr5,r5,1
- cmplwi cr1,r5,3
- bltlr cr5
- stb r4,0(r6)
- beqlr cr5
+ cmplwi cr5, rLEN, 1
+ cmplwi cr1, rLEN, 3
+ bltlr cr5
+ stb rCHR, 0(rMEMP)
+ beqlr cr5
nop
- stb r4,1(r6)
- bltlr cr1
- stb r4,2(r6)
- beqlr cr1
+ stb rCHR, 1(rMEMP)
+ bltlr cr1
+ stb rCHR, 2(rMEMP)
+ beqlr cr1
nop
- stb r4,3(r6)
+ stb rCHR, 3(rMEMP)
blr
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- cmplwi cr1,r5,16
+ cmplwi cr1, rLEN, 16
L(medium_tail2):
- add r6,r6,r5
+ add rMEMP, rMEMP, rLEN
L(medium_tail):
- bt- 31,L(medium_31t)
- bt- 30,L(medium_30t)
+ bt- 31, L(medium_31t)
+ bt- 30, L(medium_30t)
L(medium_30f):
- bt- 29,L(medium_29t)
+ bt- 29, L(medium_29t)
L(medium_29f):
- bge- cr1,L(medium_27t)
- bflr- 28
- stw r4,-4(r6) # 8th instruction from .align
- stw r4,-8(r6)
+ bge- cr1, L(medium_27t)
+ bflr- 28
+ stw rCHR, -4(rMEMP) /* 8th instruction from .align */
+ stw rCHR, -8(rMEMP)
blr
L(medium_31t):
- stbu r4,-1(r6)
- bf- 30,L(medium_30f)
+ stbu rCHR, -1(rMEMP)
+ bf- 30, L(medium_30f)
L(medium_30t):
- sthu r4,-2(r6)
- bf- 29,L(medium_29f)
+ sthu rCHR, -2(rMEMP)
+ bf- 29, L(medium_29f)
L(medium_29t):
- stwu r4,-4(r6)
- blt- cr1,L(medium_27f) # 16th instruction from .align
+ stwu rCHR, -4(rMEMP)
+ blt- cr1, L(medium_27f) /* 16th instruction from .align */
L(medium_27t):
- stw r4,-4(r6)
- stw r4,-8(r6)
- stw r4,-12(r6)
- stwu r4,-16(r6)
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
+ stw rCHR, -12(rMEMP)
+ stwu rCHR, -16(rMEMP)
L(medium_27f):
- bflr- 28
+ bflr- 28
L(medium_28t):
- stw r4,-4(r6)
- stw r4,-8(r6)
+ stw rCHR, -4(rMEMP)
+ stw rCHR, -8(rMEMP)
blr
END(memset)
diff --git a/sysdeps/powerpc/strchr.S b/sysdeps/powerpc/strchr.S
index 5367a5a..4662447 100644
--- a/sysdeps/powerpc/strchr.S
+++ b/sysdeps/powerpc/strchr.S
@@ -1,5 +1,5 @@
/* Optimized strchr implementation for PowerPC.
- Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,91 +21,95 @@
/* See strlen.s for comments on how this works. */
-/* char * [r3] strchr (const char *s [r3] , int c [r4] )
+/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */
- r0: a temporary
- r3: our return result.
- r4: byte we're looking for, spread over the whole word
- r5: the current word
- r6: the constant 0xfefefeff (-0x01010101)
- r7: the constant 0x7f7f7f7f
- r8: pointer to the current word.
- r9: a temporary
- r10: the number of bits we should ignore in the first word
- r11: a mask with the bits to ignore set to 0
- r12: a temporary */
-ENTRY(strchr)
- rlwimi r4,r4,8,16,23
- li r11,-1
- rlwimi r4,r4,16,0,15
- lis r6,0xfeff
- lis r7,0x7f7f
- clrrwi r8,r3,2
- addi r7,r7,0x7f7f
- addi r6,r6,0xfffffeff
- rlwinm r10,r3,3,27,28
+ENTRY (strchr)
+
+#define rTMP1 r0
+#define rRTN r3 /* outgoing result */
+#define rSTRin r3 /* incoming string arg */
+#define rCHR r4 /* byte we're looking for, spread over the whole word */
+#define rCLZB rCHR /* leading zero byte count */
+#define rWORD r5 /* the current word */
+#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
+#define r7F7F r7 /* constant 0x7f7f7f7f */
+#define rSTR r8 /* current word pointer */
+#define rTMP2 r9
+#define rIGN r10 /* number of bits we should ignore in the first word */
+#define rMASK r11 /* mask with the bits to ignore set to 0 */
+#define rTMP3 r12
+
+ rlwimi rCHR, rCHR, 8, 16, 23
+ li rMASK, -1
+ rlwimi rCHR, rCHR, 16, 0, 15
+ lis rFEFE, -0x101
+ lis r7F7F, 0x7f7f
+ clrrwi rSTR, rSTRin, 2
+ addi r7F7F, r7F7F, 0x7f7f
+ addi rFEFE, rFEFE, -0x101
+ rlwinm rIGN, rSTRin, 3, 27, 28
/* Test the first (partial?) word. */
- lwz r5,0(r8)
- srw r11,r11,r10
- orc r5,r5,r11
- add r0,r6,r5
- nor r9,r7,r5
- and. r0,r0,r9
- xor r12,r4,r5
- orc r12,r12,r11
- b L(loopentry)
+ lwz rWORD, 0(rSTR)
+ srw rMASK, rMASK, rIGN
+ orc rWORD, rWORD, rMASK
+ add rTMP1, rFEFE, rWORD
+ nor rTMP2, r7F7F, rWORD
+ and. rTMP1, rTMP1, rTMP2
+ xor rTMP3, rCHR, rWORD
+ orc rTMP3, rTMP3, rMASK
+ b L(loopentry)
/* The loop. */
-L(loop):lwzu r5,4(r8)
- and. r0,r0,r9
-/* Test for 0. */
- add r0,r6,r5
- nor r9,r7,r5
- bne L(foundit)
- and. r0,r0,r9
+L(loop):lwzu rWORD, 4(rSTR)
+ and. rTMP1, rTMP1, rTMP2
+/* Test for 0. */
+ add rTMP1, rFEFE, rWORD
+ nor rTMP2, r7F7F, rWORD
+ bne L(foundit)
+ and. rTMP1, rTMP1, rTMP2
/* Start test for the bytes we're looking for. */
- xor r12,r4,r5
+ xor rTMP3, rCHR, rWORD
L(loopentry):
- add r0,r6,r12
- nor r9,r7,r12
- beq L(loop)
+ add rTMP1, rFEFE, rTMP3
+ nor rTMP2, r7F7F, rTMP3
+ beq L(loop)
/* There is a zero byte in the word, but may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
zero byte, in which case we return a match. We guess that this hasn't
happened, though. */
L(missed):
- and. r0,r0,r9
- li r3,0
+ and. rTMP1, rTMP1, rTMP2
+ li rRTN, 0
beqlr
/* It did happen. Decide which one was first...
I'm not sure if this is actually faster than a sequence of
rotates, compares, and branches (we use it anyway because it's shorter). */
- and r6,r7,r5
- or r11,r7,r5
- and r0,r7,r12
- or r10,r7,r12
- add r6,r6,r7
- add r0,r0,r7
- nor r5,r11,r6
- nor r9,r10,r0
- cmplw r5,r9
+ and rFEFE, r7F7F, rWORD
+ or rMASK, r7F7F, rWORD
+ and rTMP1, r7F7F, rTMP3
+ or rIGN, r7F7F, rTMP3
+ add rFEFE, rFEFE, r7F7F
+ add rTMP1, rTMP1, r7F7F
+ nor rWORD, rMASK, rFEFE
+ nor rTMP2, rIGN, rTMP1
+ cmplw rWORD, rTMP2
bgtlr
- cntlzw r4,r9
- srwi r4,r4,3
- add r3,r8,r4
+ cntlzw rCLZB, rTMP2
+ srwi rCLZB, rCLZB, 3
+ add rRTN, rSTR, rCLZB
blr
L(foundit):
- and r0,r7,r12
- or r10,r7,r12
- add r0,r0,r7
- nor r9,r10,r0
- cntlzw r4,r9
- subi r8,r8,4
- srwi r4,r4,3
- add r3,r8,r4
+ and rTMP1, r7F7F, rTMP3
+ or rIGN, r7F7F, rTMP3
+ add rTMP1, rTMP1, r7F7F
+ nor rTMP2, rIGN, rTMP1
+ cntlzw rCLZB, rTMP2
+ subi rSTR, rSTR, 4
+ srwi rCLZB, rCLZB, 3
+ add rRTN, rSTR, rCLZB
blr
-END(strchr)
+END (strchr)
-weak_alias(strchr,index)
+weak_alias(strchr, index)
diff --git a/sysdeps/powerpc/strcmp.S b/sysdeps/powerpc/strcmp.S
index 92e9858..1accdd7 100644
--- a/sysdeps/powerpc/strcmp.S
+++ b/sysdeps/powerpc/strcmp.S
@@ -21,95 +21,93 @@
/* See strlen.s for comments on how the end-of-string testing works. */
-EALIGN(strcmp,4,0)
-/* int [r3] strcmp (const char *p1 [r3], const char *p2 [r4]) */
+/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */
-/* General register assignments:
- r0: temporary
- r3: pointer to previous word in s1
- r4: pointer to previous word in s2
- r5: current word from s1
- r6: current word from s2
- r7: 0xfefefeff
- r8: 0x7f7f7f7f
- r9: ~(word in s1 | 0x7f7f7f7f) */
+EALIGN (strcmp, 4, 0)
-/* Register assignments in the prologue:
- r10: low 2 bits of p2-p1
- r11: mask to orc with r5/r6 */
+#define rTMP r0
+#define rRTN r3 /* return value */
+#define rSTR1 r3 /* first string arg */
+#define rSTR2 r4 /* second string arg */
+#define rWORD1 r5 /* current word in s1 */
+#define rWORD2 r6 /* current word in s2 */
+#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */
+#define r7F7F r8 /* constant 0x7f7f7f7f */
+#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
+#define rBITDIF r10 /* bits that differ in s1 & s2 words */
- or r0,r4,r3
- clrlwi. r0,r0,30
- lis r7,0xfeff
- bne L(unaligned)
+ or rTMP, rSTR2, rSTR1
+ clrlwi. rTMP, rTMP, 30
+ lis rFEFE, -0x101
+ bne L(unaligned)
- lwz r5,0(r3)
- lwz r6,0(r4)
- lis r8,0x7f7f
- addi r7,r7,-0x101
- addi r8,r8,0x7f7f
- b L(g1)
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+ lis r7F7F, 0x7f7f
+ addi rFEFE, rFEFE, -0x101
+ addi r7F7F, r7F7F, 0x7f7f
+ b L(g1)
-L(g0): lwzu r5,4(r3)
- bne cr1,L(different)
- lwzu r6,4(r4)
-L(g1): add r0,r7,r5
- nor r9,r8,r5
- and. r0,r0,r9
- cmpw cr1,r5,r6
- beq+ L(g0)
+L(g0): lwzu rWORD1, 4(rSTR1)
+ bne cr1, L(different)
+ lwzu rWORD2, 4(rSTR2)
+L(g1): add rTMP, rFEFE, rWORD1
+ nor rNEG, r7F7F, rWORD1
+ and. rTMP, rTMP, rNEG
+ cmpw cr1, rWORD1, rWORD2
+ beq+ L(g0)
L(endstring):
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
- and r0,r8,r5
- beq cr1,L(equal)
- add r0,r0,r8
- xor. r10,r5,r6
- andc r9,r9,r0
- blt- L(highbit)
- cntlzw r10,r10
- cntlzw r9,r9
- addi r9,r9,7
- cmpw cr1,r9,r10
- sub r3,r5,r6
- bgelr+ cr1
+ and rTMP, r7F7F, rWORD1
+ beq cr1, L(equal)
+ add rTMP, rTMP, r7F7F
+ xor. rBITDIF, rWORD1, rWORD2
+ andc rNEG, rNEG, rTMP
+ blt- L(highbit)
+ cntlzw rBITDIF, rBITDIF
+ cntlzw rNEG, rNEG
+ addi rNEG, rNEG, 7
+ cmpw cr1, rNEG, rBITDIF
+ sub rRTN, rWORD1, rWORD2
+ bgelr+ cr1
L(equal):
- li r3,0
+ li rRTN, 0
blr
L(different):
- lwz r5,-4(r3)
- xor. r10,r5,r6
- sub r3,r5,r6
+ lwz rWORD1, -4(rSTR1)
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
bgelr+
L(highbit):
- ori r3,r6,1
+ ori rRTN, rWORD2, 1
blr
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
L(unaligned):
- lbz r5,0(r3)
- lbz r6,0(r4)
- b L(u1)
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
+ b L(u1)
-L(u0): lbzu r5,1(r3)
- bne- L(u4)
- lbzu r6,1(r4)
-L(u1): cmpwi cr1,r5,0
- beq- cr1,L(u3)
- cmpw r5,r6
- bne- L(u3)
- lbzu r5,1(r3)
- lbzu r6,1(r4)
- cmpwi cr1,r5,0
- cmpw r5,r6
- bne+ cr1,L(u0)
-L(u3): sub r3,r5,r6
+L(u0): lbzu rWORD1, 1(rSTR1)
+ bne- L(u4)
+ lbzu rWORD2, 1(rSTR2)
+L(u1): cmpwi cr1, rWORD1, 0
+ beq- cr1, L(u3)
+ cmpw rWORD1, rWORD2
+ bne- L(u3)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ cmpwi cr1, rWORD1, 0
+ cmpw rWORD1, rWORD2
+ bne+ cr1, L(u0)
+L(u3): sub rRTN, rWORD1, rWORD2
blr
-L(u4): lbz r5,-1(r3)
- sub r3,r5,r6
+L(u4): lbz rWORD1, -1(rSTR1)
+ sub rRTN, rWORD1, rWORD2
blr
END(strcmp)
diff --git a/sysdeps/powerpc/strcpy.S b/sysdeps/powerpc/strcpy.S
index 0767921..901ccf1 100644
--- a/sysdeps/powerpc/strcpy.S
+++ b/sysdeps/powerpc/strcpy.S
@@ -21,80 +21,80 @@
/* See strlen.s for comments on how the end-of-string testing works. */
-EALIGN(strcpy,4,0)
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */
-/* General register assignments:
- r0: temporary
- r3: saved `dest'
- r4: pointer to previous word in src
- r5: pointer to previous word in dest
- r6: current word from src
- r7: 0xfefefeff
- r8: 0x7f7f7f7f
- r9: ~(word in src | 0x7f7f7f7f)
- r10: alternate word from src. */
+EALIGN(strcpy, 4, 0)
- or r0,r4,r3
- clrlwi. r0,r0,30
- addi r5,r3,-4
- bne L(unaligned)
+#define rTMP r0
+#define rRTN r3 /* incoming DEST arg preserved as result */
+#define rSRC r4 /* pointer to previous word in src */
+#define rDEST r5 /* pointer to previous word in dest */
+#define rWORD r6 /* current word from src */
+#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */
+#define r7F7F r8 /* constant 0x7f7f7f7f */
+#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
+#define rALT r10 /* alternate word from src */
- lis r7,0xfeff
- lis r8,0x7f7f
- lwz r6,0(r4)
- addi r7,r7,-0x101
- addi r8,r8,0x7f7f
- b L(g2)
+ or rTMP, rSRC, rRTN
+ clrlwi. rTMP, rTMP, 30
+ addi rDEST, rRTN, -4
+ bne L(unaligned)
-L(g0): lwzu r10,4(r4)
- stwu r6,4(r5)
- add r0,r7,r10
- nor r9,r8,r10
- and. r0,r0,r9
- bne- L(g1)
- lwzu r6,4(r4)
- stwu r10,4(r5)
-L(g2): add r0,r7,r6
- nor r9,r8,r6
- and. r0,r0,r9
- beq+ L(g0)
+ lis rFEFE, -0x101
+ lis r7F7F, 0x7f7f
+ lwz rWORD, 0(rSRC)
+ addi rFEFE, rFEFE, -0x101
+ addi r7F7F, r7F7F, 0x7f7f
+ b L(g2)
- mr r10,r6
+L(g0): lwzu rALT, 4(rSRC)
+ stwu rWORD, 4(rDEST)
+ add rTMP, rFEFE, rALT
+ nor rNEG, r7F7F, rALT
+ and. rTMP, rTMP, rNEG
+ bne- L(g1)
+ lwzu rWORD, 4(rSRC)
+ stwu rALT, 4(rDEST)
+L(g2): add rTMP, rFEFE, rWORD
+ nor rNEG, r7F7F, rWORD
+ and. rTMP, rTMP, rNEG
+ beq+ L(g0)
+
+ mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. r0,r10,8,24,31
- stb r0,4(r5)
+L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+ stb rTMP, 4(rDEST)
beqlr-
- rlwinm. r0,r10,16,24,31
- stb r0,5(r5)
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stb rTMP, 5(rDEST)
beqlr-
- rlwinm. r0,r10,24,24,31
- stb r0,6(r5)
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stb rTMP, 6(rDEST)
beqlr-
- stb r10,7(r5)
+ stb rALT, 7(rDEST)
blr
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
nop
L(unaligned):
- lbz r6,0(r4)
- addi r5,r3,-1
- cmpwi r6,0
- beq- L(u2)
+ lbz rWORD, 0(rSRC)
+ addi rDEST, rRTN, -1
+ cmpwi rWORD, 0
+ beq- L(u2)
-L(u0): lbzu r10,1(r4)
- stbu r6,1(r5)
- cmpwi r10,0
- beq- L(u1)
+L(u0): lbzu rALT, 1(rSRC)
+ stbu rWORD, 1(rDEST)
+ cmpwi rALT, 0
+ beq- L(u1)
nop /* Let 601 load start of loop. */
- lbzu r6,1(r4)
- stbu r10,1(r5)
- cmpwi r6,0
- bne+ L(u0)
-L(u2): stb r6,1(r5)
+ lbzu rWORD, 1(rSRC)
+ stbu rALT, 1(rDEST)
+ cmpwi rWORD, 0
+ bne+ L(u0)
+L(u2): stb rWORD, 1(rDEST)
blr
-L(u1): stb r10,1(r5)
+L(u1): stb rALT, 1(rDEST)
blr
END(strcpy)
diff --git a/sysdeps/powerpc/strlen.S b/sysdeps/powerpc/strlen.S
index b847ee4..18e7623 100644
--- a/sysdeps/powerpc/strlen.S
+++ b/sysdeps/powerpc/strlen.S
@@ -1,5 +1,5 @@
/* Optimized strlen implementation for PowerPC.
- Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -69,76 +69,86 @@
We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
them, the others we must save. */
-ENTRY(strlen)
-/* On entry, r3 points to the string, and it's left that way.
- We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f.
- r4 is used to keep the current index into the string; r5 holds
- the number of padding bits we prepend to the string to make it
- start at a word boundary. r8 holds the 'current' word.
- r9-12 are temporaries. r0 is used as a temporary and for discarded
- results. */
- clrrwi r4,r3,2
- lis r7,0x7f7f
- rlwinm r5,r3,3,27,28
- lwz r8,0(r4)
- li r9,-1
- addi r7,r7,0x7f7f
+/* int [r3] strlen (char *s [r3]) */
+
+ENTRY (strlen)
+
+#define rTMP1 r0
+#define rRTN r3 /* incoming STR arg, outgoing result */
+#define rSTR r4 /* current string position */
+#define rPADN r5 /* number of padding bits we prepend to the
+ string to make it start at a word boundary */
+#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
+#define r7F7F r7 /* constant 0x7f7f7f7f */
+#define rWORD1 r8 /* current string word */
+#define rWORD2 r9 /* next string word */
+#define rMASK r9 /* mask for first string word */
+#define rTMP2 r10
+#define rTMP3 r11
+#define rTMP4 r12
+
+ clrrwi rSTR, rRTN, 2
+ lis r7F7F, 0x7f7f
+ rlwinm rPADN, rRTN, 3, 27, 28
+ lwz rWORD1, 0(rSTR)
+ li rMASK, -1
+ addi r7F7F, r7F7F, 0x7f7f
/* That's the setup done, now do the first pair of words.
We make an exception and use method (2) on the first two words, to reduce
overhead. */
- srw r9,r9,r5
- and r0,r7,r8
- or r10,r7,r8
- add r0,r0,r7
- nor r0,r10,r0
- and. r8,r0,r9
- mtcrf 0x01,r3
- bne L(done0)
- lis r6,0xfeff
- addi r6,r6,-0x101
+ srw rMASK, rMASK, rPADN
+ and rTMP1, r7F7F, rWORD1
+ or rTMP2, r7F7F, rWORD1
+ add rTMP1, rTMP1, r7F7F
+ nor rTMP1, rTMP2, rTMP1
+ and. rWORD1, rTMP1, rMASK
+ mtcrf 0x01, rRTN
+ bne L(done0)
+ lis rFEFE, -0x101
+ addi rFEFE, rFEFE, -0x101
/* Are we now aligned to a doubleword boundary? */
- bt 29,L(loop)
+ bt 29, L(loop)
/* Handle second word of pair. */
- lwzu r8,4(r4)
- and r0,r7,r8
- or r10,r7,r8
- add r0,r0,r7
- nor. r8,r10,r0
- bne L(done0)
+ lwzu rWORD1, 4(rSTR)
+ and rTMP1, r7F7F, rWORD1
+ or rTMP2, r7F7F, rWORD1
+ add rTMP1, rTMP1, r7F7F
+ nor. rWORD1, rTMP2, rTMP1
+ bne L(done0)
/* The loop. */
L(loop):
- lwz r8,4(r4)
- lwzu r9,8(r4)
- add r0,r6,r8
- nor r10,r7,r8
- and. r0,r0,r10
- add r11,r6,r9
- nor r12,r7,r9
- bne L(done1)
- and. r0,r11,r12
- beq L(loop)
-
- and r0,r7,r9
- add r0,r0,r7
- andc r8,r12,r0
- b L(done0)
+ lwz rWORD1, 4(rSTR)
+ lwzu rWORD2, 8(rSTR)
+ add rTMP1, rFEFE, rWORD1
+ nor rTMP2, r7F7F, rWORD1
+ and. rTMP1, rTMP1, rTMP2
+ add rTMP3, rFEFE, rWORD2
+ nor rTMP4, r7F7F, rWORD2
+ bne L(done1)
+ and. rTMP1, rTMP3, rTMP4
+ beq L(loop)
+
+ and rTMP1, r7F7F, rWORD2
+ add rTMP1, rTMP1, r7F7F
+ andc rWORD1, rTMP4, rTMP1
+ b L(done0)
L(done1):
- and r0,r7,r8
- subi r4,r4,4
- add r0,r0,r7
- andc r8,r10,r0
+ and rTMP1, r7F7F, rWORD1
+ subi rSTR, rSTR, 4
+ add rTMP1, rTMP1, r7F7F
+ andc rWORD1, rTMP2, rTMP1
-/* When we get to here, r4 points to the first word in the string that
- contains a zero byte, and the most significant set bit in r8 is in that
+/* When we get to here, rSTR points to the first word in the string that
+ contains a zero byte, and the most significant set bit in rWORD1 is in that
byte. */
L(done0):
- cntlzw r11,r8
- subf r0,r3,r4
- srwi r11,r11,3
- add r3,r0,r11
+ cntlzw rTMP3, rWORD1
+ subf rTMP1, rRTN, rSTR
+ srwi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
blr
-END(strlen)
+END (strlen)