diff options
Diffstat (limited to 'sysdeps/powerpc/strcmp.s')
-rw-r--r-- | sysdeps/powerpc/strcmp.s | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/sysdeps/powerpc/strcmp.s b/sysdeps/powerpc/strcmp.s new file mode 100644 index 0000000..f901b82 --- /dev/null +++ b/sysdeps/powerpc/strcmp.s @@ -0,0 +1,273 @@ + # Optimized strcmp implementation for PowerPC. + # Copyright (C) 1997 Free Software Foundation, Inc. + # This file is part of the GNU C Library. + # + # The GNU C Library is free software; you can redistribute it and/or + # modify it under the terms of the GNU Library General Public License as + # published by the Free Software Foundation; either version 2 of the + # License, or (at your option) any later version. + # + # The GNU C Library is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + # Library General Public License for more details. + # + # You should have received a copy of the GNU Library General Public + # License along with the GNU C Library; see the file COPYING.LIB. If not, + # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + # Boston, MA 02111-1307, USA. + + # See strlen.s for comments on how the end-of-string testing works. + + .section ".text" + .align 3 + .globl strcmp + .type strcmp,@function +strcmp: + # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4]) + + # General register assignments: + # r0: temporary + # r3: pointer to previous word in s1 + # r4: pointer to previous word in s2 + # r5: current first word in s1 + # r6: current first word in s2 (after re-alignment) + # r7: 0xfefefeff + # r8: 0x7f7f7f7f + # r9: ~(word in s1 | 0x7f7f7f7f) + + # Register assignments in the prologue: + # r10: low 2 bits of p2-p1 + # r11: mask to orc with r5/r6 + + subf. %r10,%r4,%r3 + beq- equal + andi. %r10,%r10,3 + cmpi %cr1,%r10,2 + beq- %cr1,align2 + lis %r7,0xfeff + lis %r8,0x7f7f + addi %r8,%r8,0x7f7f + addi %r7,%r7,0xfffffeff + bgt- %cr1,align3 +strcmp3: + rlwinm %r0,%r3,3,27,28 + li %r11,-1 + srw %r11,%r11,%r0 + clrrwi %r3,%r3,2 + clrrwi %r4,%r4,2 + lwz %r5,0(%r3) + lwz %r6,0(%r4) + bne- align1 + + # The loop, case when both strings are aligned the same. + # on entry, cr1.eq must be 1. + # r10: second word in s1 + # r11: second word in s2 OR mask to orc with first two words. +align0: + andi. %r0,%r3,4 + orc %r5,%r5,%r11 + orc %r6,%r6,%r11 + beq+ a0start + add %r0,%r7,%r5 + nor %r9,%r8,%r5 + and. %r0,%r0,%r9 + cmplw %cr1,%r5,%r6 + subi %r3,%r3,4 + bne- endstringeq + subi %r4,%r4,4 + bne- %cr1,difference + +loopalign0: + lwzu %r5,8(%r3) + bne- %cr1,difference2 + lwzu %r6,8(%r4) +a0start: + add %r0,%r7,%r5 + nor %r9,%r8,%r5 + and. %r0,%r0,%r9 + cmplw %cr1,%r5,%r6 + lwz %r10,4(%r3) + bne- endstringeq + add %r0,%r7,%r10 + bne- %cr1,difference + nor %r9,%r8,%r10 + lwz %r11,4(%r4) + and. %r0,%r0,%r9 + cmplw %cr1,%r10,%r11 + beq+ loopalign0 + + mr %r5,%r10 + mr %r6,%r11 + + # fall through to... + +endstringeq: + # (like 'endstring', but an equality code is in cr1) + beq %cr1,equal +endstring: + # OK. We've hit the end of the string. We need to be careful that + # we don't compare two strings as different because of gunk beyond + # the end of the strings. We do it like this... + and %r0,%r8,%r5 + add %r0,%r0,%r8 + xor. %r10,%r5,%r6 + andc %r9,%r9,%r0 + cntlzw %r10,%r10 + cntlzw %r9,%r9 + addi %r9,%r9,7 + cmpw %cr1,%r9,%r10 + blt %cr1,equal + sub %r3,%r5,%r6 + bgelr+ + mr %r3,%r6 + blr +equal: li %r3,0 + blr + + # The loop, case when s2 is aligned 1 char behind s1. + # r10: current word in s2 (before re-alignment) + +align1: + cmpwi %cr1,%r0,0 + orc %r5,%r5,%r11 + bne %cr1,align1_123 + # When s1 is aligned to a word boundary, the startup processing is special. + slwi. %r6,%r6,24 + bne+ a1entry_0 + nor %r9,%r8,%r5 + b endstring + +align1_123: + # Otherwise (s1 not aligned to a word boundary): + mr %r10,%r6 + add %r0,%r7,%r5 + nor %r9,%r8,%r5 + and. %r0,%r0,%r9 + srwi %r6,%r6,8 + orc %r6,%r6,%r11 + cmplw %cr1,%r5,%r6 + bne- endstringeq + bne- %cr1,difference + +loopalign1: + slwi. %r6,%r10,24 + bne- %cr1,a1difference + lwzu %r5,4(%r3) + beq- endstring1 +a1entry_0: + lwzu %r10,4(%r4) +a1entry_123: + add %r0,%r7,%r5 + nor %r9,%r8,%r5 + and. %r0,%r0,%r9 + rlwimi %r6,%r10,24,8,31 + cmplw %cr1,%r5,%r6 + beq+ loopalign1 + b endstringeq + +endstring1: + srwi %r3,%r5,24 + blr + +a1difference: + lbz %r6,-1(%r4) + slwi %r6,%r6,24 + rlwimi %r6,%r10,24,8,31 + + # fall through to... + +difference: + # The idea here is that we could just return '%r5 - %r6', except + # that the result might overflow. Overflow can only happen when %r5 + # and %r6 have different signs (thus the xor), in which case we want to + # return negative iff %r6 has its high bit set so %r5 < %r6. + # A branch-free implementation of this is + # xor %r0,%r5,%r6 + # rlwinm %r0,%r0,1,31,31 + # rlwnm %r5,%r5,%r0,1,31 + # rlwnm %r6,%r6,%r0,1,31 + # sub %r3,%r5,%r6 + # blr + # but this is usually more expensive. + xor. %r0,%r5,%r6 + sub %r3,%r5,%r6 + bgelr+ + mr %r3,%r6 + blr + +difference2: + # As for 'difference', but use registers r10 and r11 instead of r5 and r6. + xor. %r0,%r10,%r11 + sub %r3,%r10,%r11 + bgelr+ + mr %r3,%r11 + blr + + # For the case when s2 is aligned 3 chars behind s1, we switch + # s1 and s2... + # r10: used by 'align2' (see below) + # r11: used by 'align2' (see below) + # r12: saved link register + # cr0.eq: must be left as 1. + +align3: mflr %r12 + mr %r0,%r3 + mr %r3,%r4 + mr %r4,%r0 + bl strcmp3 + mtlr %r12 + neg %r3,%r3 + blr + + # The loop, case when s2 and s1's alignments differ by 2 + # This is the ugly case... + # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect, + # because there are too many branches. This loop should probably be + # coded like the align1 case. + +a2even: lhz %r5,0(%r3) + lhz %r6,0(%r4) + b a2entry + +align2: + andi. %r0,%r3,1 + beq+ a2even + subi %r3,%r3,1 + subi %r4,%r4,1 + lbz %r5,1(%r3) + lbz %r6,1(%r4) + cmpwi %cr0,%r5,0 + cmpw %cr1,%r5,%r6 + beq- align2end2 + lhzu %r5,2(%r3) + beq+ %cr1,a2entry1 + lbz %r5,-1(%r3) + sub %r3,%r5,%r6 + blr + +loopalign2: + cmpw %cr1,%r5,%r6 + beq- align2end2 + lhzu %r5,2(%r3) + bne- %cr1,align2different +a2entry1: + lhzu %r6,2(%r4) +a2entry: + cmpwi %cr5,%r5,0x00ff + andi. %r0,%r5,0x00ff + bgt+ %cr5,loopalign2 + +align2end: + andi. %r3,%r6,0xff00 + neg %r3,%r3 + blr + +align2different: + lhzu %r5,-2(%r3) +align2end2: + sub %r3,%r5,%r6 + blr + +0: + .size strcmp,0b-strcmp |