aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/strcmp.s
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/strcmp.s')
-rw-r--r--sysdeps/powerpc/strcmp.s273
1 files changed, 273 insertions, 0 deletions
diff --git a/sysdeps/powerpc/strcmp.s b/sysdeps/powerpc/strcmp.s
new file mode 100644
index 0000000..f901b82
--- /dev/null
+++ b/sysdeps/powerpc/strcmp.s
@@ -0,0 +1,273 @@
+ # Optimized strcmp implementation for PowerPC.
+ # Copyright (C) 1997 Free Software Foundation, Inc.
+ # This file is part of the GNU C Library.
+ #
+ # The GNU C Library is free software; you can redistribute it and/or
+ # modify it under the terms of the GNU Library General Public License as
+ # published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # The GNU C Library is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # Library General Public License for more details.
+ #
+ # You should have received a copy of the GNU Library General Public
+ # License along with the GNU C Library; see the file COPYING.LIB. If not,
+ # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ # Boston, MA 02111-1307, USA.
+
+ # See strlen.s for comments on how the end-of-string testing works.
+
+ .section ".text"
+ .align 3
+ .globl strcmp
+ .type strcmp,@function
+strcmp:
+ # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])
+
+ # General register assignments:
+ # r0: temporary
+ # r3: pointer to previous word in s1
+ # r4: pointer to previous word in s2
+ # r5: current first word in s1
+ # r6: current first word in s2 (after re-alignment)
+ # r7: 0xfefefeff
+ # r8: 0x7f7f7f7f
+ # r9: ~(word in s1 | 0x7f7f7f7f)
+
+ # Register assignments in the prologue:
+ # r10: low 2 bits of p2-p1
+ # r11: mask to orc with r5/r6
+
+ subf. %r10,%r4,%r3
+ beq- equal
+ andi. %r10,%r10,3
+ cmpi %cr1,%r10,2
+ beq- %cr1,align2
+ lis %r7,0xfeff
+ lis %r8,0x7f7f
+ addi %r8,%r8,0x7f7f
+ addi %r7,%r7,0xfffffeff
+ bgt- %cr1,align3
+strcmp3:
+ rlwinm %r0,%r3,3,27,28
+ li %r11,-1
+ srw %r11,%r11,%r0
+ clrrwi %r3,%r3,2
+ clrrwi %r4,%r4,2
+ lwz %r5,0(%r3)
+ lwz %r6,0(%r4)
+ bne- align1
+
+ # The loop, case when both strings are aligned the same.
+ # on entry, cr1.eq must be 1.
+ # r10: second word in s1
+ # r11: second word in s2 OR mask to orc with first two words.
+align0:
+ andi. %r0,%r3,4
+ orc %r5,%r5,%r11
+ orc %r6,%r6,%r11
+ beq+ a0start
+ add %r0,%r7,%r5
+ nor %r9,%r8,%r5
+ and. %r0,%r0,%r9
+ cmplw %cr1,%r5,%r6
+ subi %r3,%r3,4
+ bne- endstringeq
+ subi %r4,%r4,4
+ bne- %cr1,difference
+
+loopalign0:
+ lwzu %r5,8(%r3)
+ bne- %cr1,difference2
+ lwzu %r6,8(%r4)
+a0start:
+ add %r0,%r7,%r5
+ nor %r9,%r8,%r5
+ and. %r0,%r0,%r9
+ cmplw %cr1,%r5,%r6
+ lwz %r10,4(%r3)
+ bne- endstringeq
+ add %r0,%r7,%r10
+ bne- %cr1,difference
+ nor %r9,%r8,%r10
+ lwz %r11,4(%r4)
+ and. %r0,%r0,%r9
+ cmplw %cr1,%r10,%r11
+ beq+ loopalign0
+
+ mr %r5,%r10
+ mr %r6,%r11
+
+ # fall through to...
+
+endstringeq:
+ # (like 'endstring', but an equality code is in cr1)
+ beq %cr1,equal
+endstring:
+ # OK. We've hit the end of the string. We need to be careful that
+ # we don't compare two strings as different because of gunk beyond
+ # the end of the strings. We do it like this...
+ and %r0,%r8,%r5
+ add %r0,%r0,%r8
+ xor. %r10,%r5,%r6
+ andc %r9,%r9,%r0
+ cntlzw %r10,%r10
+ cntlzw %r9,%r9
+ addi %r9,%r9,7
+ cmpw %cr1,%r9,%r10
+ blt %cr1,equal
+ sub %r3,%r5,%r6
+ bgelr+
+ mr %r3,%r6
+ blr
+equal: li %r3,0
+ blr
+
+ # The loop, case when s2 is aligned 1 char behind s1.
+ # r10: current word in s2 (before re-alignment)
+
+align1:
+ cmpwi %cr1,%r0,0
+ orc %r5,%r5,%r11
+ bne %cr1,align1_123
+ # When s1 is aligned to a word boundary, the startup processing is special.
+ slwi. %r6,%r6,24
+ bne+ a1entry_0
+ nor %r9,%r8,%r5
+ b endstring
+
+align1_123:
+ # Otherwise (s1 not aligned to a word boundary):
+ mr %r10,%r6
+ add %r0,%r7,%r5
+ nor %r9,%r8,%r5
+ and. %r0,%r0,%r9
+ srwi %r6,%r6,8
+ orc %r6,%r6,%r11
+ cmplw %cr1,%r5,%r6
+ bne- endstringeq
+ bne- %cr1,difference
+
+loopalign1:
+ slwi. %r6,%r10,24
+ bne- %cr1,a1difference
+ lwzu %r5,4(%r3)
+ beq- endstring1
+a1entry_0:
+ lwzu %r10,4(%r4)
+a1entry_123:
+ add %r0,%r7,%r5
+ nor %r9,%r8,%r5
+ and. %r0,%r0,%r9
+ rlwimi %r6,%r10,24,8,31
+ cmplw %cr1,%r5,%r6
+ beq+ loopalign1
+ b endstringeq
+
+endstring1:
+ srwi %r3,%r5,24
+ blr
+
+a1difference:
+ lbz %r6,-1(%r4)
+ slwi %r6,%r6,24
+ rlwimi %r6,%r10,24,8,31
+
+ # fall through to...
+
+difference:
+ # The idea here is that we could just return '%r5 - %r6', except
+ # that the result might overflow. Overflow can only happen when %r5
+ # and %r6 have different signs (thus the xor), in which case we want to
+ # return negative iff %r6 has its high bit set so %r5 < %r6.
+ # A branch-free implementation of this is
+ # xor %r0,%r5,%r6
+ # rlwinm %r0,%r0,1,31,31
+ # rlwnm %r5,%r5,%r0,1,31
+ # rlwnm %r6,%r6,%r0,1,31
+ # sub %r3,%r5,%r6
+ # blr
+ # but this is usually more expensive.
+ xor. %r0,%r5,%r6
+ sub %r3,%r5,%r6
+ bgelr+
+ mr %r3,%r6
+ blr
+
+difference2:
+ # As for 'difference', but use registers r10 and r11 instead of r5 and r6.
+ xor. %r0,%r10,%r11
+ sub %r3,%r10,%r11
+ bgelr+
+ mr %r3,%r11
+ blr
+
+ # For the case when s2 is aligned 3 chars behind s1, we switch
+ # s1 and s2...
+ # r10: used by 'align2' (see below)
+ # r11: used by 'align2' (see below)
+ # r12: saved link register
+ # cr0.eq: must be left as 1.
+
+align3: mflr %r12
+ mr %r0,%r3
+ mr %r3,%r4
+ mr %r4,%r0
+ bl strcmp3
+ mtlr %r12
+ neg %r3,%r3
+ blr
+
+ # The loop, case when s2 and s1's alignments differ by 2
+ # This is the ugly case...
+ # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect,
+ # because there are too many branches. This loop should probably be
+ # coded like the align1 case.
+
+a2even: lhz %r5,0(%r3)
+ lhz %r6,0(%r4)
+ b a2entry
+
+align2:
+ andi. %r0,%r3,1
+ beq+ a2even
+ subi %r3,%r3,1
+ subi %r4,%r4,1
+ lbz %r5,1(%r3)
+ lbz %r6,1(%r4)
+ cmpwi %cr0,%r5,0
+ cmpw %cr1,%r5,%r6
+ beq- align2end2
+ lhzu %r5,2(%r3)
+ beq+ %cr1,a2entry1
+ lbz %r5,-1(%r3)
+ sub %r3,%r5,%r6
+ blr
+
+loopalign2:
+ cmpw %cr1,%r5,%r6
+ beq- align2end2
+ lhzu %r5,2(%r3)
+ bne- %cr1,align2different
+a2entry1:
+ lhzu %r6,2(%r4)
+a2entry:
+ cmpwi %cr5,%r5,0x00ff
+ andi. %r0,%r5,0x00ff
+ bgt+ %cr5,loopalign2
+
+align2end:
+ andi. %r3,%r6,0xff00
+ neg %r3,%r3
+ blr
+
+align2different:
+ lhzu %r5,-2(%r3)
+align2end2:
+ sub %r3,%r5,%r6
+ blr
+
+0:
+ .size strcmp,0b-strcmp