diff options
Diffstat (limited to 'sysdeps/powerpc')
32 files changed, 1284 insertions, 1700 deletions
diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile index 0a50956..3e8f22b 100644 --- a/sysdeps/powerpc/Makefile +++ b/sysdeps/powerpc/Makefile @@ -11,3 +11,21 @@ tests += test-arith test-arithf LDLIBS-test-arith = libm LDLIBS-test-arithf = libm endif + +ifeq ($(subdir),gmon) +sysdep_routines += ppc-mcount +endif + +# On PPC, -fpic works until the GOT contains 2^15 bytes, and possibly +# more depending on how clever the linker is. Each GOT entry takes 4 bytes, +# so that's at least 8192 entries. Since libc only uses about 1200 entries, +# we want to use -fpic, because this generates fewer relocs. +ifeq (yes,$(build-shared)) +CFLAGS-.os = -fpic -fno-common +endif + +# The initfini generation code doesn't work in the presence of -fPIC, so +# we use -fpic instead which is much better. +ifeq ($(subdir),csu) +CFLAGS-initfini.s = -g0 -fpic +endif diff --git a/sysdeps/powerpc/add_n.S b/sysdeps/powerpc/add_n.S new file mode 100644 index 0000000..2bd59ae --- /dev/null +++ b/sysdeps/powerpc/add_n.S @@ -0,0 +1,68 @@ +/* Add two limb vectors of equal, non-zero length for PowerPC. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +#include <sysdep.h> + +/* mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, + mp_size_t size) + Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1. */ + +/* Note on optimisation: This code is optimal for the 601. Almost every other + possible 2-unrolled inner loop will not be. Also, watch out for the + alignment... */ + +EALIGN(__mpn_add_n,3,0) +/* Set up for loop below. */ + mtcrf 0x01,%r6 + srwi. %r7,%r6,1 + li %r10,0 + mtctr %r7 + bt 31,2f + +/* Clear the carry. */ + addic %r0,%r0,0 +/* Adjust pointers for loop. */ + addi %r3,%r3,-4 + addi %r4,%r4,-4 + addi %r5,%r5,-4 + b 0f + +2: lwz %r7,0(%r5) + lwz %r6,0(%r4) + addc %r6,%r6,%r7 + stw %r6,0(%r3) + beq 1f + +/* The loop. */ + +/* Align start of loop to an odd word boundary to guarantee that the + last two words can be fetched in one access (for 601). */ +0: lwz %r9,4(%r4) + lwz %r8,4(%r5) + lwzu %r6,8(%r4) + lwzu %r7,8(%r5) + adde %r8,%r9,%r8 + stw %r8,4(%r3) + adde %r6,%r6,%r7 + stwu %r6,8(%r3) + bdnz 0b +/* Return the carry. */ +1: addze %r3,%r10 + blr +END(__mpn_add_n) diff --git a/sysdeps/powerpc/add_n.s b/sysdeps/powerpc/add_n.s deleted file mode 100644 index 609f0a5..0000000 --- a/sysdeps/powerpc/add_n.s +++ /dev/null @@ -1,68 +0,0 @@ - # Add two limb vectors of equal, non-zero length for PowerPC. - # Copyright (C) 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details. 
- # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, - # mp_size_t size) - # Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1. - - # Note on optimisation: This code is optimal for the 601. Almost every other - # possible 2-unrolled inner loop will not be. Also, watch out for the - # alignment... - - .align 3 - .globl __mpn_add_n - .type __mpn_add_n,@function -__mpn_add_n: - # Set up for loop below. - mtcrf 0x01,%r6 - srwi. %r7,%r6,1 - li %r10,0 - mtctr %r7 - bt 31,2f - - # Clear the carry. - addic %r0,%r0,0 - # Adjust pointers for loop. - addi %r3,%r3,-4 - addi %r4,%r4,-4 - addi %r5,%r5,-4 - b 0f - -2: lwz %r7,0(%r5) - lwz %r6,0(%r4) - addc %r6,%r6,%r7 - stw %r6,0(%r3) - beq 1f - - # The loop. - - # Align start of loop to an odd word boundary to guarantee that the - # last two words can be fetched in one access (for 601). -0: lwz %r9,4(%r4) - lwz %r8,4(%r5) - lwzu %r6,8(%r4) - lwzu %r7,8(%r5) - adde %r8,%r9,%r8 - stw %r8,4(%r3) - adde %r6,%r6,%r7 - stwu %r6,8(%r3) - bdnz 0b - # return the carry -1: addze %r3,%r10 - blr diff --git a/sysdeps/powerpc/addmul_1.S b/sysdeps/powerpc/addmul_1.S new file mode 100644 index 0000000..dc762fc --- /dev/null +++ b/sysdeps/powerpc/addmul_1.S @@ -0,0 +1,49 @@ +/* Multiply a limb vector by a single limb, for PowerPC. + Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate res+s1*s2 and put result back in res; return carry. */ +ENTRY(__mpn_addmul_1) + mtctr %r5 + + lwz %r0,0(%r4) + mullw %r7,%r0,%r6 + mulhwu %r10,%r0,%r6 + lwz %r9,0(%r3) + addc %r8,%r7,%r9 + addi %r3,%r3,-4 /* adjust res_ptr */ + bdz 1f + +0: lwzu %r0,4(%r4) + stwu %r8,4(%r3) + mullw %r8,%r0,%r6 + adde %r7,%r8,%r10 + mulhwu %r10,%r0,%r6 + lwz %r9,4(%r3) + addze %r10,%r10 + addc %r8,%r7,%r9 + bdnz 0b + +1: stw %r8,4(%r3) + addze %r3,%r10 + blr +END(__mpn_addmul_1) diff --git a/sysdeps/powerpc/addmul_1.s b/sysdeps/powerpc/addmul_1.s deleted file mode 100644 index cf8fd2a..0000000 --- a/sysdeps/powerpc/addmul_1.s +++ /dev/null @@ -1,50 +0,0 @@ - # Multiply a limb vector by a single limb, for PowerPC. - # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - # Library General Public License for more details. - # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, - # mp_size_t s1_size, mp_limb_t s2_limb) - # Calculate res+s1*s2 and put result back in res; return carry. - - .align 2 - .globl __mpn_addmul_1 - .type __mpn_addmul_1,@function -__mpn_addmul_1: - mtctr %r5 - - lwz %r0,0(%r4) - mullw %r7,%r0,%r6 - mulhwu %r10,%r0,%r6 - lwz %r9,0(%r3) - addc %r8,%r7,%r9 - addi %r3,%r3,-4 # adjust res_ptr - bdz Lend - -Loop: lwzu %r0,4(%r4) - stwu %r8,4(%r3) - mullw %r8,%r0,%r6 - adde %r7,%r8,%r10 - mulhwu %r10,%r0,%r6 - lwz %r9,4(%r3) - addze %r10,%r10 - addc %r8,%r7,%r9 - bdnz Loop - -Lend: stw %r8,4(%r3) - addze %r3,%r10 - blr diff --git a/sysdeps/powerpc/bsd-_setjmp.S b/sysdeps/powerpc/bsd-_setjmp.S index ffd90d5..ef31f84 100644 --- a/sysdeps/powerpc/bsd-_setjmp.S +++ b/sysdeps/powerpc/bsd-_setjmp.S @@ -25,9 +25,5 @@ ENTRY (_setjmp) li %r4,0 /* Set second argument to 0. */ -#ifdef PIC - b __sigsetjmp@plt -#else - b __sigsetjmp -#endif + b JUMPTARGET(__sigsetjmp) END (_setjmp) diff --git a/sysdeps/powerpc/bsd-setjmp.S b/sysdeps/powerpc/bsd-setjmp.S index f02d781..d26b3fc 100644 --- a/sysdeps/powerpc/bsd-setjmp.S +++ b/sysdeps/powerpc/bsd-setjmp.S @@ -25,11 +25,7 @@ ENTRY (__setjmp) li %r4,1 /* Set second argument to 1. 
*/ -#ifdef PIC - b __sigsetjmp@plt -#else - b __sigsetjmp -#endif + b JUMPTARGET(__sigsetjmp) END (__setjmp) .globl setjmp diff --git a/sysdeps/powerpc/dl-machine.h b/sysdeps/powerpc/dl-machine.h index 917e4f7..771b711 100644 --- a/sysdeps/powerpc/dl-machine.h +++ b/sysdeps/powerpc/dl-machine.h @@ -149,33 +149,34 @@ elf_machine_load_address (void) #define elf_machine_relplt elf_machine_rela /* This code is used in dl-runtime.c to call the `fixup' function - and then redirect to the address it returns. It is called - from code built in the PLT by elf_machine_runtime_setup. */ + and then redirect to the address it returns. It is called + from code built in the PLT by elf_machine_runtime_setup. */ #define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\ .section \".text\" .align 2 .globl _dl_runtime_resolve .type _dl_runtime_resolve,@function _dl_runtime_resolve: - # We need to save the registers used to pass parameters. - # We build a stack frame to put them in. + # We need to save the registers used to pass parameters, and register 0, + # which is used by _mcount; the registers are saved in a stack frame. stwu 1,-48(1) - mflr 0 + stw 0,12(1) stw 3,16(1) stw 4,20(1) - stw 0,52(1) + # The code that calls this has put parameters for `fixup' in r12 and r11. + mr 3,12 stw 5,24(1) - # We also need to save some of the condition register fields. - mfcr 0 + mr 4,11 stw 6,28(1) + mflr 0 + # We also need to save some of the condition register fields. stw 7,32(1) + stw 0,52(1) stw 8,36(1) + mfcr 0 stw 9,40(1) stw 10,44(1) - stw 0,12(1) - # The code that calls this has put parameters for `fixup' in r12 and r11. - mr 3,12 - mr 4,11 + stw 0,8(1) bl fixup@local # 'fixup' returns the address we want to branch to. mtctr 3 @@ -184,20 +185,21 @@ _dl_runtime_resolve: lwz 10,44(1) lwz 9,40(1) mtlr 0 - lwz 0,12(1) lwz 8,36(1) + lwz 0,8(1) lwz 7,32(1) lwz 6,28(1) mtcrf 0xFF,0 lwz 5,24(1) lwz 4,20(1) lwz 3,16(1) + lwz 0,12(1) # ...unwind the stack frame, and jump to the PLT entry we updated. 
addi 1,1,48 bctr 0: .size _dl_runtime_resolve,0b-_dl_runtime_resolve - # undo '.section text'. + # Undo '.section text'. .previous "); @@ -213,20 +215,20 @@ asm ("\ .type _start,@function _start: # We start with the following on the stack, from top: - # argc (4 bytes) - # arguments for program (terminated by NULL) - # environment variables (terminated by NULL) - # arguments for the program loader + # argc (4 bytes); + # arguments for program (terminated by NULL); + # environment variables (terminated by NULL); + # arguments for the program loader. # FIXME: perhaps this should do the same trick as elf/start.c? # Call _dl_start with one parameter pointing at argc - mr 3,1 + mr 3,1 # (we have to frob the stack pointer a bit to allow room for # _dl_start to save the link register) - li 4,0 + li 4,0 addi 1,1,-16 - stw 4,0(1) - bl _dl_start@local + stw 4,0(1) + bl _dl_start@local # Now, we do our main work of calling initialisation procedures. # The ELF ABI doesn't say anything about parameters for these, @@ -234,70 +236,72 @@ _start: # Changing these is strongly discouraged (not least because argc is # passed by value!). - # put our GOT pointer in r31 - bl _GLOBAL_OFFSET_TABLE_-4@local + # Put our GOT pointer in r31, + bl _GLOBAL_OFFSET_TABLE_-4@local mflr 31 - # the address of _start in r30 - mr 30,3 - # &_dl_argc in 29, &_dl_argv in 27, and _dl_default_scope in 28 - lwz 28,_dl_default_scope@got(31) - lwz 29,_dl_argc@got(31) - lwz 27,_dl_argv@got(31) + # the address of _start in r30, + mr 30,3 + # &_dl_argc in 29, &_dl_argv in 27, and _dl_default_scope in 28. + lwz 28,_dl_default_scope@got(31) + lwz 29,_dl_argc@got(31) + lwz 27,_dl_argv@got(31) 0: - # call initfunc = _dl_init_next(_dl_default_scope[2]) - lwz 3,8(28) - bl _dl_init_next@plt - # if initfunc is NULL, we exit the loop - mr. 
0,3 - beq 1f + # Set initfunc = _dl_init_next(_dl_default_scope[2]) + lwz 3,8(28) + bl _dl_init_next@plt + # If initfunc is NULL, we exit the loop; otherwise, + cmpwi 3,0 + beq 1f # call initfunc(_dl_argc, _dl_argv, _dl_argv+_dl_argc+1) - mtlr 0 - lwz 3,0(29) - lwz 4,0(27) + mtlr 3 + lwz 3,0(29) + lwz 4,0(27) slwi 5,3,2 - add 5,4,5 + add 5,4,5 addi 5,5,4 blrl # and loop. - b 0b + b 0b 1: # Now, to conform to the ELF ABI, we have to: - # pass argv (actually _dl_argv) in r4 - lwz 4,0(27) - # pass argc (actually _dl_argc) in r3 - lwz 3,0(29) - # pass envp (actually _dl_argv+_dl_argc+1) in r5 + # Pass argc (actually _dl_argc) in r3; + lwz 3,0(29) + # pass argv (actually _dl_argv) in r4; + lwz 4,0(27) + # pass envp (actually _dl_argv+_dl_argc+1) in r5; slwi 5,3,2 - add 5,4,5 - addi 5,5,4 - # pass the auxilary vector in r6. This is passed just after _envp. - addi 6,5,-4 + add 6,4,5 + addi 5,6,4 + # pass the auxilary vector in r6. This is passed to us just after _envp. 2: lwzu 0,4(6) - cmpwi 1,0,0 - bne 2b + cmpwi 0,0,0 + bne 2b addi 6,6,4 - # pass a termination function pointer (in this case _dl_fini) in r7 - lwz 7,_dl_fini@got(31) - # now, call the start function in r30... + # Pass a termination function pointer (in this case _dl_fini) in r7. + lwz 7,_dl_fini@got(31) + # Now, call the start function in r30... mtctr 30 - # pass the stack pointer in r1 (so far so good), pointing to a NULL value - # (this lets our startup code distinguish between a program linked statically, + lwz 26,_dl_starting_up@got(31) + # Pass the stack pointer in r1 (so far so good), pointing to a NULL value. + # (This lets our startup code distinguish between a program linked statically, # which linux will call with argc on top of the stack which will hopefully # never be zero, and a dynamically linked program which will always have # a NULL on the top of the stack). # Take the opportunity to clear LR, so anyone who accidentally returns - # from _start gets SEGV. 
- li 0,0 - stw 0,0(1) - mtlr 0 - # and also clear _dl_starting_up - lwz 26,_dl_starting_up@got(31) - stw 0,0(26) - # go do it! + # from _start gets SEGV. Also clear the next few words of the stack. + li 31,0 + stw 31,0(1) + mtlr 31 + stw 31,4(1) + stw 31,8(1) + stw 31,12(1) + # Clear _dl_starting_up. + stw 31,0(26) + # Go do it! bctr 0: .size _start,0b-_start - # undo '.section text'. + # Undo '.section text'. .previous "); @@ -346,7 +350,7 @@ static ElfW(Addr) _dl_preferred_address = 1 /* We require the address of the PLT entry returned from fixup, not the first word of the PLT entry. */ -#define ELF_FIXUP_RETURN_VALUE(map, result) (&(result)) +#define ELF_FIXUP_RETURN_VALUE(map, result) ((Elf32_Addr) &(result)) /* Nonzero iff TYPE should not be allowed to resolve to one of the main executable's symbols, as for a COPY reloc. */ @@ -396,7 +400,7 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) { if (map->l_info[DT_JMPREL]) { - int i; + Elf32_Word i; /* Fill in the PLT. Its initial contents are directed to a function earlier in the PLT which arranges for the dynamic linker to be called back. */ @@ -516,10 +520,10 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, { #ifndef RTLD_BOOTSTRAP const Elf32_Sym *const refsym = sym; + extern char **_dl_argv; #endif Elf32_Word loadbase, finaladdr; const int rinfo = ELF32_R_TYPE (reloc->r_info); - extern char **_dl_argv; if (rinfo == R_PPC_NONE) return; @@ -551,9 +555,9 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + reloc->r_addend); } - /* This is an if/else if chain because GCC 2.7.2.[012] turns case - statements into non-PIC table lookups. When a later version - comes out that fixes this, this should be changed. */ + /* This is still an if/else if chain because GCC uses the GOT to find + the table for table-based switch statements, and we haven't set it + up yet. 
*/ if (rinfo == R_PPC_UADDR32 || rinfo == R_PPC_GLOB_DAT || rinfo == R_PPC_ADDR32 || @@ -561,6 +565,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, { *reloc_addr = finaladdr; } +#ifndef RTLD_BOOTSTRAP else if (rinfo == R_PPC_ADDR16_LO) { *(Elf32_Half*) reloc_addr = finaladdr; @@ -573,7 +578,6 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, { *(Elf32_Half*) reloc_addr = (finaladdr + 0x8000) >> 16; } -#ifndef RTLD_BOOTSTRAP else if (rinfo == R_PPC_REL24) { Elf32_Sword delta = finaladdr - (Elf32_Word) (char *) reloc_addr; @@ -693,12 +697,14 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, #endif } +#ifndef RTLD_BOOTSTRAP if (rinfo == R_PPC_ADDR16_LO || rinfo == R_PPC_ADDR16_HI || rinfo == R_PPC_ADDR16_HA || rinfo == R_PPC_REL24 || rinfo == R_PPC_ADDR24) MODIFIED_CODE_NOQUEUE (reloc_addr); +#endif } #define ELF_MACHINE_NO_REL 1 diff --git a/sysdeps/powerpc/lshift.S b/sysdeps/powerpc/lshift.S new file mode 100644 index 0000000..b1487a1 --- /dev/null +++ b/sysdeps/powerpc/lshift.S @@ -0,0 +1,123 @@ +/* Shift a limb left, low level routine. + Copyright (C) 1996, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +#include <sysdep.h> + +/* mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize, + unsigned int cnt) */ + +EALIGN(__mpn_lshift,3,0) + mtctr %r5 # copy size into CTR + cmplwi %cr0,%r5,16 # is size < 16 + slwi %r0,%r5,2 + add %r7,%r3,%r0 # make r7 point at end of res + add %r4,%r4,%r0 # make r4 point at end of s1 + lwzu %r11,-4(%r4) # load first s1 limb + subfic %r8,%r6,32 + srw %r3,%r11,%r8 # compute function return value + bge %cr0,L(big) # branch if size >= 16 + + bdz L(end1) + +0: lwzu %r10,-4(%r4) + slw %r9,%r11,%r6 + srw %r12,%r10,%r8 + or %r9,%r9,%r12 + stwu %r9,-4(%r7) + bdz L(end2) + lwzu %r11,-4(%r4) + slw %r9,%r10,%r6 + srw %r12,%r11,%r8 + or %r9,%r9,%r12 + stwu %r9,-4(%r7) + bdnz 0b + +L(end1):slw %r0,%r11,%r6 + stw %r0,-4(%r7) + blr + + +/* Guaranteed not to succeed. */ +L(boom): tweq %r0,%r0 + +/* We imitate a case statement, by using (yuk!) fixed-length code chunks, + of size 4*12 bytes. We have to do this (or something) to make this PIC. */ +L(big): mflr %r9 + bltl- %cr0,L(boom) # Never taken, only used to set LR. 
+ slwi %r10,%r6,4 + mflr %r12 + add %r10,%r12,%r10 + slwi %r8,%r6,5 + add %r10,%r8,%r10 + mtctr %r10 + addi %r5,%r5,-1 + mtlr %r9 + bctr + +L(end2):slw %r0,%r10,%r6 + stw %r0,-4(%r7) + blr + +#define DO_LSHIFT(n) \ + mtctr %r5; \ +0: lwzu %r10,-4(%r4); \ + slwi %r9,%r11,n; \ + inslwi %r9,%r10,n,32-n; \ + stwu %r9,-4(%r7); \ + bdz- L(end2); \ + lwzu %r11,-4(%r4); \ + slwi %r9,%r10,n; \ + inslwi %r9,%r11,n,32-n; \ + stwu %r9,-4(%r7); \ + bdnz 0b; \ + b L(end1) + + DO_LSHIFT(1) + DO_LSHIFT(2) + DO_LSHIFT(3) + DO_LSHIFT(4) + DO_LSHIFT(5) + DO_LSHIFT(6) + DO_LSHIFT(7) + DO_LSHIFT(8) + DO_LSHIFT(9) + DO_LSHIFT(10) + DO_LSHIFT(11) + DO_LSHIFT(12) + DO_LSHIFT(13) + DO_LSHIFT(14) + DO_LSHIFT(15) + DO_LSHIFT(16) + DO_LSHIFT(17) + DO_LSHIFT(18) + DO_LSHIFT(19) + DO_LSHIFT(20) + DO_LSHIFT(21) + DO_LSHIFT(22) + DO_LSHIFT(23) + DO_LSHIFT(24) + DO_LSHIFT(25) + DO_LSHIFT(26) + DO_LSHIFT(27) + DO_LSHIFT(28) + DO_LSHIFT(29) + DO_LSHIFT(30) + DO_LSHIFT(31) + +END(__mpn_lshift) diff --git a/sysdeps/powerpc/lshift.s b/sysdeps/powerpc/lshift.s deleted file mode 100644 index 9612a3d..0000000 --- a/sysdeps/powerpc/lshift.s +++ /dev/null @@ -1,479 +0,0 @@ - # Shift a limb left, low level routine. - # Copyright (C) 1996, 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details. - # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. 
If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize, - # unsigned int cnt) - - .align 3 - .globl __mpn_lshift - .type __mpn_lshift,@function -__mpn_lshift: - mtctr %r5 # copy size into CTR - cmplwi %cr0,%r5,16 # is size < 16 - slwi %r0,%r5,2 - add %r7,%r3,%r0 # make r7 point at end of res - add %r4,%r4,%r0 # make r4 point at end of s1 - lwzu %r11,-4(%r4) # load first s1 limb - subfic %r8,%r6,32 - srw %r3,%r11,%r8 # compute function return value - bge %cr0,Lbig # branch if size >= 16 - - bdz Lend1 - -Loop: lwzu %r10,-4(%r4) - slw %r9,%r11,%r6 - srw %r12,%r10,%r8 - or %r9,%r9,%r12 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slw %r9,%r10,%r6 - srw %r12,%r11,%r8 - or %r9,%r9,%r12 - stwu %r9,-4(%r7) - bdnz Loop - b Lend1 - - # Guaranteed not to succeed. -LBoom: tweq %r0,%r0 - - # We imitate a case statement, by using (yuk!) fixed-length code chunks, - # of size 4*12 bytes. We have to do this (or something) to make this PIC. -Lbig: mflr %r9 - bltl %cr0,LBoom # Never taken, only used to set LR. 
- slwi %r10,%r6,4 - mflr %r12 - add %r10,%r12,%r10 - slwi %r8,%r6,5 - add %r10,%r8,%r10 - mtctr %r10 - addi %r5,%r5,-1 - mtlr %r9 - bctr - -Lend1: slw %r0,%r11,%r6 - stw %r0,-4(%r7) - blr - - mtctr %r5 -Loop1: lwzu %r10,-4(%r4) - slwi %r9,%r11,1 - inslwi %r9,%r10,1,31 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,1 - inslwi %r9,%r11,1,31 - stwu %r9,-4(%r7) - bdnz Loop1 - b Lend1 - - mtctr %r5 -Loop2: lwzu %r10,-4(%r4) - slwi %r9,%r11,2 - inslwi %r9,%r10,2,30 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,2 - inslwi %r9,%r11,2,30 - stwu %r9,-4(%r7) - bdnz Loop2 - b Lend1 - - mtctr %r5 -Loop3: lwzu %r10,-4(%r4) - slwi %r9,%r11,3 - inslwi %r9,%r10,3,29 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,3 - inslwi %r9,%r11,3,29 - stwu %r9,-4(%r7) - bdnz Loop3 - b Lend1 - - mtctr %r5 -Loop4: lwzu %r10,-4(%r4) - slwi %r9,%r11,4 - inslwi %r9,%r10,4,28 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,4 - inslwi %r9,%r11,4,28 - stwu %r9,-4(%r7) - bdnz Loop4 - b Lend1 - - mtctr %r5 -Loop5: lwzu %r10,-4(%r4) - slwi %r9,%r11,5 - inslwi %r9,%r10,5,27 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,5 - inslwi %r9,%r11,5,27 - stwu %r9,-4(%r7) - bdnz Loop5 - b Lend1 - - mtctr %r5 -Loop6: lwzu %r10,-4(%r4) - slwi %r9,%r11,6 - inslwi %r9,%r10,6,26 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,6 - inslwi %r9,%r11,6,26 - stwu %r9,-4(%r7) - bdnz Loop6 - b Lend1 - - mtctr %r5 -Loop7: lwzu %r10,-4(%r4) - slwi %r9,%r11,7 - inslwi %r9,%r10,7,25 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,7 - inslwi %r9,%r11,7,25 - stwu %r9,-4(%r7) - bdnz Loop7 - b Lend1 - - mtctr %r5 -Loop8: lwzu %r10,-4(%r4) - slwi %r9,%r11,8 - inslwi %r9,%r10,8,24 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,8 - inslwi %r9,%r11,8,24 - stwu %r9,-4(%r7) - bdnz Loop8 - b Lend1 - - mtctr %r5 -Loop9: lwzu %r10,-4(%r4) - slwi %r9,%r11,9 - inslwi %r9,%r10,9,23 - stwu 
%r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,9 - inslwi %r9,%r11,9,23 - stwu %r9,-4(%r7) - bdnz Loop9 - b Lend1 - - mtctr %r5 -Loop10: lwzu %r10,-4(%r4) - slwi %r9,%r11,10 - inslwi %r9,%r10,10,22 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,10 - inslwi %r9,%r11,10,22 - stwu %r9,-4(%r7) - bdnz Loop10 - b Lend1 - - mtctr %r5 -Loop11: lwzu %r10,-4(%r4) - slwi %r9,%r11,11 - inslwi %r9,%r10,11,21 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,11 - inslwi %r9,%r11,11,21 - stwu %r9,-4(%r7) - bdnz Loop11 - b Lend1 - - mtctr %r5 -Loop12: lwzu %r10,-4(%r4) - slwi %r9,%r11,12 - inslwi %r9,%r10,12,20 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,12 - inslwi %r9,%r11,12,20 - stwu %r9,-4(%r7) - bdnz Loop12 - b Lend1 - - mtctr %r5 -Loop13: lwzu %r10,-4(%r4) - slwi %r9,%r11,13 - inslwi %r9,%r10,13,19 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,13 - inslwi %r9,%r11,13,19 - stwu %r9,-4(%r7) - bdnz Loop13 - b Lend1 - - mtctr %r5 -Loop14: lwzu %r10,-4(%r4) - slwi %r9,%r11,14 - inslwi %r9,%r10,14,18 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,14 - inslwi %r9,%r11,14,18 - stwu %r9,-4(%r7) - bdnz Loop14 - b Lend1 - - mtctr %r5 -Loop15: lwzu %r10,-4(%r4) - slwi %r9,%r11,15 - inslwi %r9,%r10,15,17 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,15 - inslwi %r9,%r11,15,17 - stwu %r9,-4(%r7) - bdnz Loop15 - b Lend1 - - mtctr %r5 -Loop16: lwzu %r10,-4(%r4) - slwi %r9,%r11,16 - inslwi %r9,%r10,16,16 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,16 - inslwi %r9,%r11,16,16 - stwu %r9,-4(%r7) - bdnz Loop16 - b Lend1 - - mtctr %r5 -Loop17: lwzu %r10,-4(%r4) - slwi %r9,%r11,17 - inslwi %r9,%r10,17,15 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,17 - inslwi %r9,%r11,17,15 - stwu %r9,-4(%r7) - bdnz Loop17 - b Lend1 - - mtctr %r5 -Loop18: lwzu %r10,-4(%r4) - slwi %r9,%r11,18 - inslwi %r9,%r10,18,14 - stwu %r9,-4(%r7) - 
bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,18 - inslwi %r9,%r11,18,14 - stwu %r9,-4(%r7) - bdnz Loop18 - b Lend1 - - mtctr %r5 -Loop19: lwzu %r10,-4(%r4) - slwi %r9,%r11,19 - inslwi %r9,%r10,19,13 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,19 - inslwi %r9,%r11,19,13 - stwu %r9,-4(%r7) - bdnz Loop19 - b Lend1 - - mtctr %r5 -Loop20: lwzu %r10,-4(%r4) - slwi %r9,%r11,20 - inslwi %r9,%r10,20,12 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,20 - inslwi %r9,%r11,20,12 - stwu %r9,-4(%r7) - bdnz Loop20 - b Lend1 - - mtctr %r5 -Loop21: lwzu %r10,-4(%r4) - slwi %r9,%r11,21 - inslwi %r9,%r10,21,11 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,21 - inslwi %r9,%r11,21,11 - stwu %r9,-4(%r7) - bdnz Loop21 - b Lend1 - - mtctr %r5 -Loop22: lwzu %r10,-4(%r4) - slwi %r9,%r11,22 - inslwi %r9,%r10,22,10 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,22 - inslwi %r9,%r11,22,10 - stwu %r9,-4(%r7) - bdnz Loop22 - b Lend1 - - mtctr %r5 -Loop23: lwzu %r10,-4(%r4) - slwi %r9,%r11,23 - inslwi %r9,%r10,23,9 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,23 - inslwi %r9,%r11,23,9 - stwu %r9,-4(%r7) - bdnz Loop23 - b Lend1 - - mtctr %r5 -Loop24: lwzu %r10,-4(%r4) - slwi %r9,%r11,24 - inslwi %r9,%r10,24,8 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,24 - inslwi %r9,%r11,24,8 - stwu %r9,-4(%r7) - bdnz Loop24 - b Lend1 - - mtctr %r5 -Loop25: lwzu %r10,-4(%r4) - slwi %r9,%r11,25 - inslwi %r9,%r10,25,7 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,25 - inslwi %r9,%r11,25,7 - stwu %r9,-4(%r7) - bdnz Loop25 - b Lend1 - - mtctr %r5 -Loop26: lwzu %r10,-4(%r4) - slwi %r9,%r11,26 - inslwi %r9,%r10,26,6 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,26 - inslwi %r9,%r11,26,6 - stwu %r9,-4(%r7) - bdnz Loop26 - b Lend1 - - mtctr %r5 -Loop27: lwzu %r10,-4(%r4) - slwi %r9,%r11,27 - inslwi %r9,%r10,27,5 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu 
%r11,-4(%r4) - slwi %r9,%r10,27 - inslwi %r9,%r11,27,5 - stwu %r9,-4(%r7) - bdnz Loop27 - b Lend1 - - mtctr %r5 -Loop28: lwzu %r10,-4(%r4) - slwi %r9,%r11,28 - inslwi %r9,%r10,28,4 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,28 - inslwi %r9,%r11,28,4 - stwu %r9,-4(%r7) - bdnz Loop28 - b Lend1 - - mtctr %r5 -Loop29: lwzu %r10,-4(%r4) - slwi %r9,%r11,29 - inslwi %r9,%r10,29,3 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,29 - inslwi %r9,%r11,29,3 - stwu %r9,-4(%r7) - bdnz Loop29 - b Lend1 - - mtctr %r5 -Loop30: lwzu %r10,-4(%r4) - slwi %r9,%r11,30 - inslwi %r9,%r10,30,2 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,30 - inslwi %r9,%r11,30,2 - stwu %r9,-4(%r7) - bdnz Loop30 - b Lend1 - - mtctr %r5 -Loop31: lwzu %r10,-4(%r4) - slwi %r9,%r11,31 - inslwi %r9,%r10,31,1 - stwu %r9,-4(%r7) - bdz Lend2 - lwzu %r11,-4(%r4) - slwi %r9,%r10,31 - inslwi %r9,%r11,31,1 - stwu %r9,-4(%r7) - bdnz Loop31 - b Lend1 - -Lend2: slw %r0,%r10,%r6 - stw %r0,-4(%r7) - blr diff --git a/sysdeps/powerpc/machine-gmon.h b/sysdeps/powerpc/machine-gmon.h new file mode 100644 index 0000000..ba53807 --- /dev/null +++ b/sysdeps/powerpc/machine-gmon.h @@ -0,0 +1,32 @@ +/* PowerPC-specific implementation of profiling support. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* We need a special version of the `mcount' function because it has + to preserve more registers than your usual function. */ + +void __mcount_internal (unsigned long frompc, unsigned long selfpc); + +#define _MCOUNT_DECL(frompc, selfpc) \ +void __mcount_internal (unsigned long frompc, unsigned long selfpc) + + +/* Define MCOUNT as empty since we have the implementation in another + file. */ +#define MCOUNT diff --git a/sysdeps/powerpc/memset.S b/sysdeps/powerpc/memset.S new file mode 100644 index 0000000..6ac32dd --- /dev/null +++ b/sysdeps/powerpc/memset.S @@ -0,0 +1,199 @@ +/* Optimized memset implementation for PowerPC. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + +EALIGN(memset,5,1) +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). 
There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. + r6: current address we are storing at + r7: number of bytes we are setting now (when aligning) */ + +/* take care of case for size <= 4 */ + cmplwi %cr1,%r5,4 + andi. %r7,%r3,3 + mr %r6,%r3 + ble- %cr1,L(small) +/* align to word boundary */ + cmplwi %cr5,%r5,31 + rlwimi %r4,%r4,8,16,23 + beq+ L(aligned) # 8th instruction from .align + mtcrf 0x01,%r3 + subfic %r7,%r7,4 + add %r6,%r6,%r7 + sub %r5,%r5,%r7 + bf+ 31,0f + stb %r4,0(%r3) + bt 30,L(aligned) +0: sth %r4,-2(%r6) # 16th instruction from .align +/* take care of case for size <= 31 */ +L(aligned): + mtcrf 0x01,%r5 + rlwimi %r4,%r4,16,0,15 + ble %cr5,L(medium) +/* align to cache line boundary... */ + andi. %r7,%r6,0x1C + subfic %r7,%r7,0x20 + beq L(caligned) + mtcrf 0x01,%r7 + add %r6,%r6,%r7 + sub %r5,%r5,%r7 + cmplwi %cr1,%r7,0x10 + mr %r8,%r6 + bf 28,1f + stw %r4,-4(%r8) + stwu %r4,-8(%r8) +1: blt %cr1,2f + stw %r4,-4(%r8) # 32nd instruction from .align + stw %r4,-8(%r8) + stw %r4,-12(%r8) + stwu %r4,-16(%r8) +2: bf 29,L(caligned) + stw %r4,-4(%r8) +/* now aligned to a cache line. */ +L(caligned): + cmplwi %cr1,%r4,0 + clrrwi. %r7,%r5,5 + mtcrf 0x01,%r5 # 40th instruction from .align + beq %cr1,L(zloopstart) # special case for clearing memory using dcbz + srwi %r0,%r7,5 + mtctr %r0 + beq L(medium) # we may not actually get to do a full line + clrlwi. 
%r5,%r5,27 + add %r6,%r6,%r7 +0: li %r8,-0x40 + bdz L(cloopdone) # 48th instruction from .align + +3: dcbz %r8,%r6 + stw %r4,-4(%r6) + stw %r4,-8(%r6) + stw %r4,-12(%r6) + stw %r4,-16(%r6) + nop # let 601 fetch last 4 instructions of loop + stw %r4,-20(%r6) + stw %r4,-24(%r6) # 56th instruction from .align + nop # let 601 fetch first 8 instructions of loop + stw %r4,-28(%r6) + stwu %r4,-32(%r6) + bdnz 3b +L(cloopdone): + stw %r4,-4(%r6) + stw %r4,-8(%r6) + stw %r4,-12(%r6) + stw %r4,-16(%r6) # 64th instruction from .align + stw %r4,-20(%r6) + cmplwi %cr1,%r5,16 + stw %r4,-24(%r6) + stw %r4,-28(%r6) + stwu %r4,-32(%r6) + beqlr + add %r6,%r6,%r7 + b L(medium_tail2) # 72nd instruction from .align + + .align 5 + nop +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): + clrlwi %r5,%r5,27 + mtcrf 0x02,%r7 + srwi. %r0,%r7,7 + mtctr %r0 + li %r7,0x20 + li %r8,-0x40 + cmplwi %cr1,%r5,16 # 8 + bf 26,0f + dcbz 0,%r6 + addi %r6,%r6,0x20 +0: li %r9,-0x20 + bf 25,1f + dcbz 0,%r6 + dcbz %r7,%r6 + addi %r6,%r6,0x40 # 16 +1: cmplwi %cr5,%r5,0 + beq L(medium) +L(zloop): + dcbz 0,%r6 + dcbz %r7,%r6 + addi %r6,%r6,0x80 + dcbz %r8,%r6 + dcbz %r9,%r6 + bdnz L(zloop) + beqlr %cr5 + b L(medium_tail2) + + .align 5 +L(small): +/* Memset of 4 bytes or less. */ + cmplwi %cr5,%r5,1 + cmplwi %cr1,%r5,3 + bltlr %cr5 + stb %r4,0(%r6) + beqlr %cr5 + nop + stb %r4,1(%r6) + bltlr %cr1 + stb %r4,2(%r6) + beqlr %cr1 + nop + stb %r4,3(%r6) + blr + +/* Memset of 0-31 bytes. 
*/ + .align 5 +L(medium): + cmplwi %cr1,%r5,16 +L(medium_tail2): + add %r6,%r6,%r5 +L(medium_tail): + bt- 31,L(medium_31t) + bt- 30,L(medium_30t) +L(medium_30f): + bt- 29,L(medium_29t) +L(medium_29f): + bge- %cr1,L(medium_27t) + bflr- 28 + stw %r4,-4(%r6) # 8th instruction from .align + stw %r4,-8(%r6) + blr + +L(medium_31t): + stbu %r4,-1(%r6) + bf- 30,L(medium_30f) +L(medium_30t): + sthu %r4,-2(%r6) + bf- 29,L(medium_29f) +L(medium_29t): + stwu %r4,-4(%r6) + blt- %cr1,L(medium_27f) # 16th instruction from .align +L(medium_27t): + stw %r4,-4(%r6) + stw %r4,-8(%r6) + stw %r4,-12(%r6) + stwu %r4,-16(%r6) +L(medium_27f): + bflr- 28 +L(medium_28t): + stw %r4,-4(%r6) + stw %r4,-8(%r6) + blr +END(memset) diff --git a/sysdeps/powerpc/memset.s b/sysdeps/powerpc/memset.s deleted file mode 100644 index 4c8bf8c..0000000 --- a/sysdeps/powerpc/memset.s +++ /dev/null @@ -1,202 +0,0 @@ - # Optimized memset implementation for PowerPC. - # Copyright (C) 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details. - # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. 
- - .section ".text" - .align 5 - nop - - .globl memset - .type memset,@function -memset: - # __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); - # Returns 's'. - - # The memset is done in three sizes: byte (8 bits), word (32 bits), - # cache line (256 bits). There is a special case for setting cache lines - # to 0, to take advantage of the dcbz instruction. - # r6: current address we are storing at - # r7: number of bytes we are setting now (when aligning) - - # take care of case for size <= 4 - cmplwi %cr1,%r5,4 - andi. %r7,%r3,3 - mr %r6,%r3 - ble- %cr1,small - # align to word boundary - cmplwi %cr5,%r5,31 - rlwimi %r4,%r4,8,16,23 - beq+ aligned # 8th instruction from .align - mtcrf 0x01,%r3 - subfic %r7,%r7,4 - add %r6,%r6,%r7 - sub %r5,%r5,%r7 - bf+ 31,0f - stb %r4,0(%r3) - bt 30,aligned -0: sth %r4,-2(%r6) # 16th instruction from .align - # take care of case for size < 31 -aligned: - mtcrf 0x01,%r5 - rlwimi %r4,%r4,16,0,15 - ble %cr5,medium - # align to cache line boundary... - andi. %r7,%r6,0x1C - subfic %r7,%r7,0x20 - beq caligned - mtcrf 0x01,%r7 - add %r6,%r6,%r7 - sub %r5,%r5,%r7 - cmplwi %cr1,%r7,0x10 - mr %r8,%r6 - bf 28,1f - stw %r4,-4(%r8) - stwu %r4,-8(%r8) -1: blt %cr1,2f - stw %r4,-4(%r8) # 32nd instruction from .align - stw %r4,-8(%r8) - stw %r4,-12(%r8) - stwu %r4,-16(%r8) -2: bf 29,caligned - stw %r4,-4(%r8) - # now aligned to a cache line. -caligned: - cmplwi %cr1,%r4,0 - clrrwi. %r7,%r5,5 - mtcrf 0x01,%r5 # 40th instruction from .align - beq %cr1,zloopstart # special case for clearing memory using dcbz - srwi %r0,%r7,5 - mtctr %r0 - beq medium # we may not actually get to do a full line - clrlwi. 
%r5,%r5,27 - add %r6,%r6,%r7 -0: li %r8,-0x40 - bdz cloopdone # 48th instruction from .align - -cloop: dcbz %r8,%r6 - stw %r4,-4(%r6) - stw %r4,-8(%r6) - stw %r4,-12(%r6) - stw %r4,-16(%r6) - nop # let 601 fetch last 4 instructions of loop - stw %r4,-20(%r6) - stw %r4,-24(%r6) # 56th instruction from .align - nop # let 601 fetch first 8 instructions of loop - stw %r4,-28(%r6) - stwu %r4,-32(%r6) - bdnz cloop -cloopdone: - stw %r4,-4(%r6) - stw %r4,-8(%r6) - stw %r4,-12(%r6) - stw %r4,-16(%r6) # 64th instruction from .align - stw %r4,-20(%r6) - cmplwi %cr1,%r5,16 - stw %r4,-24(%r6) - stw %r4,-28(%r6) - stwu %r4,-32(%r6) - beqlr - add %r6,%r6,%r7 - b medium_tail2 # 72nd instruction from .align - - .align 5 - nop -# clear lines of memory in 128-byte chunks. -zloopstart: - clrlwi %r5,%r5,27 - mtcrf 0x02,%r7 - srwi. %r0,%r7,7 - mtctr %r0 - li %r7,0x20 - li %r8,-0x40 - cmplwi %cr1,%r5,16 # 8 - bf 26,0f - dcbz 0,%r6 - addi %r6,%r6,0x20 -0: li %r9,-0x20 - bf 25,1f - dcbz 0,%r6 - dcbz %r7,%r6 - addi %r6,%r6,0x40 # 16 -1: cmplwi %cr5,%r5,0 - beq medium -zloop: - dcbz 0,%r6 - dcbz %r7,%r6 - addi %r6,%r6,0x80 - dcbz %r8,%r6 - dcbz %r9,%r6 - bdnz zloop - beqlr %cr5 - b medium_tail2 - - .align 5 -small: - # Memset of 4 bytes or less. 
- cmplwi %cr5,%r5,1 - cmplwi %cr1,%r5,3 - bltlr %cr5 - stb %r4,0(%r6) - beqlr %cr5 - nop - stb %r4,1(%r6) - bltlr %cr1 - stb %r4,2(%r6) - beqlr %cr1 - nop - stb %r4,3(%r6) - blr - -# memset of 0-31 bytes - .align 5 -medium: - cmplwi %cr1,%r5,16 -medium_tail2: - add %r6,%r6,%r5 -medium_tail: - bt- 31,medium_31t - bt- 30,medium_30t -medium_30f: - bt- 29,medium_29t -medium_29f: - bge- %cr1,medium_27t - bflr- 28 - stw %r4,-4(%r6) # 8th instruction from .align - stw %r4,-8(%r6) - blr - -medium_31t: - stbu %r4,-1(%r6) - bf- 30,medium_30f -medium_30t: - sthu %r4,-2(%r6) - bf- 29,medium_29f -medium_29t: - stwu %r4,-4(%r6) - blt- %cr1,medium_27f # 16th instruction from .align -medium_27t: - stw %r4,-4(%r6) - stw %r4,-8(%r6) - stw %r4,-12(%r6) - stwu %r4,-16(%r6) -medium_27f: - bflr- 28 -medium_28t: - stw %r4,-4(%r6) - stw %r4,-8(%r6) - blr diff --git a/sysdeps/powerpc/mul_1.S b/sysdeps/powerpc/mul_1.S new file mode 100644 index 0000000..d48bd8f --- /dev/null +++ b/sysdeps/powerpc/mul_1.S @@ -0,0 +1,46 @@ +/* Multiply a limb vector by a limb, for PowerPC. + Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
*/ + +#include <sysdep.h> + +/* mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate s1*s2 and put result in res_ptr; return carry. */ + +ENTRY(__mpn_mul_1) + mtctr %r5 + + lwz %r0,0(%r4) + mullw %r7,%r0,%r6 + mulhwu %r10,%r0,%r6 + addi %r3,%r3,-4 # adjust res_ptr + addic %r5,%r5,0 # clear cy with dummy insn + bdz 1f + +0: lwzu %r0,4(%r4) + stwu %r7,4(%r3) + mullw %r8,%r0,%r6 + adde %r7,%r8,%r10 + mulhwu %r10,%r0,%r6 + bdnz 0b + +1: stw %r7,4(%r3) + addze %r3,%r10 + blr +END(__mpn_mul_1) diff --git a/sysdeps/powerpc/mul_1.s b/sysdeps/powerpc/mul_1.s deleted file mode 100644 index d6eb623..0000000 --- a/sysdeps/powerpc/mul_1.s +++ /dev/null @@ -1,47 +0,0 @@ - # Multiply a limb vector by a limb, for PowerPC. - # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details. - # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, - # mp_size_t s1_size, mp_limb_t s2_limb) - # Calculate s1*s2 and put result in res_ptr; return carry. 
- - .align 2 - .globl __mpn_mul_1 - .type __mpn_mul_1,@function - -__mpn_mul_1: - mtctr %r5 - - lwz %r0,0(%r4) - mullw %r7,%r0,%r6 - mulhwu %r10,%r0,%r6 - addi %r3,%r3,-4 # adjust res_ptr - addic %r5,%r5,0 # clear cy with dummy insn - bdz Lend - -Loop: lwzu %r0,4(%r4) - stwu %r7,4(%r3) - mullw %r8,%r0,%r6 - adde %r7,%r8,%r10 - mulhwu %r10,%r0,%r6 - bdnz Loop - -Lend: stw %r7,4(%r3) - addze %r3,%r10 - blr diff --git a/sysdeps/powerpc/ppc-mcount.S b/sysdeps/powerpc/ppc-mcount.S new file mode 100644 index 0000000..06f1fcd --- /dev/null +++ b/sysdeps/powerpc/ppc-mcount.S @@ -0,0 +1,84 @@ +/* PowerPC-specific implementation of profiling support. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* This would be bad. */ +#ifdef PROF +#undef PROF +#endif + +#include <sysdep.h> + +/* We do profiling as described in the SYSV ELF ABI, _mcount is called + with the address of a data word in r0 (that is different for every + routine, initialised to 0, and otherwise unused). The caller has put + the address the caller will return to in the usual place on the stack, + 4(%r1). 
_mcount is responsible for ensuring that when it returns no + argument-passing registers are disturbed, and that the LR is set back + to (what the caller sees as) 4(%r1). + + This is intended so that the following code can be inserted at the + front of any routine without changing the routine: + + .data + .align 2 + 0: .long 0 + .previous + mflr %r0 + lis %r11,0b@ha + stw %r0,4(%r1) + addi %r0,%r11,0b@l + bl _mcount +*/ + +ENTRY(_mcount) + stwu %r1,-48(%r1) +/* We need to save the parameter-passing registers. */ + stw %r3, 12(%r1) + stw %r4, 16(%r1) + stw %r5, 20(%r1) + stw %r6, 24(%r1) + mflr %r4 + lwz %r3, 52(%r1) + mfcr %r5 + stw %r7, 28(%r1) + stw %r8, 32(%r1) + stw %r9, 36(%r1) + stw %r10,40(%r1) + stw %r4, 44(%r1) + stw %r5, 8(%r1) + bl JUMPTARGET(__mcount_internal) + /* Restore the registers... */ + lwz %r6, 8(%r1) + lwz %r0, 44(%r1) + lwz %r3, 12(%r1) + mtctr %r0 + lwz %r4, 16(%r1) + mtcrf 0xff,%r6 + lwz %r5, 20(%r1) + lwz %r6, 24(%r1) + lwz %r0, 52(%r1) + lwz %r7, 28(%r1) + lwz %r8, 32(%r1) + mtlr %r0 + lwz %r9, 36(%r1) + lwz %r10,40(%r1) + /* ...unwind the stack frame, and return to your usual programming. */ + addi %r1,%r1,48 + bctr +END(_mcount) diff --git a/sysdeps/powerpc/rshift.S b/sysdeps/powerpc/rshift.S new file mode 100644 index 0000000..eb1f562 --- /dev/null +++ b/sysdeps/powerpc/rshift.S @@ -0,0 +1,56 @@ +/* Shift a limb right, low level routine. + Copyright (C) 1995, 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + +/* INPUT PARAMETERS + res_ptr r3 + s1_ptr r4 + size r5 + cnt r6 */ + +ENTRY(__mpn_rshift) + mtctr 5 # copy size into CTR + addi 7,3,-4 # move adjusted res_ptr to free return reg + subfic 8,6,32 + lwz 11,0(4) # load first s1 limb + slw 3,11,8 # compute function return value + bdz 1f + +0: lwzu 10,4(4) + srw 9,11,6 + slw 12,10,8 + or 9,9,12 + stwu 9,4(7) + bdz 2f + lwzu 11,4(4) + srw 9,10,6 + slw 12,11,8 + or 9,9,12 + stwu 9,4(7) + bdnz 0b + +1: srw 0,11,6 + stw 0,4(7) + blr + +2: srw 0,10,6 + stw 0,4(7) + blr +END(__mpn_rshift) diff --git a/sysdeps/powerpc/rshift.s b/sysdeps/powerpc/rshift.s deleted file mode 100644 index 20f09ad..0000000 --- a/sysdeps/powerpc/rshift.s +++ /dev/null @@ -1,59 +0,0 @@ -# PowerPC-32 __mpn_rshift -- - -# Copyright (C) 1995 Free Software Foundation, Inc. - -# This file is part of the GNU MP Library. - -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of the GNU Library General Public License as published by -# the Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. - -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public -# License for more details. - -# You should have received a copy of the GNU Library General Public License -# along with the GNU MP Library; see the file COPYING.LIB. If not, write to -# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, -# MA 02111-1307, USA. 
- - -# INPUT PARAMETERS -# res_ptr r3 -# s1_ptr r4 -# size r5 -# cnt r6 - - .align 3 - .globl __mpn_rshift - .type __mpn_rshift,@function -__mpn_rshift: - mtctr 5 # copy size into CTR - addi 7,3,-4 # move adjusted res_ptr to free return reg - subfic 8,6,32 - lwz 11,0(4) # load first s1 limb - slw 3,11,8 # compute function return value - bdz Lend1 - -Loop: lwzu 10,4(4) - srw 9,11,6 - slw 12,10,8 - or 9,9,12 - stwu 9,4(7) - bdz Lend2 - lwzu 11,4(4) - srw 9,10,6 - slw 12,11,8 - or 9,9,12 - stwu 9,4(7) - bdnz Loop - -Lend1: srw 0,11,6 - stw 0,4(7) - blr - -Lend2: srw 0,10,6 - stw 0,4(7) - blr diff --git a/sysdeps/powerpc/s_copysign.S b/sysdeps/powerpc/s_copysign.S index adc7df2..6d5ba82 100644 --- a/sysdeps/powerpc/s_copysign.S +++ b/sysdeps/powerpc/s_copysign.S @@ -1,17 +1,17 @@ /* Copy a sign bit between floating-point values. Copyright (C) 1997 Free Software Foundation, Inc. This file is part of the GNU C Library. - + The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. - + You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, @@ -20,15 +20,12 @@ /* This has been coded in assembler because GCC makes such a mess of it when it's coded in C. 
*/ - .section ".text" - .align 2 - .globl __copysign - .type __copysign,@function -__copysign: +#include <sysdep.h> + +ENTRY(__copysign) /* double [f1] copysign (double [f1] x, double [f2] y); copysign(x,y) returns a value with the magnitude of x and with the sign bit of y. */ - stwu %r1,-16(%r1) stfd %f2,8(%r1) lwz %r3,8(%r1) @@ -39,22 +36,15 @@ __copysign: blr 0: fnabs %f1,%f1 blr -0: - .size __copysign,0b-__copysign - - .globl copysign - .globl copysignf - .globl __copysignf - .weak copysign - .weak copysignf - .set copysign,__copysign + END (__copysign) + +weak_alias(__copysign,copysign) + /* It turns out that it's safe to use this code even for single-precision. */ - .set __copysignf,__copysign - .set copysignf,__copysign +weak_alias(__copysign,copysignf) +strong_alias(__copysign,__copysignf) + #ifdef NO_LONG_DOUBLE - .globl copysignl - .globl __copysignl - .weak copysignl - .set __copysignl,__copysign - .set copysignl,__copysign +weak_alias(__copysign,copysignl) +strong_alias(__copysign,__copysignl) #endif diff --git a/sysdeps/powerpc/s_fabs.S b/sysdeps/powerpc/s_fabs.S index a527335..3c6374b 100644 --- a/sysdeps/powerpc/s_fabs.S +++ b/sysdeps/powerpc/s_fabs.S @@ -1,42 +1,37 @@ /* Floating-point absolute value. PowerPC version. Copyright (C) 1997 Free Software Foundation, Inc. This file is part of the GNU C Library. - + The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. - + You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. 
If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - .section ".text" - .align 2 - .globl __fabs - .type __fabs,@function -__fabs: +#include <sysdep.h> + +ENTRY(__fabs) /* double [f1] fabs (double [f1] x); */ fabs %f1,%f1 blr -0: - .size __fabs,0b-__fabs +END(__fabs) + +weak_alias(__fabs,fabs) - .globl fabs,fabsf,__fabsf - .weak fabs,fabsf - .set fabs,__fabs /* It turns out that it's safe to use this code even for single-precision. */ - .set __fabsf,__fabs - .set fabsf,__fabs +strong_alias(__fabs,__fabsf) +weak_alias(__fabs,fabsf) + #ifdef NO_LONG_DOUBLE - .globl fabsl,__fabsl - .weak fabsl - .set __fabsl,__fabs - .set fabsl,__fabs +weak_alias(__fabs,__fabsl) +weak_alias(__fabs,fabsl) #endif diff --git a/sysdeps/powerpc/setjmp.S b/sysdeps/powerpc/setjmp.S index ddfea7e..8fa863f 100644 --- a/sysdeps/powerpc/setjmp.S +++ b/sysdeps/powerpc/setjmp.S @@ -62,9 +62,5 @@ ENTRY (__sigsetjmp) stfd %f30,((JB_FPRS+16*2)*4)(3) stw %r31,((JB_GPRS+17)*4)(3) stfd %f31,((JB_FPRS+17*2)*4)(3) -#ifdef PIC - b __sigjmp_save@plt -#else - b __sigjmp_save -#endif + b JUMPTARGET(__sigjmp_save) END (__sigsetjmp) diff --git a/sysdeps/powerpc/strchr.S b/sysdeps/powerpc/strchr.S new file mode 100644 index 0000000..156d4d1 --- /dev/null +++ b/sysdeps/powerpc/strchr.S @@ -0,0 +1,111 @@ +/* Optimized strchr implementation for PowerPC. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how this works. */ + +/* char * [r3] strchr (const char *s [r3] , int c [r4] ) + + r0: a temporary + r3: our return result. + r4: byte we're looking for, spread over the whole word + r5: the current word + r6: the constant 0xfefefeff (-0x01010101) + r7: the constant 0x7f7f7f7f + r8: pointer to the current word. + r9: a temporary + r10: the number of bits we should ignore in the first word + r11: a mask with the bits to ignore set to 0 + r12: a temporary */ +ENTRY(strchr) + rlwimi %r4,%r4,8,16,23 + li %r11,-1 + rlwimi %r4,%r4,16,0,15 + lis %r6,0xfeff + lis %r7,0x7f7f + clrrwi %r8,%r3,2 + addi %r7,%r7,0x7f7f + addi %r6,%r6,0xfffffeff + rlwinm %r10,%r3,3,27,28 +/* Test the first (partial?) word. */ + lwz %r5,0(%r8) + srw %r11,%r11,%r10 + orc %r5,%r5,%r11 + add %r0,%r6,%r5 + nor %r9,%r7,%r5 + and. %r0,%r0,%r9 + xor %r12,%r4,%r5 + orc %r12,%r12,%r11 + b L(loopentry) + +/* The loop. */ + +L(loop):lwzu %r5,4(%r8) + and. %r0,%r0,%r9 +/* Test for 0. */ + add %r0,%r6,%r5 + nor %r9,%r7,%r5 + bne L(foundit) + and. %r0,%r0,%r9 +/* Start test for the bytes we're looking for. */ + xor %r12,%r4,%r5 +L(loopentry): + add %r0,%r6,%r12 + nor %r9,%r7,%r12 + beq L(loop) +/* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a + zero byte, in which case we return a match. We guess that this hasn't + happened, though. */ +L(missed): + and. %r0,%r0,%r9 + li %r3,0 + beqlr +/* It did happen. Decide which one was first... + I'm not sure if this is actually faster than a sequence of + rotates, compares, and branches (we use it anyway because it's shorter). 
*/ + and %r6,%r7,%r5 + or %r11,%r7,%r5 + and %r0,%r7,%r12 + or %r10,%r7,%r12 + add %r6,%r6,%r7 + add %r0,%r0,%r7 + nor %r5,%r11,%r6 + nor %r9,%r10,%r0 + cmplw %r5,%r9 + bgtlr + cntlzw %r4,%r9 + srwi %r4,%r4,3 + add %r3,%r8,%r4 + blr + +L(foundit): + and %r0,%r7,%r12 + or %r10,%r7,%r12 + add %r0,%r0,%r7 + nor %r9,%r10,%r0 + cntlzw %r4,%r9 + subi %r8,%r8,4 + srwi %r4,%r4,3 + add %r3,%r8,%r4 + blr +END(strchr) + +weak_alias(strchr,index) diff --git a/sysdeps/powerpc/strchr.s b/sysdeps/powerpc/strchr.s deleted file mode 100644 index c1df66f..0000000 --- a/sysdeps/powerpc/strchr.s +++ /dev/null @@ -1,118 +0,0 @@ - # Optimized strchr implementation for PowerPC. - # Copyright (C) 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details. - # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # See strlen.s for comments on how this works. - - .section ".text" - .align 2 - .globl strchr - .type strchr,@function -strchr: - # char * [r3] strchr (const char *s [r3] , int c [r4] ) - - # r0: a temporary - # r3: our return result. - # r4: byte we're looking for, spread over the whole word - # r5: the current word - # r6: the constant 0xfefefeff (-0x01010101) - # r7: the constant 0x7f7f7f7f - # r8: pointer to the current word. 
- # r9: a temporary - # r10: the number of bits we should ignore in the first word - # r11: a mask with the bits to ignore set to 0 - # r12: a temporary - - rlwimi %r4,%r4,8,16,23 - li %r11,-1 - rlwimi %r4,%r4,16,0,15 - lis %r6,0xfeff - lis %r7,0x7f7f - clrrwi %r8,%r3,2 - addi %r7,%r7,0x7f7f - addi %r6,%r6,0xfffffeff - rlwinm %r10,%r3,3,27,28 - # Test the first (partial?) word. - lwz %r5,0(%r8) - srw %r11,%r11,%r10 - orc %r5,%r5,%r11 - add %r0,%r6,%r5 - nor %r9,%r7,%r5 - and. %r0,%r0,%r9 - xor %r12,%r4,%r5 - orc %r12,%r12,%r11 - b loopentry - - # The loop. - -loop: lwzu %r5,4(%r8) - and. %r0,%r0,%r9 - # Test for 0 - add %r0,%r6,%r5 - nor %r9,%r7,%r5 - bne foundit - and. %r0,%r0,%r9 - # Start test for the bytes we're looking for - xor %r12,%r4,%r5 -loopentry: - add %r0,%r6,%r12 - nor %r9,%r7,%r12 - beq loop - # There is a zero byte in the word, but may also be a matching byte (either - # before or after the zero byte). In fact, we may be looking for a - # zero byte, in which case we return a match. We guess that this hasn't - # happened, though. -missed: - and. %r0,%r0,%r9 - li %r3,0 - beqlr - # It did happen. Decide which one was first... - # I'm not sure if this is actually faster than a sequence of - # rotates, compares, and branches (we use it anyway because it's shorter). - and %r6,%r7,%r5 - or %r11,%r7,%r5 - and %r0,%r7,%r12 - or %r10,%r7,%r12 - add %r6,%r6,%r7 - add %r0,%r0,%r7 - nor %r5,%r11,%r6 - nor %r9,%r10,%r0 - cmplw %r5,%r9 - bgtlr - cntlzw %r4,%r9 - srwi %r4,%r4,3 - add %r3,%r8,%r4 - blr - -foundit: - and %r0,%r7,%r12 - or %r10,%r7,%r12 - add %r0,%r0,%r7 - nor %r9,%r10,%r0 - cntlzw %r4,%r9 - subi %r8,%r8,4 - srwi %r4,%r4,3 - add %r3,%r8,%r4 - blr - -0: - .size strchr,0b-strchr - - .globl index - .weak index - .set index,strchr diff --git a/sysdeps/powerpc/strcmp.S b/sysdeps/powerpc/strcmp.S new file mode 100644 index 0000000..9f4d134 --- /dev/null +++ b/sysdeps/powerpc/strcmp.S @@ -0,0 +1,115 @@ +/* Optimized strcmp implementation for PowerPC. 
+ Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +EALIGN(strcmp,4,0) +/* int [r3] strcmp (const char *p1 [r3], const char *p2 [r4]) */ + +/* General register assignments: + r0: temporary + r3: pointer to previous word in s1 + r4: pointer to previous word in s2 + r5: current word from s1 + r6: current word from s2 + r7: 0xfefefeff + r8: 0x7f7f7f7f + r9: ~(word in s1 | 0x7f7f7f7f) */ + +/* Register assignments in the prologue: + r10: low 2 bits of p2-p1 + r11: mask to orc with r5/r6 */ + + or %r0,%r4,%r3 + clrlwi. %r0,%r0,30 + lis %r7,0xfeff + bne L(unaligned) + + lwz %r5,0(%r3) + lwz %r6,0(%r4) + lis %r8,0x7f7f + addi %r7,%r7,-0x101 + addi %r8,%r8,0x7f7f + b 1f + +0: lwzu %r5,4(%r3) + bne %cr1,L(different) + lwzu %r6,4(%r4) +1: add %r0,%r7,%r5 + nor %r9,%r8,%r5 + and. %r0,%r0,%r9 + cmpw %cr1,%r5,%r6 + beq+ 0b +L(endstring): +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + and %r0,%r8,%r5 + beq %cr1,L(equal) + add %r0,%r0,%r8 + xor. 
%r10,%r5,%r6 + andc %r9,%r9,%r0 + blt- L(highbit) + cntlzw %r10,%r10 + cntlzw %r9,%r9 + addi %r9,%r9,7 + cmpw %cr1,%r9,%r10 + sub %r3,%r5,%r6 + bgelr+ %cr1 +L(equal): + li %r3,0 + blr + +L(different): + lwz %r5,-4(%r3) + xor. %r10,%r5,%r6 + sub %r3,%r5,%r6 + bgelr+ +L(highbit): + mr %r3,%r6 + blr + + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(unaligned): + lbz %r5,0(%r3) + lbz %r6,0(%r4) + b 1f + +0: lbzu %r5,1(%r3) + bne- 4f + lbzu %r6,1(%r4) +1: cmpwi %cr1,%r5,0 + beq- %cr1,3f + cmpw %r5,%r6 + bne- 3f + lbzu %r5,1(%r3) + lbzu %r6,1(%r4) + cmpwi %cr1,%r5,0 + cmpw %r5,%r6 + bne+ %cr1,0b +3: sub %r3,%r5,%r6 + blr +4: lbz %r5,-1(%r3) + sub %r3,%r5,%r6 + blr +END(strcmp) diff --git a/sysdeps/powerpc/strcmp.s b/sysdeps/powerpc/strcmp.s deleted file mode 100644 index f901b82..0000000 --- a/sysdeps/powerpc/strcmp.s +++ /dev/null @@ -1,273 +0,0 @@ - # Optimized strcmp implementation for PowerPC. - # Copyright (C) 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details. - # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # See strlen.s for comments on how the end-of-string testing works. 
- - .section ".text" - .align 3 - .globl strcmp - .type strcmp,@function -strcmp: - # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4]) - - # General register assignments: - # r0: temporary - # r3: pointer to previous word in s1 - # r4: pointer to previous word in s2 - # r5: current first word in s1 - # r6: current first word in s2 (after re-alignment) - # r7: 0xfefefeff - # r8: 0x7f7f7f7f - # r9: ~(word in s1 | 0x7f7f7f7f) - - # Register assignments in the prologue: - # r10: low 2 bits of p2-p1 - # r11: mask to orc with r5/r6 - - subf. %r10,%r4,%r3 - beq- equal - andi. %r10,%r10,3 - cmpi %cr1,%r10,2 - beq- %cr1,align2 - lis %r7,0xfeff - lis %r8,0x7f7f - addi %r8,%r8,0x7f7f - addi %r7,%r7,0xfffffeff - bgt- %cr1,align3 -strcmp3: - rlwinm %r0,%r3,3,27,28 - li %r11,-1 - srw %r11,%r11,%r0 - clrrwi %r3,%r3,2 - clrrwi %r4,%r4,2 - lwz %r5,0(%r3) - lwz %r6,0(%r4) - bne- align1 - - # The loop, case when both strings are aligned the same. - # on entry, cr1.eq must be 1. - # r10: second word in s1 - # r11: second word in s2 OR mask to orc with first two words. -align0: - andi. %r0,%r3,4 - orc %r5,%r5,%r11 - orc %r6,%r6,%r11 - beq+ a0start - add %r0,%r7,%r5 - nor %r9,%r8,%r5 - and. %r0,%r0,%r9 - cmplw %cr1,%r5,%r6 - subi %r3,%r3,4 - bne- endstringeq - subi %r4,%r4,4 - bne- %cr1,difference - -loopalign0: - lwzu %r5,8(%r3) - bne- %cr1,difference2 - lwzu %r6,8(%r4) -a0start: - add %r0,%r7,%r5 - nor %r9,%r8,%r5 - and. %r0,%r0,%r9 - cmplw %cr1,%r5,%r6 - lwz %r10,4(%r3) - bne- endstringeq - add %r0,%r7,%r10 - bne- %cr1,difference - nor %r9,%r8,%r10 - lwz %r11,4(%r4) - and. %r0,%r0,%r9 - cmplw %cr1,%r10,%r11 - beq+ loopalign0 - - mr %r5,%r10 - mr %r6,%r11 - - # fall through to... - -endstringeq: - # (like 'endstring', but an equality code is in cr1) - beq %cr1,equal -endstring: - # OK. We've hit the end of the string. We need to be careful that - # we don't compare two strings as different because of gunk beyond - # the end of the strings. We do it like this... 
- and %r0,%r8,%r5 - add %r0,%r0,%r8 - xor. %r10,%r5,%r6 - andc %r9,%r9,%r0 - cntlzw %r10,%r10 - cntlzw %r9,%r9 - addi %r9,%r9,7 - cmpw %cr1,%r9,%r10 - blt %cr1,equal - sub %r3,%r5,%r6 - bgelr+ - mr %r3,%r6 - blr -equal: li %r3,0 - blr - - # The loop, case when s2 is aligned 1 char behind s1. - # r10: current word in s2 (before re-alignment) - -align1: - cmpwi %cr1,%r0,0 - orc %r5,%r5,%r11 - bne %cr1,align1_123 - # When s1 is aligned to a word boundary, the startup processing is special. - slwi. %r6,%r6,24 - bne+ a1entry_0 - nor %r9,%r8,%r5 - b endstring - -align1_123: - # Otherwise (s1 not aligned to a word boundary): - mr %r10,%r6 - add %r0,%r7,%r5 - nor %r9,%r8,%r5 - and. %r0,%r0,%r9 - srwi %r6,%r6,8 - orc %r6,%r6,%r11 - cmplw %cr1,%r5,%r6 - bne- endstringeq - bne- %cr1,difference - -loopalign1: - slwi. %r6,%r10,24 - bne- %cr1,a1difference - lwzu %r5,4(%r3) - beq- endstring1 -a1entry_0: - lwzu %r10,4(%r4) -a1entry_123: - add %r0,%r7,%r5 - nor %r9,%r8,%r5 - and. %r0,%r0,%r9 - rlwimi %r6,%r10,24,8,31 - cmplw %cr1,%r5,%r6 - beq+ loopalign1 - b endstringeq - -endstring1: - srwi %r3,%r5,24 - blr - -a1difference: - lbz %r6,-1(%r4) - slwi %r6,%r6,24 - rlwimi %r6,%r10,24,8,31 - - # fall through to... - -difference: - # The idea here is that we could just return '%r5 - %r6', except - # that the result might overflow. Overflow can only happen when %r5 - # and %r6 have different signs (thus the xor), in which case we want to - # return negative iff %r6 has its high bit set so %r5 < %r6. - # A branch-free implementation of this is - # xor %r0,%r5,%r6 - # rlwinm %r0,%r0,1,31,31 - # rlwnm %r5,%r5,%r0,1,31 - # rlwnm %r6,%r6,%r0,1,31 - # sub %r3,%r5,%r6 - # blr - # but this is usually more expensive. - xor. %r0,%r5,%r6 - sub %r3,%r5,%r6 - bgelr+ - mr %r3,%r6 - blr - -difference2: - # As for 'difference', but use registers r10 and r11 instead of r5 and r6. - xor. 
%r0,%r10,%r11 - sub %r3,%r10,%r11 - bgelr+ - mr %r3,%r11 - blr - - # For the case when s2 is aligned 3 chars behind s1, we switch - # s1 and s2... - # r10: used by 'align2' (see below) - # r11: used by 'align2' (see below) - # r12: saved link register - # cr0.eq: must be left as 1. - -align3: mflr %r12 - mr %r0,%r3 - mr %r3,%r4 - mr %r4,%r0 - bl strcmp3 - mtlr %r12 - neg %r3,%r3 - blr - - # The loop, case when s2 and s1's alignments differ by 2 - # This is the ugly case... - # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect, - # because there are too many branches. This loop should probably be - # coded like the align1 case. - -a2even: lhz %r5,0(%r3) - lhz %r6,0(%r4) - b a2entry - -align2: - andi. %r0,%r3,1 - beq+ a2even - subi %r3,%r3,1 - subi %r4,%r4,1 - lbz %r5,1(%r3) - lbz %r6,1(%r4) - cmpwi %cr0,%r5,0 - cmpw %cr1,%r5,%r6 - beq- align2end2 - lhzu %r5,2(%r3) - beq+ %cr1,a2entry1 - lbz %r5,-1(%r3) - sub %r3,%r5,%r6 - blr - -loopalign2: - cmpw %cr1,%r5,%r6 - beq- align2end2 - lhzu %r5,2(%r3) - bne- %cr1,align2different -a2entry1: - lhzu %r6,2(%r4) -a2entry: - cmpwi %cr5,%r5,0x00ff - andi. %r0,%r5,0x00ff - bgt+ %cr5,loopalign2 - -align2end: - andi. %r3,%r6,0xff00 - neg %r3,%r3 - blr - -align2different: - lhzu %r5,-2(%r3) -align2end2: - sub %r3,%r5,%r6 - blr - -0: - .size strcmp,0b-strcmp diff --git a/sysdeps/powerpc/strlen.S b/sysdeps/powerpc/strlen.S new file mode 100644 index 0000000..dc6660b --- /dev/null +++ b/sysdeps/powerpc/strlen.S @@ -0,0 +1,144 @@ +/* Optimized strlen implementation for PowerPC. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + +/* The algorithm here uses the following techniques: + + 1) Given a word 'x', we can test to see if it contains any 0 bytes + by subtracting 0x01010101, and seeing if any of the high bits of each + byte changed from 0 to 1. This works because the least significant + 0 byte must have had no incoming carry (otherwise it's not the least + significant), so it is 0x00 - 0x01 == 0xff. For all other + byte values, either they have the high bit set initially, or when + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when + there were no 0x00 bytes in the word. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). + This produces 0x80 in each byte that was zero, and 0x00 in all + the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each + byte, and the '| x' part ensures that bytes with the high bit set + produce 0x00. The addition will carry into the high bit of each byte + iff that byte had one of its low 7 bits set. We can then just see + which was the most significant bit set and divide by 8 to find how + many to add to the index. + This is from the book 'The PowerPC Compiler Writer's Guide', + by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. 
+ + We deal with strings not aligned to a word boundary by taking the + first word and ensuring that bytes not part of the string + are treated as nonzero. To allow for memory latency, we unroll the + loop a few times, being careful to ensure that we do not read ahead + across cache line boundaries. + + Questions to answer: + 1) How long are strings passed to strlen? If they're often really long, + we should probably use cache management instructions and/or unroll the + loop more. If they're often quite short, it might be better to use + fact (2) in the inner loop than have to recalculate it. + 2) How popular are bytes with the high bit set? If they are very rare, + on some processors it might be useful to use the simpler expression + ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one + ALU), but this fails when any character has its high bit set. */ + +/* Some notes on register usage: Under the SVR4 ABI, we can use registers + 0 and 3 through 12 (so long as we don't call any procedures) without + saving them. We can also use registers 14 through 31 if we save them. + We can't use r1 (it's the stack pointer), r2 nor r13 because the user + program may expect them to hold their usual value if we get sent + a signal. Integer parameters are passed in r3 through r10. + We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving + them, the others we must save. */ + +ENTRY(strlen) +/* On entry, r3 points to the string, and it's left that way. + We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f. + r4 points to the word of the string currently being examined; r5 holds + the number of padding bits we prepend to the string to make it + start at a word boundary. r8 holds the 'current' word. + r9-12 are temporaries. r0 is used as a temporary and for discarded + results. */ + clrrwi %r4,%r3,2 /* r4 = r3 rounded down to a word boundary */ + lis %r7,0x7f7f + rlwinm %r5,%r3,3,27,28 /* r5 = 8 * (r3 & 3) */ + lwz %r8,0(%r4) + li %r9,-1 + addi %r7,%r7,0x7f7f +/* That's the setup done, now do the first pair of words.
+ We make an exception and use method (2) on the first two words, to reduce + overhead. */ + srw %r9,%r9,%r5 /* r9 = mask with 0 bits over the padding */ + and %r0,%r7,%r8 + or %r10,%r7,%r8 + add %r0,%r0,%r7 + nor %r0,%r10,%r0 + and. %r8,%r0,%r9 /* drop zero-byte hits that lie in the padding */ + mtcrf 0x01,%r3 /* cr7 = low 4 bits of the string address */ + bne L(done0) + lis %r6,0xfeff + addi %r6,%r6,-0x101 /* r6 = 0xfefefeff */ +/* Are we now aligned to a doubleword boundary? */ + bt 29,L(loop) /* CR bit 29 = address bit with value 4 */ + +/* Handle second word of pair. */ + lwzu %r8,4(%r4) + and %r0,%r7,%r8 + or %r10,%r7,%r8 + add %r0,%r0,%r7 + nor. %r8,%r10,%r0 + bne L(done0) + +/* The loop. */ + +L(loop): + lwz %r8,4(%r4) + lwzu %r9,8(%r4) /* r4 now points at the second word of the pair */ + add %r0,%r6,%r8 + nor %r10,%r7,%r8 + and. %r0,%r0,%r10 + add %r11,%r6,%r9 + nor %r12,%r7,%r9 + bne L(done1) + and. %r0,%r11,%r12 + beq L(loop) + + and %r0,%r7,%r9 + add %r0,%r0,%r7 + andc %r8,%r12,%r0 + b L(done0) + +L(done1): + and %r0,%r7,%r8 + subi %r4,%r4,4 /* the zero byte is in the first word of the pair */ + add %r0,%r0,%r7 + andc %r8,%r10,%r0 + +/* When we get to here, r4 points to the first word in the string that + contains a zero byte, and the most significant set bit in r8 is in that + byte. */ +L(done0): + cntlzw %r11,%r8 + subf %r0,%r3,%r4 + srwi %r11,%r11,3 /* bit position -> byte offset within the word */ + add %r3,%r0,%r11 /* length = (r4 - r3) + byte offset */ + blr +END(strlen) diff --git a/sysdeps/powerpc/strlen.s b/sysdeps/powerpc/strlen.s deleted file mode 100644 index ea80977..0000000 --- a/sysdeps/powerpc/strlen.s +++ /dev/null @@ -1,144 +0,0 @@ - # Optimized strlen implementation for PowerPC. - # Copyright (C) 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details.
- # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # The algorithm here uses the following techniques: - # - # 1) Given a word 'x', we can test to see if it contains any 0 bytes - # by subtracting 0x01010101, and seeing if any of the high bits of each - # byte changed from 0 to 1. This works because the least significant - # 0 byte must have had no incoming carry (otherwise it's not the least - # significant), so it is 0x00 - 0x01 == 0xff. For all other - # byte values, either they have the high bit set initially, or when - # 1 is subtracted you get a value in the range 0x00-0x7f, none of which - # have their high bit set. The expression here is - # (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when - # there were no 0x00 bytes in the word. - # - # 2) Given a word 'x', we can test to see _which_ byte was zero by - # calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). - # This produces 0x80 in each byte that was zero, and 0x00 in all - # the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each - # byte, and the '| x' part ensures that bytes with the high bit set - # produce 0x00. The addition will carry into the high bit of each byte - # iff that byte had one of its low 7 bits set. We can then just see - # which was the most significant bit set and divide by 8 to find how - # many to add to the index. - # This is from the book 'The PowerPC Compiler Writer's Guide', - # by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. - # - # We deal with strings not aligned to a word boundary by taking the - # first word and ensuring that bytes not part of the string - # are treated as nonzero. 
To allow for memory latency, we unroll the - # loop a few times, being careful to ensure that we do not read ahead - # across cache line boundaries. - # - # Questions to answer: - # 1) How long are strings passed to strlen? If they're often really long, - # we should probably use cache management instructions and/or unroll the - # loop more. If they're often quite short, it might be better to use - # fact (2) in the inner loop than have to recalculate it. - # 2) How popular are bytes with the high bit set? If they are very rare, - # on some processors it might be useful to use the simpler expression - # ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one - # ALU), but this fails when any character has its high bit set. - - # Some notes on register usage: Under the SVR4 ABI, we can use registers - # 0 and 3 through 12 (so long as we don't call any procedures) without - # saving them. We can also use registers 14 through 31 if we save them. - # We can't use r1 (it's the stack pointer), r2 nor r13 because the user - # program may expect them to hold their usual value if we get sent - # a signal. Integer parameters are passed in r3 through r10. - # We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving - # them, the others we must save. - - .section ".text" - .align 2 - .globl strlen - .type strlen,@function -strlen: - # On entry, r3 points to the string, and it's left that way. - # We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f. - # r4 is used to keep the current index into the string; r5 holds - # the number of padding bits we prepend to the string to make it - # start at a word boundary. r8 holds the 'current' word. - # r9-12 are temporaries. r0 is used as a temporary and for discarded - # results. - clrrwi %r4,%r3,2 - lis %r7,0x7f7f - rlwinm %r5,%r3,3,27,28 - lwz %r8,0(%r4) - li %r9,-1 - addi %r7,%r7,0x7f7f - # That's the setup done, now do the first pair of words. 
- # We make an exception and use method (2) on the first two words, to reduce - # overhead. - srw %r9,%r9,%r5 - and %r0,%r7,%r8 - or %r10,%r7,%r8 - add %r0,%r0,%r7 - nor %r0,%r10,%r0 - and. %r8,%r0,%r9 - mtcrf 0x01,%r3 - bne done0 - lis %r6,0xfeff - addi %r6,%r6,-0x101 - # Are we now aligned to a doubleword boundary? - bt 29,loop - - # Handle second word of pair. - lwzu %r8,4(%r4) - and %r0,%r7,%r8 - or %r10,%r7,%r8 - add %r0,%r0,%r7 - nor. %r8,%r10,%r0 - bne done0 - - # The loop. - -loop: lwz %r8,4(%r4) - lwzu %r9,8(%r4) - add %r0,%r6,%r8 - nor %r10,%r7,%r8 - and. %r0,%r0,%r10 - add %r11,%r6,%r9 - nor %r12,%r7,%r9 - bne done1 - and. %r0,%r11,%r12 - beq loop - - and %r0,%r7,%r9 - add %r0,%r0,%r7 - andc %r8,%r12,%r0 - b done0 - -done1: and %r0,%r7,%r8 - subi %r4,%r4,4 - add %r0,%r0,%r7 - andc %r8,%r10,%r0 - - # When we get to here, r4 points to the first word in the string that - # contains a zero byte, and the most significant set bit in r8 is in that - # byte. -done0: cntlzw %r11,%r8 - subf %r0,%r3,%r4 - srwi %r11,%r11,3 - add %r3,%r0,%r11 - blr -0: - .size strlen,0b-strlen diff --git a/sysdeps/powerpc/sub_n.S b/sysdeps/powerpc/sub_n.S new file mode 100644 index 0000000..7af577d --- /dev/null +++ b/sysdeps/powerpc/sub_n.S @@ -0,0 +1,68 @@ +/* Subtract two limb vectors of equal, non-zero length for PowerPC. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, + mp_size_t size) + Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1. */ + +/* Note on optimisation: This code is optimal for the 601. Almost every other + possible 2-unrolled inner loop will not be. Also, watch out for the + alignment... */ + +EALIGN(__mpn_sub_n,3,1) +/* Set up for loop below. */ + mtcrf 0x01,%r6 /* cr7 = low 4 bits of size */ + srwi. %r7,%r6,1 /* r7 = size / 2; cr0.eq if that is zero */ + mtctr %r7 /* the loop handles two limbs per pass */ + bt 31,2f /* odd size: peel off one limb first */ + +/* Set the carry (clear the borrow). */ + subfc %r0,%r0,%r0 +/* Adjust pointers for loop. */ + addi %r3,%r3,-4 + addi %r4,%r4,-4 + addi %r5,%r5,-4 + b 0f + +2: lwz %r7,0(%r5) + lwz %r6,0(%r4) + subfc %r6,%r7,%r6 /* r6 = s1[0] - s2[0]; CA set if no borrow */ + stw %r6,0(%r3) + beq 1f /* cr0 still from srwi.: size was 1, skip the loop */ + +/* Align start of loop to an odd word boundary to guarantee that the + last two words can be fetched in one access (for 601). This turns + out to be important. */ +0: + lwz %r9,4(%r4) + lwz %r8,4(%r5) + lwzu %r6,8(%r4) + lwzu %r7,8(%r5) + subfe %r8,%r8,%r9 + stw %r8,4(%r3) + subfe %r6,%r7,%r6 + stwu %r6,8(%r3) + bdnz 0b +/* Return the borrow. */ +1: subfe %r3,%r3,%r3 /* r3 = CA - 1: 0 if no borrow, -1 if borrow */ + neg %r3,%r3 /* -> 0 or 1 */ + blr +END(__mpn_sub_n) diff --git a/sysdeps/powerpc/sub_n.s b/sysdeps/powerpc/sub_n.s deleted file mode 100644 index 8711bf9..0000000 --- a/sysdeps/powerpc/sub_n.s +++ /dev/null @@ -1,69 +0,0 @@ - # Subtract two limb vectors of equal, non-zero length for PowerPC. - # Copyright (C) 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library. - # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version.
- # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details. - # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, - # mp_size_t size) - # Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1. - - # Note on optimisation: This code is optimal for the 601. Almost every other - # possible 2-unrolled inner loop will not be. Also, watch out for the - # alignment... - - .align 3 - .globl __mpn_sub_n - .type __mpn_sub_n,@function - nop -__mpn_sub_n: - # Set up for loop below. - mtcrf 0x01,%r6 - srwi. %r7,%r6,1 - mtctr %r7 - bt 31,2f - - # Set the carry (clear the borrow). - subfc %r0,%r0,%r0 - # Adjust pointers for loop. - addi %r3,%r3,-4 - addi %r4,%r4,-4 - addi %r5,%r5,-4 - b 0f - -2: lwz %r7,0(%r5) - lwz %r6,0(%r4) - subfc %r6,%r7,%r6 - stw %r6,0(%r3) - beq 1f - - # Align start of loop to an odd word boundary to guarantee that the - # last two words can be fetched in one access (for 601). This turns - # out to be important. -0: - lwz %r9,4(%r4) - lwz %r8,4(%r5) - lwzu %r6,8(%r4) - lwzu %r7,8(%r5) - subfe %r8,%r8,%r9 - stw %r8,4(%r3) - subfe %r6,%r7,%r6 - stwu %r6,8(%r3) - bdnz 0b - # return the borrow -1: subfe %r3,%r3,%r3 - neg %r3,%r3 - blr diff --git a/sysdeps/powerpc/submul_1.S b/sysdeps/powerpc/submul_1.S new file mode 100644 index 0000000..80da8ec --- /dev/null +++ b/sysdeps/powerpc/submul_1.S @@ -0,0 +1,52 @@ +/* Multiply a limb vector by a limb and subtract the product from a second limb vector, for PowerPC. + Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate res-s1*s2 and put result back in res; return carry. */ + +ENTRY(__mpn_submul_1) + mtctr %r5 /* loop counter = s1_size */ + + lwz %r0,0(%r4) + mullw %r7,%r0,%r6 /* low word of s1[0] * s2_limb */ + mulhwu %r10,%r0,%r6 /* high word of the same product */ + lwz %r9,0(%r3) + subf %r8,%r7,%r9 /* r8 = res[0] - low word */ + addc %r7,%r7,%r8 # invert cy (r7 is junk) + addi %r3,%r3,-4 # adjust res_ptr + bdz 1f /* only one limb: skip the loop */ + +0: lwzu %r0,4(%r4) + stwu %r8,4(%r3) + mullw %r8,%r0,%r6 + adde %r7,%r8,%r10 /* low word + previous high word + borrow */ + mulhwu %r10,%r0,%r6 + lwz %r9,4(%r3) + addze %r10,%r10 /* fold the carry of that sum into the high word */ + subf %r8,%r7,%r9 + addc %r7,%r7,%r8 # invert cy (r7 is junk) + bdnz 0b + +1: stw %r8,4(%r3) /* store the last result limb */ + addze %r3,%r10 /* return the last high word plus the final borrow */ + blr +END(__mpn_submul_1) diff --git a/sysdeps/powerpc/submul_1.s b/sysdeps/powerpc/submul_1.s deleted file mode 100644 index 999430d..0000000 --- a/sysdeps/powerpc/submul_1.s +++ /dev/null @@ -1,52 +0,0 @@ - # Multiply a limb vector by a single limb, for PowerPC. - # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc. - # This file is part of the GNU C Library.
- # - # The GNU C Library is free software; you can redistribute it and/or - # modify it under the terms of the GNU Library General Public License as - # published by the Free Software Foundation; either version 2 of the - # License, or (at your option) any later version. - # - # The GNU C Library is distributed in the hope that it will be useful, - # but WITHOUT ANY WARRANTY; without even the implied warranty of - # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - # Library General Public License for more details. - # - # You should have received a copy of the GNU Library General Public - # License along with the GNU C Library; see the file COPYING.LIB. If not, - # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - # Boston, MA 02111-1307, USA. - - # mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, - # mp_size_t s1_size, mp_limb_t s2_limb) - # Calculate res-s1*s2 and put result back in res; return carry. - - .align 2 - .globl __mpn_submul_1 - .type __mpn_submul_1,@function -__mpn_submul_1: - mtctr %r5 - - lwz %r0,0(%r4) - mullw %r7,%r0,%r6 - mulhwu %r10,%r0,%r6 - lwz %r9,0(%r3) - subf %r8,%r7,%r9 - addc %r7,%r7,%r8 # invert cy (r7 is junk) - addi %r3,%r3,-4 # adjust res_ptr - bdz Lend - -Loop: lwzu %r0,4(%r4) - stwu %r8,4(%r3) - mullw %r8,%r0,%r6 - adde %r7,%r8,%r10 - mulhwu %r10,%r0,%r6 - lwz %r9,4(%r3) - addze %r10,%r10 - subf %r8,%r7,%r9 - addc %r7,%r7,%r8 # invert cy (r7 is junk) - bdnz Loop - -Lend: stw %r8,4(%r3) - addze %r3,%r10 - blr diff --git a/sysdeps/powerpc/test-arith.c b/sysdeps/powerpc/test-arith.c index c846b0d..9e1be88 100644 --- a/sysdeps/powerpc/test-arith.c +++ b/sysdeps/powerpc/test-arith.c @@ -226,7 +226,7 @@ check_result(int line, const char *rm, tocheck_t expected, tocheck_t actual) if (memcmp(&expected, &actual, sizeof(tocheck_t)) != 0) { unsigned char *ex, *ac; - int i; + size_t i; printf("%s:%d:round %s:result failed\n" " expected result 0x", __FILE__, line, rm); @@ -323,7 +323,7 @@ 
check_excepts(int line, const char *rm, int expected, int actual) expected = expected & ~excepts_missing | FE_INVALID_SNAN; if ((expected & all_exceptions) != actual) { - int i; + size_t i; printf("%s:%d:round %s:exceptions failed\n" " expected exceptions ", __FILE__, line,rm); for (i = 0; i < sizeof(excepts)/sizeof(excepts[0]); i++) @@ -419,7 +419,7 @@ static const optest_t optests[] = { {__LINE__,B_NEG, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 }, {__LINE__,B_NEG, 0,P_Z,P_Z1, 0,0,0, R_ALL, 0, 1,P_Z,P_Z1 }, {__LINE__,B_NEG, 1,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, - + /* Absolute value. */ {__LINE__,B_ABS, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, {__LINE__,B_ABS, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, @@ -433,7 +433,7 @@ static const optest_t optests[] = { {__LINE__,B_ABS, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 }, {__LINE__,B_ABS, 0,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, {__LINE__,B_ABS, 1,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, - + /* Square root. */ {__LINE__,B_SQRT, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, {__LINE__,B_SQRT, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 1,P_Z,P_Z }, @@ -459,7 +459,8 @@ static const optest_t optests[] = { static void check_op(void) { - int i, j; + size_t i; + int j; tocheck_t r, a, b, x; int raised; @@ -497,7 +498,7 @@ static void fail_xr(int line, const char *rm, tocheck_t x, tocheck_t r, tocheck_t xx, int xflag) { - int i; + size_t i; unsigned char *cx, *cr, *cxx; printf("%s:%d:round %s:fail\n with x=0x", __FILE__, line,rm); @@ -539,7 +540,7 @@ check_sqrt(tocheck_t a) r0 = delta(r1,-1); r2 = delta(r1,1); switch (1 << j) { - case R_NEAREST: + case R_NEAREST: x0 = r0 * r0 - a; x2 = r2 * r2 - a; ok = fabs(x0) >= fabs(x1) && fabs(x1) <= fabs(x2); break; |