about summary refs log tree commit diff
path: root/sysdeps/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc')
-rw-r--r--sysdeps/powerpc/Makefile18
-rw-r--r--sysdeps/powerpc/add_n.S68
-rw-r--r--sysdeps/powerpc/add_n.s68
-rw-r--r--sysdeps/powerpc/addmul_1.S49
-rw-r--r--sysdeps/powerpc/addmul_1.s50
-rw-r--r--sysdeps/powerpc/bsd-_setjmp.S6
-rw-r--r--sysdeps/powerpc/bsd-setjmp.S6
-rw-r--r--sysdeps/powerpc/dl-machine.h152
-rw-r--r--sysdeps/powerpc/lshift.S123
-rw-r--r--sysdeps/powerpc/lshift.s479
-rw-r--r--sysdeps/powerpc/machine-gmon.h32
-rw-r--r--sysdeps/powerpc/memset.S199
-rw-r--r--sysdeps/powerpc/memset.s202
-rw-r--r--sysdeps/powerpc/mul_1.S46
-rw-r--r--sysdeps/powerpc/mul_1.s47
-rw-r--r--sysdeps/powerpc/ppc-mcount.S84
-rw-r--r--sysdeps/powerpc/rshift.S56
-rw-r--r--sysdeps/powerpc/rshift.s59
-rw-r--r--sysdeps/powerpc/s_copysign.S40
-rw-r--r--sysdeps/powerpc/s_fabs.S33
-rw-r--r--sysdeps/powerpc/setjmp.S6
-rw-r--r--sysdeps/powerpc/strchr.S111
-rw-r--r--sysdeps/powerpc/strchr.s118
-rw-r--r--sysdeps/powerpc/strcmp.S115
-rw-r--r--sysdeps/powerpc/strcmp.s273
-rw-r--r--sysdeps/powerpc/strlen.S144
-rw-r--r--sysdeps/powerpc/strlen.s144
-rw-r--r--sysdeps/powerpc/sub_n.S68
-rw-r--r--sysdeps/powerpc/sub_n.s69
-rw-r--r--sysdeps/powerpc/submul_1.S52
-rw-r--r--sysdeps/powerpc/submul_1.s52
-rw-r--r--sysdeps/powerpc/test-arith.c15
32 files changed, 1284 insertions, 1700 deletions
diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile
index 0a50956..3e8f22b 100644
--- a/sysdeps/powerpc/Makefile
+++ b/sysdeps/powerpc/Makefile
@@ -11,3 +11,21 @@ tests += test-arith test-arithf
LDLIBS-test-arith = libm
LDLIBS-test-arithf = libm
endif
+
+ifeq ($(subdir),gmon)
+sysdep_routines += ppc-mcount
+endif
+
+# On PPC, -fpic works until the GOT contains 2^15 bytes, and possibly
+# more depending on how clever the linker is. Each GOT entry takes 4 bytes,
+# so that's at least 8192 entries. Since libc only uses about 1200 entries,
+# we want to use -fpic, because this generates fewer relocs.
+ifeq (yes,$(build-shared))
+CFLAGS-.os = -fpic -fno-common
+endif
+
+# The initfini generation code doesn't work in the presence of -fPIC, so
+# we use -fpic instead which is much better.
+ifeq ($(subdir),csu)
+CFLAGS-initfini.s = -g0 -fpic
+endif
diff --git a/sysdeps/powerpc/add_n.S b/sysdeps/powerpc/add_n.S
new file mode 100644
index 0000000..2bd59ae
--- /dev/null
+++ b/sysdeps/powerpc/add_n.S
@@ -0,0 +1,68 @@
+/* Add two limb vectors of equal, non-zero length for PowerPC.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
+ mp_size_t size)
+ Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1. */
+
+/* Note on optimisation: This code is optimal for the 601. Almost every other
+ possible 2-unrolled inner loop will not be. Also, watch out for the
+ alignment... */
+
+EALIGN(__mpn_add_n,3,0)
+/* Set up for loop below. */
+ mtcrf 0x01,%r6
+ srwi. %r7,%r6,1
+ li %r10,0
+ mtctr %r7
+ bt 31,2f
+
+/* Clear the carry. */
+ addic %r0,%r0,0
+/* Adjust pointers for loop. */
+ addi %r3,%r3,-4
+ addi %r4,%r4,-4
+ addi %r5,%r5,-4
+ b 0f
+
+2: lwz %r7,0(%r5)
+ lwz %r6,0(%r4)
+ addc %r6,%r6,%r7
+ stw %r6,0(%r3)
+ beq 1f
+
+/* The loop. */
+
+/* Align start of loop to an odd word boundary to guarantee that the
+ last two words can be fetched in one access (for 601). */
+0: lwz %r9,4(%r4)
+ lwz %r8,4(%r5)
+ lwzu %r6,8(%r4)
+ lwzu %r7,8(%r5)
+ adde %r8,%r9,%r8
+ stw %r8,4(%r3)
+ adde %r6,%r6,%r7
+ stwu %r6,8(%r3)
+ bdnz 0b
+/* Return the carry. */
+1: addze %r3,%r10
+ blr
+END(__mpn_add_n)
diff --git a/sysdeps/powerpc/add_n.s b/sysdeps/powerpc/add_n.s
deleted file mode 100644
index 609f0a5..0000000
--- a/sysdeps/powerpc/add_n.s
+++ /dev/null
@@ -1,68 +0,0 @@
- # Add two limb vectors of equal, non-zero length for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
- # mp_size_t size)
- # Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1.
-
- # Note on optimisation: This code is optimal for the 601. Almost every other
- # possible 2-unrolled inner loop will not be. Also, watch out for the
- # alignment...
-
- .align 3
- .globl __mpn_add_n
- .type __mpn_add_n,@function
-__mpn_add_n:
- # Set up for loop below.
- mtcrf 0x01,%r6
- srwi. %r7,%r6,1
- li %r10,0
- mtctr %r7
- bt 31,2f
-
- # Clear the carry.
- addic %r0,%r0,0
- # Adjust pointers for loop.
- addi %r3,%r3,-4
- addi %r4,%r4,-4
- addi %r5,%r5,-4
- b 0f
-
-2: lwz %r7,0(%r5)
- lwz %r6,0(%r4)
- addc %r6,%r6,%r7
- stw %r6,0(%r3)
- beq 1f
-
- # The loop.
-
- # Align start of loop to an odd word boundary to guarantee that the
- # last two words can be fetched in one access (for 601).
-0: lwz %r9,4(%r4)
- lwz %r8,4(%r5)
- lwzu %r6,8(%r4)
- lwzu %r7,8(%r5)
- adde %r8,%r9,%r8
- stw %r8,4(%r3)
- adde %r6,%r6,%r7
- stwu %r6,8(%r3)
- bdnz 0b
- # return the carry
-1: addze %r3,%r10
- blr
diff --git a/sysdeps/powerpc/addmul_1.S b/sysdeps/powerpc/addmul_1.S
new file mode 100644
index 0000000..dc762fc
--- /dev/null
+++ b/sysdeps/powerpc/addmul_1.S
@@ -0,0 +1,49 @@
+/* Multiply a limb vector by a single limb, for PowerPC.
+ Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+ mp_size_t s1_size, mp_limb_t s2_limb)
+ Calculate res+s1*s2 and put result back in res; return carry. */
+ENTRY(__mpn_addmul_1)
+ mtctr %r5
+
+ lwz %r0,0(%r4)
+ mullw %r7,%r0,%r6
+ mulhwu %r10,%r0,%r6
+ lwz %r9,0(%r3)
+ addc %r8,%r7,%r9
+ addi %r3,%r3,-4 /* adjust res_ptr */
+ bdz 1f
+
+0: lwzu %r0,4(%r4)
+ stwu %r8,4(%r3)
+ mullw %r8,%r0,%r6
+ adde %r7,%r8,%r10
+ mulhwu %r10,%r0,%r6
+ lwz %r9,4(%r3)
+ addze %r10,%r10
+ addc %r8,%r7,%r9
+ bdnz 0b
+
+1: stw %r8,4(%r3)
+ addze %r3,%r10
+ blr
+END(__mpn_addmul_1)
diff --git a/sysdeps/powerpc/addmul_1.s b/sysdeps/powerpc/addmul_1.s
deleted file mode 100644
index cf8fd2a..0000000
--- a/sysdeps/powerpc/addmul_1.s
+++ /dev/null
@@ -1,50 +0,0 @@
- # Multiply a limb vector by a single limb, for PowerPC.
- # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
- # mp_size_t s1_size, mp_limb_t s2_limb)
- # Calculate res+s1*s2 and put result back in res; return carry.
-
- .align 2
- .globl __mpn_addmul_1
- .type __mpn_addmul_1,@function
-__mpn_addmul_1:
- mtctr %r5
-
- lwz %r0,0(%r4)
- mullw %r7,%r0,%r6
- mulhwu %r10,%r0,%r6
- lwz %r9,0(%r3)
- addc %r8,%r7,%r9
- addi %r3,%r3,-4 # adjust res_ptr
- bdz Lend
-
-Loop: lwzu %r0,4(%r4)
- stwu %r8,4(%r3)
- mullw %r8,%r0,%r6
- adde %r7,%r8,%r10
- mulhwu %r10,%r0,%r6
- lwz %r9,4(%r3)
- addze %r10,%r10
- addc %r8,%r7,%r9
- bdnz Loop
-
-Lend: stw %r8,4(%r3)
- addze %r3,%r10
- blr
diff --git a/sysdeps/powerpc/bsd-_setjmp.S b/sysdeps/powerpc/bsd-_setjmp.S
index ffd90d5..ef31f84 100644
--- a/sysdeps/powerpc/bsd-_setjmp.S
+++ b/sysdeps/powerpc/bsd-_setjmp.S
@@ -25,9 +25,5 @@
ENTRY (_setjmp)
li %r4,0 /* Set second argument to 0. */
-#ifdef PIC
- b __sigsetjmp@plt
-#else
- b __sigsetjmp
-#endif
+ b JUMPTARGET(__sigsetjmp)
END (_setjmp)
diff --git a/sysdeps/powerpc/bsd-setjmp.S b/sysdeps/powerpc/bsd-setjmp.S
index f02d781..d26b3fc 100644
--- a/sysdeps/powerpc/bsd-setjmp.S
+++ b/sysdeps/powerpc/bsd-setjmp.S
@@ -25,11 +25,7 @@
ENTRY (__setjmp)
li %r4,1 /* Set second argument to 1. */
-#ifdef PIC
- b __sigsetjmp@plt
-#else
- b __sigsetjmp
-#endif
+ b JUMPTARGET(__sigsetjmp)
END (__setjmp)
.globl setjmp
diff --git a/sysdeps/powerpc/dl-machine.h b/sysdeps/powerpc/dl-machine.h
index 917e4f7..771b711 100644
--- a/sysdeps/powerpc/dl-machine.h
+++ b/sysdeps/powerpc/dl-machine.h
@@ -149,33 +149,34 @@ elf_machine_load_address (void)
#define elf_machine_relplt elf_machine_rela
/* This code is used in dl-runtime.c to call the `fixup' function
- and then redirect to the address it returns. It is called
- from code built in the PLT by elf_machine_runtime_setup. */
+ and then redirect to the address it returns. It is called
+ from code built in the PLT by elf_machine_runtime_setup. */
#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
.section \".text\"
.align 2
.globl _dl_runtime_resolve
.type _dl_runtime_resolve,@function
_dl_runtime_resolve:
- # We need to save the registers used to pass parameters.
- # We build a stack frame to put them in.
+ # We need to save the registers used to pass parameters, and register 0,
+ # which is used by _mcount; the registers are saved in a stack frame.
stwu 1,-48(1)
- mflr 0
+ stw 0,12(1)
stw 3,16(1)
stw 4,20(1)
- stw 0,52(1)
+ # The code that calls this has put parameters for `fixup' in r12 and r11.
+ mr 3,12
stw 5,24(1)
- # We also need to save some of the condition register fields.
- mfcr 0
+ mr 4,11
stw 6,28(1)
+ mflr 0
+ # We also need to save some of the condition register fields.
stw 7,32(1)
+ stw 0,52(1)
stw 8,36(1)
+ mfcr 0
stw 9,40(1)
stw 10,44(1)
- stw 0,12(1)
- # The code that calls this has put parameters for `fixup' in r12 and r11.
- mr 3,12
- mr 4,11
+ stw 0,8(1)
bl fixup@local
# 'fixup' returns the address we want to branch to.
mtctr 3
@@ -184,20 +185,21 @@ _dl_runtime_resolve:
lwz 10,44(1)
lwz 9,40(1)
mtlr 0
- lwz 0,12(1)
lwz 8,36(1)
+ lwz 0,8(1)
lwz 7,32(1)
lwz 6,28(1)
mtcrf 0xFF,0
lwz 5,24(1)
lwz 4,20(1)
lwz 3,16(1)
+ lwz 0,12(1)
# ...unwind the stack frame, and jump to the PLT entry we updated.
addi 1,1,48
bctr
0:
.size _dl_runtime_resolve,0b-_dl_runtime_resolve
- # undo '.section text'.
+ # Undo '.section text'.
.previous
");
@@ -213,20 +215,20 @@ asm ("\
.type _start,@function
_start:
# We start with the following on the stack, from top:
- # argc (4 bytes)
- # arguments for program (terminated by NULL)
- # environment variables (terminated by NULL)
- # arguments for the program loader
+ # argc (4 bytes);
+ # arguments for program (terminated by NULL);
+ # environment variables (terminated by NULL);
+ # arguments for the program loader.
# FIXME: perhaps this should do the same trick as elf/start.c?
# Call _dl_start with one parameter pointing at argc
- mr 3,1
+ mr 3,1
# (we have to frob the stack pointer a bit to allow room for
# _dl_start to save the link register)
- li 4,0
+ li 4,0
addi 1,1,-16
- stw 4,0(1)
- bl _dl_start@local
+ stw 4,0(1)
+ bl _dl_start@local
# Now, we do our main work of calling initialisation procedures.
# The ELF ABI doesn't say anything about parameters for these,
@@ -234,70 +236,72 @@ _start:
# Changing these is strongly discouraged (not least because argc is
# passed by value!).
- # put our GOT pointer in r31
- bl _GLOBAL_OFFSET_TABLE_-4@local
+ # Put our GOT pointer in r31,
+ bl _GLOBAL_OFFSET_TABLE_-4@local
mflr 31
- # the address of _start in r30
- mr 30,3
- # &_dl_argc in 29, &_dl_argv in 27, and _dl_default_scope in 28
- lwz 28,_dl_default_scope@got(31)
- lwz 29,_dl_argc@got(31)
- lwz 27,_dl_argv@got(31)
+ # the address of _start in r30,
+ mr 30,3
+ # &_dl_argc in 29, &_dl_argv in 27, and _dl_default_scope in 28.
+ lwz 28,_dl_default_scope@got(31)
+ lwz 29,_dl_argc@got(31)
+ lwz 27,_dl_argv@got(31)
0:
- # call initfunc = _dl_init_next(_dl_default_scope[2])
- lwz 3,8(28)
- bl _dl_init_next@plt
- # if initfunc is NULL, we exit the loop
- mr. 0,3
- beq 1f
+ # Set initfunc = _dl_init_next(_dl_default_scope[2])
+ lwz 3,8(28)
+ bl _dl_init_next@plt
+ # If initfunc is NULL, we exit the loop; otherwise,
+ cmpwi 3,0
+ beq 1f
# call initfunc(_dl_argc, _dl_argv, _dl_argv+_dl_argc+1)
- mtlr 0
- lwz 3,0(29)
- lwz 4,0(27)
+ mtlr 3
+ lwz 3,0(29)
+ lwz 4,0(27)
slwi 5,3,2
- add 5,4,5
+ add 5,4,5
addi 5,5,4
blrl
# and loop.
- b 0b
+ b 0b
1:
# Now, to conform to the ELF ABI, we have to:
- # pass argv (actually _dl_argv) in r4
- lwz 4,0(27)
- # pass argc (actually _dl_argc) in r3
- lwz 3,0(29)
- # pass envp (actually _dl_argv+_dl_argc+1) in r5
+ # Pass argc (actually _dl_argc) in r3;
+ lwz 3,0(29)
+ # pass argv (actually _dl_argv) in r4;
+ lwz 4,0(27)
+ # pass envp (actually _dl_argv+_dl_argc+1) in r5;
slwi 5,3,2
- add 5,4,5
- addi 5,5,4
- # pass the auxilary vector in r6. This is passed just after _envp.
- addi 6,5,-4
+ add 6,4,5
+ addi 5,6,4
+	# pass the auxiliary vector in r6. This is passed to us just after _envp.
2: lwzu 0,4(6)
- cmpwi 1,0,0
- bne 2b
+ cmpwi 0,0,0
+ bne 2b
addi 6,6,4
- # pass a termination function pointer (in this case _dl_fini) in r7
- lwz 7,_dl_fini@got(31)
- # now, call the start function in r30...
+ # Pass a termination function pointer (in this case _dl_fini) in r7.
+ lwz 7,_dl_fini@got(31)
+ # Now, call the start function in r30...
mtctr 30
- # pass the stack pointer in r1 (so far so good), pointing to a NULL value
- # (this lets our startup code distinguish between a program linked statically,
+ lwz 26,_dl_starting_up@got(31)
+ # Pass the stack pointer in r1 (so far so good), pointing to a NULL value.
+ # (This lets our startup code distinguish between a program linked statically,
# which linux will call with argc on top of the stack which will hopefully
# never be zero, and a dynamically linked program which will always have
# a NULL on the top of the stack).
# Take the opportunity to clear LR, so anyone who accidentally returns
- # from _start gets SEGV.
- li 0,0
- stw 0,0(1)
- mtlr 0
- # and also clear _dl_starting_up
- lwz 26,_dl_starting_up@got(31)
- stw 0,0(26)
- # go do it!
+ # from _start gets SEGV. Also clear the next few words of the stack.
+ li 31,0
+ stw 31,0(1)
+ mtlr 31
+ stw 31,4(1)
+ stw 31,8(1)
+ stw 31,12(1)
+ # Clear _dl_starting_up.
+ stw 31,0(26)
+ # Go do it!
bctr
0:
.size _start,0b-_start
- # undo '.section text'.
+ # Undo '.section text'.
.previous
");
@@ -346,7 +350,7 @@ static ElfW(Addr) _dl_preferred_address = 1
/* We require the address of the PLT entry returned from fixup, not
the first word of the PLT entry. */
-#define ELF_FIXUP_RETURN_VALUE(map, result) (&(result))
+#define ELF_FIXUP_RETURN_VALUE(map, result) ((Elf32_Addr) &(result))
/* Nonzero iff TYPE should not be allowed to resolve to one of
the main executable's symbols, as for a COPY reloc. */
@@ -396,7 +400,7 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
{
if (map->l_info[DT_JMPREL])
{
- int i;
+ Elf32_Word i;
/* Fill in the PLT. Its initial contents are directed to a
function earlier in the PLT which arranges for the dynamic
linker to be called back. */
@@ -516,10 +520,10 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
{
#ifndef RTLD_BOOTSTRAP
const Elf32_Sym *const refsym = sym;
+ extern char **_dl_argv;
#endif
Elf32_Word loadbase, finaladdr;
const int rinfo = ELF32_R_TYPE (reloc->r_info);
- extern char **_dl_argv;
if (rinfo == R_PPC_NONE)
return;
@@ -551,9 +555,9 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
+ reloc->r_addend);
}
- /* This is an if/else if chain because GCC 2.7.2.[012] turns case
- statements into non-PIC table lookups. When a later version
- comes out that fixes this, this should be changed. */
+ /* This is still an if/else if chain because GCC uses the GOT to find
+ the table for table-based switch statements, and we haven't set it
+ up yet. */
if (rinfo == R_PPC_UADDR32 ||
rinfo == R_PPC_GLOB_DAT ||
rinfo == R_PPC_ADDR32 ||
@@ -561,6 +565,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
{
*reloc_addr = finaladdr;
}
+#ifndef RTLD_BOOTSTRAP
else if (rinfo == R_PPC_ADDR16_LO)
{
*(Elf32_Half*) reloc_addr = finaladdr;
@@ -573,7 +578,6 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
{
*(Elf32_Half*) reloc_addr = (finaladdr + 0x8000) >> 16;
}
-#ifndef RTLD_BOOTSTRAP
else if (rinfo == R_PPC_REL24)
{
Elf32_Sword delta = finaladdr - (Elf32_Word) (char *) reloc_addr;
@@ -693,12 +697,14 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
#endif
}
+#ifndef RTLD_BOOTSTRAP
if (rinfo == R_PPC_ADDR16_LO ||
rinfo == R_PPC_ADDR16_HI ||
rinfo == R_PPC_ADDR16_HA ||
rinfo == R_PPC_REL24 ||
rinfo == R_PPC_ADDR24)
MODIFIED_CODE_NOQUEUE (reloc_addr);
+#endif
}
#define ELF_MACHINE_NO_REL 1
diff --git a/sysdeps/powerpc/lshift.S b/sysdeps/powerpc/lshift.S
new file mode 100644
index 0000000..b1487a1
--- /dev/null
+++ b/sysdeps/powerpc/lshift.S
@@ -0,0 +1,123 @@
+/* Shift a limb left, low level routine.
+ Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize,
+ unsigned int cnt) */
+
+EALIGN(__mpn_lshift,3,0)
+ mtctr %r5 # copy size into CTR
+ cmplwi %cr0,%r5,16 # is size < 16
+ slwi %r0,%r5,2
+ add %r7,%r3,%r0 # make r7 point at end of res
+ add %r4,%r4,%r0 # make r4 point at end of s1
+ lwzu %r11,-4(%r4) # load first s1 limb
+ subfic %r8,%r6,32
+ srw %r3,%r11,%r8 # compute function return value
+ bge %cr0,L(big) # branch if size >= 16
+
+ bdz L(end1)
+
+0: lwzu %r10,-4(%r4)
+ slw %r9,%r11,%r6
+ srw %r12,%r10,%r8
+ or %r9,%r9,%r12
+ stwu %r9,-4(%r7)
+ bdz L(end2)
+ lwzu %r11,-4(%r4)
+ slw %r9,%r10,%r6
+ srw %r12,%r11,%r8
+ or %r9,%r9,%r12
+ stwu %r9,-4(%r7)
+ bdnz 0b
+
+L(end1):slw %r0,%r11,%r6
+ stw %r0,-4(%r7)
+ blr
+
+
+/* Guaranteed not to succeed. */
+L(boom): tweq %r0,%r0
+
+/* We imitate a case statement, by using (yuk!) fixed-length code chunks,
+ of size 4*12 bytes. We have to do this (or something) to make this PIC. */
+L(big): mflr %r9
+ bltl- %cr0,L(boom) # Never taken, only used to set LR.
+ slwi %r10,%r6,4
+ mflr %r12
+ add %r10,%r12,%r10
+ slwi %r8,%r6,5
+ add %r10,%r8,%r10
+ mtctr %r10
+ addi %r5,%r5,-1
+ mtlr %r9
+ bctr
+
+L(end2):slw %r0,%r10,%r6
+ stw %r0,-4(%r7)
+ blr
+
+#define DO_LSHIFT(n) \
+ mtctr %r5; \
+0: lwzu %r10,-4(%r4); \
+ slwi %r9,%r11,n; \
+ inslwi %r9,%r10,n,32-n; \
+ stwu %r9,-4(%r7); \
+ bdz- L(end2); \
+ lwzu %r11,-4(%r4); \
+ slwi %r9,%r10,n; \
+ inslwi %r9,%r11,n,32-n; \
+ stwu %r9,-4(%r7); \
+ bdnz 0b; \
+ b L(end1)
+
+ DO_LSHIFT(1)
+ DO_LSHIFT(2)
+ DO_LSHIFT(3)
+ DO_LSHIFT(4)
+ DO_LSHIFT(5)
+ DO_LSHIFT(6)
+ DO_LSHIFT(7)
+ DO_LSHIFT(8)
+ DO_LSHIFT(9)
+ DO_LSHIFT(10)
+ DO_LSHIFT(11)
+ DO_LSHIFT(12)
+ DO_LSHIFT(13)
+ DO_LSHIFT(14)
+ DO_LSHIFT(15)
+ DO_LSHIFT(16)
+ DO_LSHIFT(17)
+ DO_LSHIFT(18)
+ DO_LSHIFT(19)
+ DO_LSHIFT(20)
+ DO_LSHIFT(21)
+ DO_LSHIFT(22)
+ DO_LSHIFT(23)
+ DO_LSHIFT(24)
+ DO_LSHIFT(25)
+ DO_LSHIFT(26)
+ DO_LSHIFT(27)
+ DO_LSHIFT(28)
+ DO_LSHIFT(29)
+ DO_LSHIFT(30)
+ DO_LSHIFT(31)
+
+END(__mpn_lshift)
diff --git a/sysdeps/powerpc/lshift.s b/sysdeps/powerpc/lshift.s
deleted file mode 100644
index 9612a3d..0000000
--- a/sysdeps/powerpc/lshift.s
+++ /dev/null
@@ -1,479 +0,0 @@
- # Shift a limb left, low level routine.
- # Copyright (C) 1996, 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize,
- # unsigned int cnt)
-
- .align 3
- .globl __mpn_lshift
- .type __mpn_lshift,@function
-__mpn_lshift:
- mtctr %r5 # copy size into CTR
- cmplwi %cr0,%r5,16 # is size < 16
- slwi %r0,%r5,2
- add %r7,%r3,%r0 # make r7 point at end of res
- add %r4,%r4,%r0 # make r4 point at end of s1
- lwzu %r11,-4(%r4) # load first s1 limb
- subfic %r8,%r6,32
- srw %r3,%r11,%r8 # compute function return value
- bge %cr0,Lbig # branch if size >= 16
-
- bdz Lend1
-
-Loop: lwzu %r10,-4(%r4)
- slw %r9,%r11,%r6
- srw %r12,%r10,%r8
- or %r9,%r9,%r12
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slw %r9,%r10,%r6
- srw %r12,%r11,%r8
- or %r9,%r9,%r12
- stwu %r9,-4(%r7)
- bdnz Loop
- b Lend1
-
- # Guaranteed not to succeed.
-LBoom: tweq %r0,%r0
-
- # We imitate a case statement, by using (yuk!) fixed-length code chunks,
- # of size 4*12 bytes. We have to do this (or something) to make this PIC.
-Lbig: mflr %r9
- bltl %cr0,LBoom # Never taken, only used to set LR.
- slwi %r10,%r6,4
- mflr %r12
- add %r10,%r12,%r10
- slwi %r8,%r6,5
- add %r10,%r8,%r10
- mtctr %r10
- addi %r5,%r5,-1
- mtlr %r9
- bctr
-
-Lend1: slw %r0,%r11,%r6
- stw %r0,-4(%r7)
- blr
-
- mtctr %r5
-Loop1: lwzu %r10,-4(%r4)
- slwi %r9,%r11,1
- inslwi %r9,%r10,1,31
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,1
- inslwi %r9,%r11,1,31
- stwu %r9,-4(%r7)
- bdnz Loop1
- b Lend1
-
- mtctr %r5
-Loop2: lwzu %r10,-4(%r4)
- slwi %r9,%r11,2
- inslwi %r9,%r10,2,30
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,2
- inslwi %r9,%r11,2,30
- stwu %r9,-4(%r7)
- bdnz Loop2
- b Lend1
-
- mtctr %r5
-Loop3: lwzu %r10,-4(%r4)
- slwi %r9,%r11,3
- inslwi %r9,%r10,3,29
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,3
- inslwi %r9,%r11,3,29
- stwu %r9,-4(%r7)
- bdnz Loop3
- b Lend1
-
- mtctr %r5
-Loop4: lwzu %r10,-4(%r4)
- slwi %r9,%r11,4
- inslwi %r9,%r10,4,28
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,4
- inslwi %r9,%r11,4,28
- stwu %r9,-4(%r7)
- bdnz Loop4
- b Lend1
-
- mtctr %r5
-Loop5: lwzu %r10,-4(%r4)
- slwi %r9,%r11,5
- inslwi %r9,%r10,5,27
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,5
- inslwi %r9,%r11,5,27
- stwu %r9,-4(%r7)
- bdnz Loop5
- b Lend1
-
- mtctr %r5
-Loop6: lwzu %r10,-4(%r4)
- slwi %r9,%r11,6
- inslwi %r9,%r10,6,26
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,6
- inslwi %r9,%r11,6,26
- stwu %r9,-4(%r7)
- bdnz Loop6
- b Lend1
-
- mtctr %r5
-Loop7: lwzu %r10,-4(%r4)
- slwi %r9,%r11,7
- inslwi %r9,%r10,7,25
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,7
- inslwi %r9,%r11,7,25
- stwu %r9,-4(%r7)
- bdnz Loop7
- b Lend1
-
- mtctr %r5
-Loop8: lwzu %r10,-4(%r4)
- slwi %r9,%r11,8
- inslwi %r9,%r10,8,24
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,8
- inslwi %r9,%r11,8,24
- stwu %r9,-4(%r7)
- bdnz Loop8
- b Lend1
-
- mtctr %r5
-Loop9: lwzu %r10,-4(%r4)
- slwi %r9,%r11,9
- inslwi %r9,%r10,9,23
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,9
- inslwi %r9,%r11,9,23
- stwu %r9,-4(%r7)
- bdnz Loop9
- b Lend1
-
- mtctr %r5
-Loop10: lwzu %r10,-4(%r4)
- slwi %r9,%r11,10
- inslwi %r9,%r10,10,22
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,10
- inslwi %r9,%r11,10,22
- stwu %r9,-4(%r7)
- bdnz Loop10
- b Lend1
-
- mtctr %r5
-Loop11: lwzu %r10,-4(%r4)
- slwi %r9,%r11,11
- inslwi %r9,%r10,11,21
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,11
- inslwi %r9,%r11,11,21
- stwu %r9,-4(%r7)
- bdnz Loop11
- b Lend1
-
- mtctr %r5
-Loop12: lwzu %r10,-4(%r4)
- slwi %r9,%r11,12
- inslwi %r9,%r10,12,20
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,12
- inslwi %r9,%r11,12,20
- stwu %r9,-4(%r7)
- bdnz Loop12
- b Lend1
-
- mtctr %r5
-Loop13: lwzu %r10,-4(%r4)
- slwi %r9,%r11,13
- inslwi %r9,%r10,13,19
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,13
- inslwi %r9,%r11,13,19
- stwu %r9,-4(%r7)
- bdnz Loop13
- b Lend1
-
- mtctr %r5
-Loop14: lwzu %r10,-4(%r4)
- slwi %r9,%r11,14
- inslwi %r9,%r10,14,18
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,14
- inslwi %r9,%r11,14,18
- stwu %r9,-4(%r7)
- bdnz Loop14
- b Lend1
-
- mtctr %r5
-Loop15: lwzu %r10,-4(%r4)
- slwi %r9,%r11,15
- inslwi %r9,%r10,15,17
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,15
- inslwi %r9,%r11,15,17
- stwu %r9,-4(%r7)
- bdnz Loop15
- b Lend1
-
- mtctr %r5
-Loop16: lwzu %r10,-4(%r4)
- slwi %r9,%r11,16
- inslwi %r9,%r10,16,16
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,16
- inslwi %r9,%r11,16,16
- stwu %r9,-4(%r7)
- bdnz Loop16
- b Lend1
-
- mtctr %r5
-Loop17: lwzu %r10,-4(%r4)
- slwi %r9,%r11,17
- inslwi %r9,%r10,17,15
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,17
- inslwi %r9,%r11,17,15
- stwu %r9,-4(%r7)
- bdnz Loop17
- b Lend1
-
- mtctr %r5
-Loop18: lwzu %r10,-4(%r4)
- slwi %r9,%r11,18
- inslwi %r9,%r10,18,14
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,18
- inslwi %r9,%r11,18,14
- stwu %r9,-4(%r7)
- bdnz Loop18
- b Lend1
-
- mtctr %r5
-Loop19: lwzu %r10,-4(%r4)
- slwi %r9,%r11,19
- inslwi %r9,%r10,19,13
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,19
- inslwi %r9,%r11,19,13
- stwu %r9,-4(%r7)
- bdnz Loop19
- b Lend1
-
- mtctr %r5
-Loop20: lwzu %r10,-4(%r4)
- slwi %r9,%r11,20
- inslwi %r9,%r10,20,12
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,20
- inslwi %r9,%r11,20,12
- stwu %r9,-4(%r7)
- bdnz Loop20
- b Lend1
-
- mtctr %r5
-Loop21: lwzu %r10,-4(%r4)
- slwi %r9,%r11,21
- inslwi %r9,%r10,21,11
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,21
- inslwi %r9,%r11,21,11
- stwu %r9,-4(%r7)
- bdnz Loop21
- b Lend1
-
- mtctr %r5
-Loop22: lwzu %r10,-4(%r4)
- slwi %r9,%r11,22
- inslwi %r9,%r10,22,10
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,22
- inslwi %r9,%r11,22,10
- stwu %r9,-4(%r7)
- bdnz Loop22
- b Lend1
-
- mtctr %r5
-Loop23: lwzu %r10,-4(%r4)
- slwi %r9,%r11,23
- inslwi %r9,%r10,23,9
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,23
- inslwi %r9,%r11,23,9
- stwu %r9,-4(%r7)
- bdnz Loop23
- b Lend1
-
- mtctr %r5
-Loop24: lwzu %r10,-4(%r4)
- slwi %r9,%r11,24
- inslwi %r9,%r10,24,8
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,24
- inslwi %r9,%r11,24,8
- stwu %r9,-4(%r7)
- bdnz Loop24
- b Lend1
-
- mtctr %r5
-Loop25: lwzu %r10,-4(%r4)
- slwi %r9,%r11,25
- inslwi %r9,%r10,25,7
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,25
- inslwi %r9,%r11,25,7
- stwu %r9,-4(%r7)
- bdnz Loop25
- b Lend1
-
- mtctr %r5
-Loop26: lwzu %r10,-4(%r4)
- slwi %r9,%r11,26
- inslwi %r9,%r10,26,6
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,26
- inslwi %r9,%r11,26,6
- stwu %r9,-4(%r7)
- bdnz Loop26
- b Lend1
-
- mtctr %r5
-Loop27: lwzu %r10,-4(%r4)
- slwi %r9,%r11,27
- inslwi %r9,%r10,27,5
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,27
- inslwi %r9,%r11,27,5
- stwu %r9,-4(%r7)
- bdnz Loop27
- b Lend1
-
- mtctr %r5
-Loop28: lwzu %r10,-4(%r4)
- slwi %r9,%r11,28
- inslwi %r9,%r10,28,4
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,28
- inslwi %r9,%r11,28,4
- stwu %r9,-4(%r7)
- bdnz Loop28
- b Lend1
-
- mtctr %r5
-Loop29: lwzu %r10,-4(%r4)
- slwi %r9,%r11,29
- inslwi %r9,%r10,29,3
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,29
- inslwi %r9,%r11,29,3
- stwu %r9,-4(%r7)
- bdnz Loop29
- b Lend1
-
- mtctr %r5
-Loop30: lwzu %r10,-4(%r4)
- slwi %r9,%r11,30
- inslwi %r9,%r10,30,2
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,30
- inslwi %r9,%r11,30,2
- stwu %r9,-4(%r7)
- bdnz Loop30
- b Lend1
-
- mtctr %r5
-Loop31: lwzu %r10,-4(%r4)
- slwi %r9,%r11,31
- inslwi %r9,%r10,31,1
- stwu %r9,-4(%r7)
- bdz Lend2
- lwzu %r11,-4(%r4)
- slwi %r9,%r10,31
- inslwi %r9,%r11,31,1
- stwu %r9,-4(%r7)
- bdnz Loop31
- b Lend1
-
-Lend2: slw %r0,%r10,%r6
- stw %r0,-4(%r7)
- blr
diff --git a/sysdeps/powerpc/machine-gmon.h b/sysdeps/powerpc/machine-gmon.h
new file mode 100644
index 0000000..ba53807
--- /dev/null
+++ b/sysdeps/powerpc/machine-gmon.h
@@ -0,0 +1,32 @@
+/* PowerPC-specific implementation of profiling support.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* We need a special version of the `mcount' function because it has
+ to preserve more registers than your usual function. */
+
+void __mcount_internal (unsigned long frompc, unsigned long selfpc);
+
+#define _MCOUNT_DECL(frompc, selfpc) \
+void __mcount_internal (unsigned long frompc, unsigned long selfpc)
+
+
+/* Define MCOUNT as empty since we have the implementation in another
+ file. */
+#define MCOUNT
diff --git a/sysdeps/powerpc/memset.S b/sysdeps/powerpc/memset.S
new file mode 100644
index 0000000..6ac32dd
--- /dev/null
+++ b/sysdeps/powerpc/memset.S
@@ -0,0 +1,199 @@
+/* Optimized memset implementation for PowerPC.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+EALIGN(memset,5,1)
+/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
+ Returns 's'.
+
+ The memset is done in three sizes: byte (8 bits), word (32 bits),
+ cache line (256 bits). There is a special case for setting cache lines
+ to 0, to take advantage of the dcbz instruction.
+ r6: current address we are storing at
+ r7: number of bytes we are setting now (when aligning) */
+
+/* take care of case for size <= 4 */
+ cmplwi %cr1,%r5,4
+ andi. %r7,%r3,3
+ mr %r6,%r3
+ ble- %cr1,L(small)
+/* align to word boundary */
+ cmplwi %cr5,%r5,31
+ rlwimi %r4,%r4,8,16,23
+ beq+ L(aligned) # 8th instruction from .align
+ mtcrf 0x01,%r3
+ subfic %r7,%r7,4
+ add %r6,%r6,%r7
+ sub %r5,%r5,%r7
+ bf+ 31,0f
+ stb %r4,0(%r3)
+ bt 30,L(aligned)
+0: sth %r4,-2(%r6) # 16th instruction from .align
+/* take care of case for size < 31 */
+L(aligned):
+ mtcrf 0x01,%r5
+ rlwimi %r4,%r4,16,0,15
+ ble %cr5,L(medium)
+/* align to cache line boundary... */
+ andi. %r7,%r6,0x1C
+ subfic %r7,%r7,0x20
+ beq L(caligned)
+ mtcrf 0x01,%r7
+ add %r6,%r6,%r7
+ sub %r5,%r5,%r7
+ cmplwi %cr1,%r7,0x10
+ mr %r8,%r6
+ bf 28,1f
+ stw %r4,-4(%r8)
+ stwu %r4,-8(%r8)
+1: blt %cr1,2f
+ stw %r4,-4(%r8) # 32nd instruction from .align
+ stw %r4,-8(%r8)
+ stw %r4,-12(%r8)
+ stwu %r4,-16(%r8)
+2: bf 29,L(caligned)
+ stw %r4,-4(%r8)
+/* now aligned to a cache line. */
+L(caligned):
+ cmplwi %cr1,%r4,0
+ clrrwi. %r7,%r5,5
+ mtcrf 0x01,%r5 # 40th instruction from .align
+ beq %cr1,L(zloopstart) # special case for clearing memory using dcbz
+ srwi %r0,%r7,5
+ mtctr %r0
+ beq L(medium) # we may not actually get to do a full line
+ clrlwi. %r5,%r5,27
+ add %r6,%r6,%r7
+0: li %r8,-0x40
+ bdz L(cloopdone) # 48th instruction from .align
+
+3: dcbz %r8,%r6
+ stw %r4,-4(%r6)
+ stw %r4,-8(%r6)
+ stw %r4,-12(%r6)
+ stw %r4,-16(%r6)
+ nop # let 601 fetch last 4 instructions of loop
+ stw %r4,-20(%r6)
+ stw %r4,-24(%r6) # 56th instruction from .align
+ nop # let 601 fetch first 8 instructions of loop
+ stw %r4,-28(%r6)
+ stwu %r4,-32(%r6)
+ bdnz 3b
+L(cloopdone):
+ stw %r4,-4(%r6)
+ stw %r4,-8(%r6)
+ stw %r4,-12(%r6)
+ stw %r4,-16(%r6) # 64th instruction from .align
+ stw %r4,-20(%r6)
+ cmplwi %cr1,%r5,16
+ stw %r4,-24(%r6)
+ stw %r4,-28(%r6)
+ stwu %r4,-32(%r6)
+ beqlr
+ add %r6,%r6,%r7
+ b L(medium_tail2) # 72nd instruction from .align
+
+ .align 5
+ nop
+/* Clear lines of memory in 128-byte chunks. */
+L(zloopstart):
+ clrlwi %r5,%r5,27
+ mtcrf 0x02,%r7
+ srwi. %r0,%r7,7
+ mtctr %r0
+ li %r7,0x20
+ li %r8,-0x40
+ cmplwi %cr1,%r5,16 # 8
+ bf 26,0f
+ dcbz 0,%r6
+ addi %r6,%r6,0x20
+0: li %r9,-0x20
+ bf 25,1f
+ dcbz 0,%r6
+ dcbz %r7,%r6
+ addi %r6,%r6,0x40 # 16
+1: cmplwi %cr5,%r5,0
+ beq L(medium)
+L(zloop):
+ dcbz 0,%r6
+ dcbz %r7,%r6
+ addi %r6,%r6,0x80
+ dcbz %r8,%r6
+ dcbz %r9,%r6
+ bdnz L(zloop)
+ beqlr %cr5
+ b L(medium_tail2)
+
+ .align 5
+L(small):
+/* Memset of 4 bytes or less. */
+ cmplwi %cr5,%r5,1
+ cmplwi %cr1,%r5,3
+ bltlr %cr5
+ stb %r4,0(%r6)
+ beqlr %cr5
+ nop
+ stb %r4,1(%r6)
+ bltlr %cr1
+ stb %r4,2(%r6)
+ beqlr %cr1
+ nop
+ stb %r4,3(%r6)
+ blr
+
+/* Memset of 0-31 bytes. */
+ .align 5
+L(medium):
+ cmplwi %cr1,%r5,16
+L(medium_tail2):
+ add %r6,%r6,%r5
+L(medium_tail):
+ bt- 31,L(medium_31t)
+ bt- 30,L(medium_30t)
+L(medium_30f):
+ bt- 29,L(medium_29t)
+L(medium_29f):
+ bge- %cr1,L(medium_27t)
+ bflr- 28
+ stw %r4,-4(%r6) # 8th instruction from .align
+ stw %r4,-8(%r6)
+ blr
+
+L(medium_31t):
+ stbu %r4,-1(%r6)
+ bf- 30,L(medium_30f)
+L(medium_30t):
+ sthu %r4,-2(%r6)
+ bf- 29,L(medium_29f)
+L(medium_29t):
+ stwu %r4,-4(%r6)
+ blt- %cr1,L(medium_27f) # 16th instruction from .align
+L(medium_27t):
+ stw %r4,-4(%r6)
+ stw %r4,-8(%r6)
+ stw %r4,-12(%r6)
+ stwu %r4,-16(%r6)
+L(medium_27f):
+ bflr- 28
+L(medium_28t):
+ stw %r4,-4(%r6)
+ stw %r4,-8(%r6)
+ blr
+END(memset)
diff --git a/sysdeps/powerpc/memset.s b/sysdeps/powerpc/memset.s
deleted file mode 100644
index 4c8bf8c..0000000
--- a/sysdeps/powerpc/memset.s
+++ /dev/null
@@ -1,202 +0,0 @@
- # Optimized memset implementation for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- .section ".text"
- .align 5
- nop
-
- .globl memset
- .type memset,@function
-memset:
- # __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
- # Returns 's'.
-
- # The memset is done in three sizes: byte (8 bits), word (32 bits),
- # cache line (256 bits). There is a special case for setting cache lines
- # to 0, to take advantage of the dcbz instruction.
- # r6: current address we are storing at
- # r7: number of bytes we are setting now (when aligning)
-
- # take care of case for size <= 4
- cmplwi %cr1,%r5,4
- andi. %r7,%r3,3
- mr %r6,%r3
- ble- %cr1,small
- # align to word boundary
- cmplwi %cr5,%r5,31
- rlwimi %r4,%r4,8,16,23
- beq+ aligned # 8th instruction from .align
- mtcrf 0x01,%r3
- subfic %r7,%r7,4
- add %r6,%r6,%r7
- sub %r5,%r5,%r7
- bf+ 31,0f
- stb %r4,0(%r3)
- bt 30,aligned
-0: sth %r4,-2(%r6) # 16th instruction from .align
- # take care of case for size < 31
-aligned:
- mtcrf 0x01,%r5
- rlwimi %r4,%r4,16,0,15
- ble %cr5,medium
- # align to cache line boundary...
- andi. %r7,%r6,0x1C
- subfic %r7,%r7,0x20
- beq caligned
- mtcrf 0x01,%r7
- add %r6,%r6,%r7
- sub %r5,%r5,%r7
- cmplwi %cr1,%r7,0x10
- mr %r8,%r6
- bf 28,1f
- stw %r4,-4(%r8)
- stwu %r4,-8(%r8)
-1: blt %cr1,2f
- stw %r4,-4(%r8) # 32nd instruction from .align
- stw %r4,-8(%r8)
- stw %r4,-12(%r8)
- stwu %r4,-16(%r8)
-2: bf 29,caligned
- stw %r4,-4(%r8)
- # now aligned to a cache line.
-caligned:
- cmplwi %cr1,%r4,0
- clrrwi. %r7,%r5,5
- mtcrf 0x01,%r5 # 40th instruction from .align
- beq %cr1,zloopstart # special case for clearing memory using dcbz
- srwi %r0,%r7,5
- mtctr %r0
- beq medium # we may not actually get to do a full line
- clrlwi. %r5,%r5,27
- add %r6,%r6,%r7
-0: li %r8,-0x40
- bdz cloopdone # 48th instruction from .align
-
-cloop: dcbz %r8,%r6
- stw %r4,-4(%r6)
- stw %r4,-8(%r6)
- stw %r4,-12(%r6)
- stw %r4,-16(%r6)
- nop # let 601 fetch last 4 instructions of loop
- stw %r4,-20(%r6)
- stw %r4,-24(%r6) # 56th instruction from .align
- nop # let 601 fetch first 8 instructions of loop
- stw %r4,-28(%r6)
- stwu %r4,-32(%r6)
- bdnz cloop
-cloopdone:
- stw %r4,-4(%r6)
- stw %r4,-8(%r6)
- stw %r4,-12(%r6)
- stw %r4,-16(%r6) # 64th instruction from .align
- stw %r4,-20(%r6)
- cmplwi %cr1,%r5,16
- stw %r4,-24(%r6)
- stw %r4,-28(%r6)
- stwu %r4,-32(%r6)
- beqlr
- add %r6,%r6,%r7
- b medium_tail2 # 72nd instruction from .align
-
- .align 5
- nop
-# clear lines of memory in 128-byte chunks.
-zloopstart:
- clrlwi %r5,%r5,27
- mtcrf 0x02,%r7
- srwi. %r0,%r7,7
- mtctr %r0
- li %r7,0x20
- li %r8,-0x40
- cmplwi %cr1,%r5,16 # 8
- bf 26,0f
- dcbz 0,%r6
- addi %r6,%r6,0x20
-0: li %r9,-0x20
- bf 25,1f
- dcbz 0,%r6
- dcbz %r7,%r6
- addi %r6,%r6,0x40 # 16
-1: cmplwi %cr5,%r5,0
- beq medium
-zloop:
- dcbz 0,%r6
- dcbz %r7,%r6
- addi %r6,%r6,0x80
- dcbz %r8,%r6
- dcbz %r9,%r6
- bdnz zloop
- beqlr %cr5
- b medium_tail2
-
- .align 5
-small:
- # Memset of 4 bytes or less.
- cmplwi %cr5,%r5,1
- cmplwi %cr1,%r5,3
- bltlr %cr5
- stb %r4,0(%r6)
- beqlr %cr5
- nop
- stb %r4,1(%r6)
- bltlr %cr1
- stb %r4,2(%r6)
- beqlr %cr1
- nop
- stb %r4,3(%r6)
- blr
-
-# memset of 0-31 bytes
- .align 5
-medium:
- cmplwi %cr1,%r5,16
-medium_tail2:
- add %r6,%r6,%r5
-medium_tail:
- bt- 31,medium_31t
- bt- 30,medium_30t
-medium_30f:
- bt- 29,medium_29t
-medium_29f:
- bge- %cr1,medium_27t
- bflr- 28
- stw %r4,-4(%r6) # 8th instruction from .align
- stw %r4,-8(%r6)
- blr
-
-medium_31t:
- stbu %r4,-1(%r6)
- bf- 30,medium_30f
-medium_30t:
- sthu %r4,-2(%r6)
- bf- 29,medium_29f
-medium_29t:
- stwu %r4,-4(%r6)
- blt- %cr1,medium_27f # 16th instruction from .align
-medium_27t:
- stw %r4,-4(%r6)
- stw %r4,-8(%r6)
- stw %r4,-12(%r6)
- stwu %r4,-16(%r6)
-medium_27f:
- bflr- 28
-medium_28t:
- stw %r4,-4(%r6)
- stw %r4,-8(%r6)
- blr
diff --git a/sysdeps/powerpc/mul_1.S b/sysdeps/powerpc/mul_1.S
new file mode 100644
index 0000000..d48bd8f
--- /dev/null
+++ b/sysdeps/powerpc/mul_1.S
@@ -0,0 +1,46 @@
+/* Multiply a limb vector by a limb, for PowerPC.
+ Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+ mp_size_t s1_size, mp_limb_t s2_limb)
+ Calculate s1*s2 and put result in res_ptr; return carry. */
+
+ENTRY(__mpn_mul_1)
+ mtctr %r5
+
+ lwz %r0,0(%r4)
+ mullw %r7,%r0,%r6
+ mulhwu %r10,%r0,%r6
+ addi %r3,%r3,-4 # adjust res_ptr
+ addic %r5,%r5,0 # clear cy with dummy insn
+ bdz 1f
+
+0: lwzu %r0,4(%r4)
+ stwu %r7,4(%r3)
+ mullw %r8,%r0,%r6
+ adde %r7,%r8,%r10
+ mulhwu %r10,%r0,%r6
+ bdnz 0b
+
+1: stw %r7,4(%r3)
+ addze %r3,%r10
+ blr
+END(__mpn_mul_1)
diff --git a/sysdeps/powerpc/mul_1.s b/sysdeps/powerpc/mul_1.s
deleted file mode 100644
index d6eb623..0000000
--- a/sysdeps/powerpc/mul_1.s
+++ /dev/null
@@ -1,47 +0,0 @@
- # Multiply a limb vector by a limb, for PowerPC.
- # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
- # mp_size_t s1_size, mp_limb_t s2_limb)
- # Calculate s1*s2 and put result in res_ptr; return carry.
-
- .align 2
- .globl __mpn_mul_1
- .type __mpn_mul_1,@function
-
-__mpn_mul_1:
- mtctr %r5
-
- lwz %r0,0(%r4)
- mullw %r7,%r0,%r6
- mulhwu %r10,%r0,%r6
- addi %r3,%r3,-4 # adjust res_ptr
- addic %r5,%r5,0 # clear cy with dummy insn
- bdz Lend
-
-Loop: lwzu %r0,4(%r4)
- stwu %r7,4(%r3)
- mullw %r8,%r0,%r6
- adde %r7,%r8,%r10
- mulhwu %r10,%r0,%r6
- bdnz Loop
-
-Lend: stw %r7,4(%r3)
- addze %r3,%r10
- blr
diff --git a/sysdeps/powerpc/ppc-mcount.S b/sysdeps/powerpc/ppc-mcount.S
new file mode 100644
index 0000000..06f1fcd
--- /dev/null
+++ b/sysdeps/powerpc/ppc-mcount.S
@@ -0,0 +1,84 @@
+/* PowerPC-specific implementation of profiling support.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* This would be bad. */
+#ifdef PROF
+#undef PROF
+#endif
+
+#include <sysdep.h>
+
+/* We do profiling as described in the SYSV ELF ABI, _mcount is called
+ with the address of a data word in r0 (that is different for every
+ routine, initialised to 0, and otherwise unused). The caller has put
+ the address the caller will return to in the usual place on the stack,
+ 4(%r1). _mcount is responsible for ensuring that when it returns no
+ argument-passing registers are disturbed, and that the LR is set back
+ to (what the caller sees as) 4(%r1).
+
+ This is intended so that the following code can be inserted at the
+ front of any routine without changing the routine:
+
+ .data
+ .align 2
+ 0: .long 0
+ .previous
+ mflr %r0
+ lis %r11,0b@ha
+ stw %r0,4(%r1)
+ addi %r0,%r11,0b@l
+ bl _mcount
+*/
+
+ENTRY(_mcount)
+ stwu %r1,-48(%r1)
+/* We need to save the parameter-passing registers. */
+ stw %r3, 12(%r1)
+ stw %r4, 16(%r1)
+ stw %r5, 20(%r1)
+ stw %r6, 24(%r1)
+ mflr %r4
+ lwz %r3, 52(%r1)
+ mfcr %r5
+ stw %r7, 28(%r1)
+ stw %r8, 32(%r1)
+ stw %r9, 36(%r1)
+ stw %r10,40(%r1)
+ stw %r4, 44(%r1)
+ stw %r5, 8(%r1)
+ bl JUMPTARGET(__mcount_internal)
+ /* Restore the registers... */
+ lwz %r6, 8(%r1)
+ lwz %r0, 44(%r1)
+ lwz %r3, 12(%r1)
+ mtctr %r0
+ lwz %r4, 16(%r1)
+ mtcrf 0xff,%r6
+ lwz %r5, 20(%r1)
+ lwz %r6, 24(%r1)
+ lwz %r0, 52(%r1)
+ lwz %r7, 28(%r1)
+ lwz %r8, 32(%r1)
+ mtlr %r0
+ lwz %r9, 36(%r1)
+ lwz %r10,40(%r1)
+ /* ...unwind the stack frame, and return to your usual programming. */
+ addi %r1,%r1,48
+ bctr
+END(_mcount)
diff --git a/sysdeps/powerpc/rshift.S b/sysdeps/powerpc/rshift.S
new file mode 100644
index 0000000..eb1f562
--- /dev/null
+++ b/sysdeps/powerpc/rshift.S
@@ -0,0 +1,56 @@
+/* Shift a limb right, low level routine.
+ Copyright (C) 1995, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* INPUT PARAMETERS
+ res_ptr r3
+ s1_ptr r4
+ size r5
+ cnt r6 */
+
+ENTRY(__mpn_rshift)
+ mtctr 5 # copy size into CTR
+ addi 7,3,-4 # move adjusted res_ptr to free return reg
+ subfic 8,6,32
+ lwz 11,0(4) # load first s1 limb
+ slw 3,11,8 # compute function return value
+ bdz 1f
+
+0: lwzu 10,4(4)
+ srw 9,11,6
+ slw 12,10,8
+ or 9,9,12
+ stwu 9,4(7)
+ bdz 2f
+ lwzu 11,4(4)
+ srw 9,10,6
+ slw 12,11,8
+ or 9,9,12
+ stwu 9,4(7)
+ bdnz 0b
+
+1: srw 0,11,6
+ stw 0,4(7)
+ blr
+
+2: srw 0,10,6
+ stw 0,4(7)
+ blr
+END(__mpn_rshift)
diff --git a/sysdeps/powerpc/rshift.s b/sysdeps/powerpc/rshift.s
deleted file mode 100644
index 20f09ad..0000000
--- a/sysdeps/powerpc/rshift.s
+++ /dev/null
@@ -1,59 +0,0 @@
-# PowerPC-32 __mpn_rshift --
-
-# Copyright (C) 1995 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-# License for more details.
-
-# You should have received a copy of the GNU Library General Public License
-# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-# MA 02111-1307, USA.
-
-
-# INPUT PARAMETERS
-# res_ptr r3
-# s1_ptr r4
-# size r5
-# cnt r6
-
- .align 3
- .globl __mpn_rshift
- .type __mpn_rshift,@function
-__mpn_rshift:
- mtctr 5 # copy size into CTR
- addi 7,3,-4 # move adjusted res_ptr to free return reg
- subfic 8,6,32
- lwz 11,0(4) # load first s1 limb
- slw 3,11,8 # compute function return value
- bdz Lend1
-
-Loop: lwzu 10,4(4)
- srw 9,11,6
- slw 12,10,8
- or 9,9,12
- stwu 9,4(7)
- bdz Lend2
- lwzu 11,4(4)
- srw 9,10,6
- slw 12,11,8
- or 9,9,12
- stwu 9,4(7)
- bdnz Loop
-
-Lend1: srw 0,11,6
- stw 0,4(7)
- blr
-
-Lend2: srw 0,10,6
- stw 0,4(7)
- blr
diff --git a/sysdeps/powerpc/s_copysign.S b/sysdeps/powerpc/s_copysign.S
index adc7df2..6d5ba82 100644
--- a/sysdeps/powerpc/s_copysign.S
+++ b/sysdeps/powerpc/s_copysign.S
@@ -1,17 +1,17 @@
/* Copy a sign bit between floating-point values.
Copyright (C) 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-
+
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
-
+
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
-
+
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
@@ -20,15 +20,12 @@
/* This has been coded in assembler because GCC makes such a mess of it
when it's coded in C. */
- .section ".text"
- .align 2
- .globl __copysign
- .type __copysign,@function
-__copysign:
+#include <sysdep.h>
+
+ENTRY(__copysign)
/* double [f1] copysign (double [f1] x, double [f2] y);
copysign(x,y) returns a value with the magnitude of x and
with the sign bit of y. */
-
stwu %r1,-16(%r1)
stfd %f2,8(%r1)
lwz %r3,8(%r1)
@@ -39,22 +36,15 @@ __copysign:
blr
0: fnabs %f1,%f1
blr
-0:
- .size __copysign,0b-__copysign
-
- .globl copysign
- .globl copysignf
- .globl __copysignf
- .weak copysign
- .weak copysignf
- .set copysign,__copysign
+ END (__copysign)
+
+weak_alias(__copysign,copysign)
+
/* It turns out that it's safe to use this code even for single-precision. */
- .set __copysignf,__copysign
- .set copysignf,__copysign
+weak_alias(__copysign,copysignf)
+strong_alias(__copysign,__copysignf)
+
#ifdef NO_LONG_DOUBLE
- .globl copysignl
- .globl __copysignl
- .weak copysignl
- .set __copysignl,__copysign
- .set copysignl,__copysign
+weak_alias(__copysign,copysignl)
+strong_alias(__copysign,__copysignl)
#endif
diff --git a/sysdeps/powerpc/s_fabs.S b/sysdeps/powerpc/s_fabs.S
index a527335..3c6374b 100644
--- a/sysdeps/powerpc/s_fabs.S
+++ b/sysdeps/powerpc/s_fabs.S
@@ -1,42 +1,37 @@
/* Floating-point absolute value. PowerPC version.
Copyright (C) 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-
+
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
-
+
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
-
+
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
- .section ".text"
- .align 2
- .globl __fabs
- .type __fabs,@function
-__fabs:
+#include <sysdep.h>
+
+ENTRY(__fabs)
/* double [f1] fabs (double [f1] x); */
fabs %f1,%f1
blr
-0:
- .size __fabs,0b-__fabs
+END(__fabs)
+
+weak_alias(__fabs,fabs)
- .globl fabs,fabsf,__fabsf
- .weak fabs,fabsf
- .set fabs,__fabs
/* It turns out that it's safe to use this code even for single-precision. */
- .set __fabsf,__fabs
- .set fabsf,__fabs
+strong_alias(__fabs,__fabsf)
+weak_alias(__fabs,fabsf)
+
#ifdef NO_LONG_DOUBLE
- .globl fabsl,__fabsl
- .weak fabsl
- .set __fabsl,__fabs
- .set fabsl,__fabs
+weak_alias(__fabs,__fabsl)
+weak_alias(__fabs,fabsl)
#endif
diff --git a/sysdeps/powerpc/setjmp.S b/sysdeps/powerpc/setjmp.S
index ddfea7e..8fa863f 100644
--- a/sysdeps/powerpc/setjmp.S
+++ b/sysdeps/powerpc/setjmp.S
@@ -62,9 +62,5 @@ ENTRY (__sigsetjmp)
stfd %f30,((JB_FPRS+16*2)*4)(3)
stw %r31,((JB_GPRS+17)*4)(3)
stfd %f31,((JB_FPRS+17*2)*4)(3)
-#ifdef PIC
- b __sigjmp_save@plt
-#else
- b __sigjmp_save
-#endif
+ b JUMPTARGET(__sigjmp_save)
END (__sigsetjmp)
diff --git a/sysdeps/powerpc/strchr.S b/sysdeps/powerpc/strchr.S
new file mode 100644
index 0000000..156d4d1
--- /dev/null
+++ b/sysdeps/powerpc/strchr.S
@@ -0,0 +1,111 @@
+/* Optimized strchr implementation for PowerPC.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* See strlen.s for comments on how this works. */
+
+/* char * [r3] strchr (const char *s [r3] , int c [r4] )
+
+ r0: a temporary
+ r3: our return result.
+ r4: byte we're looking for, spread over the whole word
+ r5: the current word
+ r6: the constant 0xfefefeff (-0x01010101)
+ r7: the constant 0x7f7f7f7f
+ r8: pointer to the current word.
+ r9: a temporary
+ r10: the number of bits we should ignore in the first word
+ r11: a mask with the bits to ignore set to 0
+ r12: a temporary */
+ENTRY(strchr)
+ rlwimi %r4,%r4,8,16,23
+ li %r11,-1
+ rlwimi %r4,%r4,16,0,15
+ lis %r6,0xfeff
+ lis %r7,0x7f7f
+ clrrwi %r8,%r3,2
+ addi %r7,%r7,0x7f7f
+ addi %r6,%r6,0xfffffeff
+ rlwinm %r10,%r3,3,27,28
+/* Test the first (partial?) word. */
+ lwz %r5,0(%r8)
+ srw %r11,%r11,%r10
+ orc %r5,%r5,%r11
+ add %r0,%r6,%r5
+ nor %r9,%r7,%r5
+ and. %r0,%r0,%r9
+ xor %r12,%r4,%r5
+ orc %r12,%r12,%r11
+ b L(loopentry)
+
+/* The loop. */
+
+L(loop):lwzu %r5,4(%r8)
+ and. %r0,%r0,%r9
+/* Test for 0. */
+ add %r0,%r6,%r5
+ nor %r9,%r7,%r5
+ bne L(foundit)
+ and. %r0,%r0,%r9
+/* Start test for the bytes we're looking for. */
+ xor %r12,%r4,%r5
+L(loopentry):
+ add %r0,%r6,%r12
+ nor %r9,%r7,%r12
+ beq L(loop)
+/* There is a zero byte in the word, but may also be a matching byte (either
+ before or after the zero byte). In fact, we may be looking for a
+ zero byte, in which case we return a match. We guess that this hasn't
+ happened, though. */
+L(missed):
+ and. %r0,%r0,%r9
+ li %r3,0
+ beqlr
+/* It did happen. Decide which one was first...
+ I'm not sure if this is actually faster than a sequence of
+ rotates, compares, and branches (we use it anyway because it's shorter). */
+ and %r6,%r7,%r5
+ or %r11,%r7,%r5
+ and %r0,%r7,%r12
+ or %r10,%r7,%r12
+ add %r6,%r6,%r7
+ add %r0,%r0,%r7
+ nor %r5,%r11,%r6
+ nor %r9,%r10,%r0
+ cmplw %r5,%r9
+ bgtlr
+ cntlzw %r4,%r9
+ srwi %r4,%r4,3
+ add %r3,%r8,%r4
+ blr
+
+L(foundit):
+ and %r0,%r7,%r12
+ or %r10,%r7,%r12
+ add %r0,%r0,%r7
+ nor %r9,%r10,%r0
+ cntlzw %r4,%r9
+ subi %r8,%r8,4
+ srwi %r4,%r4,3
+ add %r3,%r8,%r4
+ blr
+END(strchr)
+
+weak_alias(strchr,index)
diff --git a/sysdeps/powerpc/strchr.s b/sysdeps/powerpc/strchr.s
deleted file mode 100644
index c1df66f..0000000
--- a/sysdeps/powerpc/strchr.s
+++ /dev/null
@@ -1,118 +0,0 @@
- # Optimized strchr implementation for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # See strlen.s for comments on how this works.
-
- .section ".text"
- .align 2
- .globl strchr
- .type strchr,@function
-strchr:
- # char * [r3] strchr (const char *s [r3] , int c [r4] )
-
- # r0: a temporary
- # r3: our return result.
- # r4: byte we're looking for, spread over the whole word
- # r5: the current word
- # r6: the constant 0xfefefeff (-0x01010101)
- # r7: the constant 0x7f7f7f7f
- # r8: pointer to the current word.
- # r9: a temporary
- # r10: the number of bits we should ignore in the first word
- # r11: a mask with the bits to ignore set to 0
- # r12: a temporary
-
- rlwimi %r4,%r4,8,16,23
- li %r11,-1
- rlwimi %r4,%r4,16,0,15
- lis %r6,0xfeff
- lis %r7,0x7f7f
- clrrwi %r8,%r3,2
- addi %r7,%r7,0x7f7f
- addi %r6,%r6,0xfffffeff
- rlwinm %r10,%r3,3,27,28
- # Test the first (partial?) word.
- lwz %r5,0(%r8)
- srw %r11,%r11,%r10
- orc %r5,%r5,%r11
- add %r0,%r6,%r5
- nor %r9,%r7,%r5
- and. %r0,%r0,%r9
- xor %r12,%r4,%r5
- orc %r12,%r12,%r11
- b loopentry
-
- # The loop.
-
-loop: lwzu %r5,4(%r8)
- and. %r0,%r0,%r9
- # Test for 0
- add %r0,%r6,%r5
- nor %r9,%r7,%r5
- bne foundit
- and. %r0,%r0,%r9
- # Start test for the bytes we're looking for
- xor %r12,%r4,%r5
-loopentry:
- add %r0,%r6,%r12
- nor %r9,%r7,%r12
- beq loop
- # There is a zero byte in the word, but may also be a matching byte (either
- # before or after the zero byte). In fact, we may be looking for a
- # zero byte, in which case we return a match. We guess that this hasn't
- # happened, though.
-missed:
- and. %r0,%r0,%r9
- li %r3,0
- beqlr
- # It did happen. Decide which one was first...
- # I'm not sure if this is actually faster than a sequence of
- # rotates, compares, and branches (we use it anyway because it's shorter).
- and %r6,%r7,%r5
- or %r11,%r7,%r5
- and %r0,%r7,%r12
- or %r10,%r7,%r12
- add %r6,%r6,%r7
- add %r0,%r0,%r7
- nor %r5,%r11,%r6
- nor %r9,%r10,%r0
- cmplw %r5,%r9
- bgtlr
- cntlzw %r4,%r9
- srwi %r4,%r4,3
- add %r3,%r8,%r4
- blr
-
-foundit:
- and %r0,%r7,%r12
- or %r10,%r7,%r12
- add %r0,%r0,%r7
- nor %r9,%r10,%r0
- cntlzw %r4,%r9
- subi %r8,%r8,4
- srwi %r4,%r4,3
- add %r3,%r8,%r4
- blr
-
-0:
- .size strchr,0b-strchr
-
- .globl index
- .weak index
- .set index,strchr
diff --git a/sysdeps/powerpc/strcmp.S b/sysdeps/powerpc/strcmp.S
new file mode 100644
index 0000000..9f4d134
--- /dev/null
+++ b/sysdeps/powerpc/strcmp.S
@@ -0,0 +1,115 @@
+/* Optimized strcmp implementation for PowerPC.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* See strlen.s for comments on how the end-of-string testing works. */
+
+EALIGN(strcmp,4,0)
+/* int [r3] strcmp (const char *p1 [r3], const char *p2 [r4]) */
+
+/* General register assignments:
+ r0: temporary
+ r3: pointer to previous word in s1
+ r4: pointer to previous word in s2
+ r5: current word from s1
+ r6: current word from s2
+ r7: 0xfefefeff
+ r8: 0x7f7f7f7f
+ r9: ~(word in s1 | 0x7f7f7f7f) */
+
+/* Register assignments in the prologue:
+ r10: low 2 bits of p2-p1
+ r11: mask to orc with r5/r6 */
+
+ or %r0,%r4,%r3
+ clrlwi. %r0,%r0,30
+ lis %r7,0xfeff
+ bne L(unaligned)
+
+ lwz %r5,0(%r3)
+ lwz %r6,0(%r4)
+ lis %r8,0x7f7f
+ addi %r7,%r7,-0x101
+ addi %r8,%r8,0x7f7f
+ b 1f
+
+0: lwzu %r5,4(%r3)
+ bne %cr1,L(different)
+ lwzu %r6,4(%r4)
+1: add %r0,%r7,%r5
+ nor %r9,%r8,%r5
+ and. %r0,%r0,%r9
+ cmpw %cr1,%r5,%r6
+ beq+ 0b
+L(endstring):
+/* OK. We've hit the end of the string. We need to be careful that
+ we don't compare two strings as different because of gunk beyond
+ the end of the strings... */
+ and %r0,%r8,%r5
+ beq %cr1,L(equal)
+ add %r0,%r0,%r8
+ xor. %r10,%r5,%r6
+ andc %r9,%r9,%r0
+ blt- L(highbit)
+ cntlzw %r10,%r10
+ cntlzw %r9,%r9
+ addi %r9,%r9,7
+ cmpw %cr1,%r9,%r10
+ sub %r3,%r5,%r6
+ bgelr+ %cr1
+L(equal):
+ li %r3,0
+ blr
+
+L(different):
+ lwz %r5,-4(%r3)
+ xor. %r10,%r5,%r6
+ sub %r3,%r5,%r6
+ bgelr+
+L(highbit):
+ mr %r3,%r6
+ blr
+
+
+/* Oh well. In this case, we just do a byte-by-byte comparison. */
+ .align 4
+L(unaligned):
+ lbz %r5,0(%r3)
+ lbz %r6,0(%r4)
+ b 1f
+
+0: lbzu %r5,1(%r3)
+ bne- 4f
+ lbzu %r6,1(%r4)
+1: cmpwi %cr1,%r5,0
+ beq- %cr1,3f
+ cmpw %r5,%r6
+ bne- 3f
+ lbzu %r5,1(%r3)
+ lbzu %r6,1(%r4)
+ cmpwi %cr1,%r5,0
+ cmpw %r5,%r6
+ bne+ %cr1,0b
+3: sub %r3,%r5,%r6
+ blr
+4: lbz %r5,-1(%r3)
+ sub %r3,%r5,%r6
+ blr
+END(strcmp)
diff --git a/sysdeps/powerpc/strcmp.s b/sysdeps/powerpc/strcmp.s
deleted file mode 100644
index f901b82..0000000
--- a/sysdeps/powerpc/strcmp.s
+++ /dev/null
@@ -1,273 +0,0 @@
- # Optimized strcmp implementation for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # See strlen.s for comments on how the end-of-string testing works.
-
- .section ".text"
- .align 3
- .globl strcmp
- .type strcmp,@function
-strcmp:
- # int [r3] strcmp (const char *p1 [r3], const char *p2 [r4])
-
- # General register assignments:
- # r0: temporary
- # r3: pointer to previous word in s1
- # r4: pointer to previous word in s2
- # r5: current first word in s1
- # r6: current first word in s2 (after re-alignment)
- # r7: 0xfefefeff
- # r8: 0x7f7f7f7f
- # r9: ~(word in s1 | 0x7f7f7f7f)
-
- # Register assignments in the prologue:
- # r10: low 2 bits of p2-p1
- # r11: mask to orc with r5/r6
-
- subf. %r10,%r4,%r3
- beq- equal
- andi. %r10,%r10,3
- cmpi %cr1,%r10,2
- beq- %cr1,align2
- lis %r7,0xfeff
- lis %r8,0x7f7f
- addi %r8,%r8,0x7f7f
- addi %r7,%r7,0xfffffeff
- bgt- %cr1,align3
-strcmp3:
- rlwinm %r0,%r3,3,27,28
- li %r11,-1
- srw %r11,%r11,%r0
- clrrwi %r3,%r3,2
- clrrwi %r4,%r4,2
- lwz %r5,0(%r3)
- lwz %r6,0(%r4)
- bne- align1
-
- # The loop, case when both strings are aligned the same.
- # on entry, cr1.eq must be 1.
- # r10: second word in s1
- # r11: second word in s2 OR mask to orc with first two words.
-align0:
- andi. %r0,%r3,4
- orc %r5,%r5,%r11
- orc %r6,%r6,%r11
- beq+ a0start
- add %r0,%r7,%r5
- nor %r9,%r8,%r5
- and. %r0,%r0,%r9
- cmplw %cr1,%r5,%r6
- subi %r3,%r3,4
- bne- endstringeq
- subi %r4,%r4,4
- bne- %cr1,difference
-
-loopalign0:
- lwzu %r5,8(%r3)
- bne- %cr1,difference2
- lwzu %r6,8(%r4)
-a0start:
- add %r0,%r7,%r5
- nor %r9,%r8,%r5
- and. %r0,%r0,%r9
- cmplw %cr1,%r5,%r6
- lwz %r10,4(%r3)
- bne- endstringeq
- add %r0,%r7,%r10
- bne- %cr1,difference
- nor %r9,%r8,%r10
- lwz %r11,4(%r4)
- and. %r0,%r0,%r9
- cmplw %cr1,%r10,%r11
- beq+ loopalign0
-
- mr %r5,%r10
- mr %r6,%r11
-
- # fall through to...
-
-endstringeq:
- # (like 'endstring', but an equality code is in cr1)
- beq %cr1,equal
-endstring:
- # OK. We've hit the end of the string. We need to be careful that
- # we don't compare two strings as different because of gunk beyond
- # the end of the strings. We do it like this...
- and %r0,%r8,%r5
- add %r0,%r0,%r8
- xor. %r10,%r5,%r6
- andc %r9,%r9,%r0
- cntlzw %r10,%r10
- cntlzw %r9,%r9
- addi %r9,%r9,7
- cmpw %cr1,%r9,%r10
- blt %cr1,equal
- sub %r3,%r5,%r6
- bgelr+
- mr %r3,%r6
- blr
-equal: li %r3,0
- blr
-
- # The loop, case when s2 is aligned 1 char behind s1.
- # r10: current word in s2 (before re-alignment)
-
-align1:
- cmpwi %cr1,%r0,0
- orc %r5,%r5,%r11
- bne %cr1,align1_123
- # When s1 is aligned to a word boundary, the startup processing is special.
- slwi. %r6,%r6,24
- bne+ a1entry_0
- nor %r9,%r8,%r5
- b endstring
-
-align1_123:
- # Otherwise (s1 not aligned to a word boundary):
- mr %r10,%r6
- add %r0,%r7,%r5
- nor %r9,%r8,%r5
- and. %r0,%r0,%r9
- srwi %r6,%r6,8
- orc %r6,%r6,%r11
- cmplw %cr1,%r5,%r6
- bne- endstringeq
- bne- %cr1,difference
-
-loopalign1:
- slwi. %r6,%r10,24
- bne- %cr1,a1difference
- lwzu %r5,4(%r3)
- beq- endstring1
-a1entry_0:
- lwzu %r10,4(%r4)
-a1entry_123:
- add %r0,%r7,%r5
- nor %r9,%r8,%r5
- and. %r0,%r0,%r9
- rlwimi %r6,%r10,24,8,31
- cmplw %cr1,%r5,%r6
- beq+ loopalign1
- b endstringeq
-
-endstring1:
- srwi %r3,%r5,24
- blr
-
-a1difference:
- lbz %r6,-1(%r4)
- slwi %r6,%r6,24
- rlwimi %r6,%r10,24,8,31
-
- # fall through to...
-
-difference:
- # The idea here is that we could just return '%r5 - %r6', except
- # that the result might overflow. Overflow can only happen when %r5
- # and %r6 have different signs (thus the xor), in which case we want to
- # return negative iff %r6 has its high bit set so %r5 < %r6.
- # A branch-free implementation of this is
- # xor %r0,%r5,%r6
- # rlwinm %r0,%r0,1,31,31
- # rlwnm %r5,%r5,%r0,1,31
- # rlwnm %r6,%r6,%r0,1,31
- # sub %r3,%r5,%r6
- # blr
- # but this is usually more expensive.
- xor. %r0,%r5,%r6
- sub %r3,%r5,%r6
- bgelr+
- mr %r3,%r6
- blr
-
-difference2:
- # As for 'difference', but use registers r10 and r11 instead of r5 and r6.
- xor. %r0,%r10,%r11
- sub %r3,%r10,%r11
- bgelr+
- mr %r3,%r11
- blr
-
- # For the case when s2 is aligned 3 chars behind s1, we switch
- # s1 and s2...
- # r10: used by 'align2' (see below)
- # r11: used by 'align2' (see below)
- # r12: saved link register
- # cr0.eq: must be left as 1.
-
-align3: mflr %r12
- mr %r0,%r3
- mr %r3,%r4
- mr %r4,%r0
- bl strcmp3
- mtlr %r12
- neg %r3,%r3
- blr
-
- # The loop, case when s2 and s1's alignments differ by 2
- # This is the ugly case...
- # FIXME: on a 601, the loop takes 7 cycles instead of the 6 you'd expect,
- # because there are too many branches. This loop should probably be
- # coded like the align1 case.
-
-a2even: lhz %r5,0(%r3)
- lhz %r6,0(%r4)
- b a2entry
-
-align2:
- andi. %r0,%r3,1
- beq+ a2even
- subi %r3,%r3,1
- subi %r4,%r4,1
- lbz %r5,1(%r3)
- lbz %r6,1(%r4)
- cmpwi %cr0,%r5,0
- cmpw %cr1,%r5,%r6
- beq- align2end2
- lhzu %r5,2(%r3)
- beq+ %cr1,a2entry1
- lbz %r5,-1(%r3)
- sub %r3,%r5,%r6
- blr
-
-loopalign2:
- cmpw %cr1,%r5,%r6
- beq- align2end2
- lhzu %r5,2(%r3)
- bne- %cr1,align2different
-a2entry1:
- lhzu %r6,2(%r4)
-a2entry:
- cmpwi %cr5,%r5,0x00ff
- andi. %r0,%r5,0x00ff
- bgt+ %cr5,loopalign2
-
-align2end:
- andi. %r3,%r6,0xff00
- neg %r3,%r3
- blr
-
-align2different:
- lhzu %r5,-2(%r3)
-align2end2:
- sub %r3,%r5,%r6
- blr
-
-0:
- .size strcmp,0b-strcmp
diff --git a/sysdeps/powerpc/strlen.S b/sysdeps/powerpc/strlen.S
new file mode 100644
index 0000000..dc6660b
--- /dev/null
+++ b/sysdeps/powerpc/strlen.S
@@ -0,0 +1,144 @@
+/* Optimized strlen implementation for PowerPC.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* The algorithm here uses the following techniques:
+
+ 1) Given a word 'x', we can test to see if it contains any 0 bytes
+ by subtracting 0x01010101, and seeing if any of the high bits of each
+ byte changed from 0 to 1. This works because the least significant
+ 0 byte must have had no incoming carry (otherwise it's not the least
+ significant), so it is 0x00 - 0x01 == 0xff. For all other
+ byte values, either they have the high bit set initially, or when
+ 1 is subtracted you get a value in the range 0x00-0x7f, none of which
+ have their high bit set. The expression here is
+ (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
+ there were no 0x00 bytes in the word.
+
+ 2) Given a word 'x', we can test to see _which_ byte was zero by
+ calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
+ This produces 0x80 in each byte that was zero, and 0x00 in all
+ the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
+ byte, and the '| x' part ensures that bytes with the high bit set
+ produce 0x00. The addition will carry into the high bit of each byte
+ iff that byte had one of its low 7 bits set. We can then just see
+ which was the most significant bit set and divide by 8 to find how
+ many to add to the index.
+ This is from the book 'The PowerPC Compiler Writer's Guide',
+ by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
+
+ We deal with strings not aligned to a word boundary by taking the
+ first word and ensuring that bytes not part of the string
+ are treated as nonzero. To allow for memory latency, we unroll the
+ loop a few times, being careful to ensure that we do not read ahead
+ across cache line boundaries.
+
+ Questions to answer:
+ 1) How long are strings passed to strlen? If they're often really long,
+ we should probably use cache management instructions and/or unroll the
+ loop more. If they're often quite short, it might be better to use
+ fact (2) in the inner loop than have to recalculate it.
+ 2) How popular are bytes with the high bit set? If they are very rare,
+ on some processors it might be useful to use the simpler expression
+ ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
+ ALU), but this fails when any character has its high bit set. */
+
+/* Some notes on register usage: Under the SVR4 ABI, we can use registers
+ 0 and 3 through 12 (so long as we don't call any procedures) without
+ saving them. We can also use registers 14 through 31 if we save them.
+ We can't use r1 (it's the stack pointer), r2 nor r13 because the user
+ program may expect them to hold their usual value if we get sent
+ a signal. Integer parameters are passed in r3 through r10.
+ We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
+ them, the others we must save. */
+
+ENTRY(strlen)
+/* On entry, r3 points to the string, and it's left that way.
+ We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f.
+ r4 is used to keep the current index into the string; r5 holds
+ the number of padding bits we prepend to the string to make it
+ start at a word boundary. r8 holds the 'current' word.
+ r9-12 are temporaries. r0 is used as a temporary and for discarded
+ results. */
+ clrrwi %r4,%r3,2
+ lis %r7,0x7f7f
+ rlwinm %r5,%r3,3,27,28
+ lwz %r8,0(%r4)
+ li %r9,-1
+ addi %r7,%r7,0x7f7f
+/* That's the setup done, now do the first pair of words.
+ We make an exception and use method (2) on the first two words, to reduce
+ overhead. */
+ srw %r9,%r9,%r5
+ and %r0,%r7,%r8
+ or %r10,%r7,%r8
+ add %r0,%r0,%r7
+ nor %r0,%r10,%r0
+ and. %r8,%r0,%r9
+ mtcrf 0x01,%r3
+ bne L(done0)
+ lis %r6,0xfeff
+ addi %r6,%r6,-0x101
+/* Are we now aligned to a doubleword boundary? */
+ bt 29,L(loop)
+
+/* Handle second word of pair. */
+ lwzu %r8,4(%r4)
+ and %r0,%r7,%r8
+ or %r10,%r7,%r8
+ add %r0,%r0,%r7
+ nor. %r8,%r10,%r0
+ bne L(done0)
+
+/* The loop. */
+
+L(loop):
+ lwz %r8,4(%r4)
+ lwzu %r9,8(%r4)
+ add %r0,%r6,%r8
+ nor %r10,%r7,%r8
+ and. %r0,%r0,%r10
+ add %r11,%r6,%r9
+ nor %r12,%r7,%r9
+ bne L(done1)
+ and. %r0,%r11,%r12
+ beq L(loop)
+
+ and %r0,%r7,%r9
+ add %r0,%r0,%r7
+ andc %r8,%r12,%r0
+ b L(done0)
+
+L(done1):
+ and %r0,%r7,%r8
+ subi %r4,%r4,4
+ add %r0,%r0,%r7
+ andc %r8,%r10,%r0
+
+/* When we get to here, r4 points to the first word in the string that
+ contains a zero byte, and the most significant set bit in r8 is in that
+ byte. */
+L(done0):
+ cntlzw %r11,%r8
+ subf %r0,%r3,%r4
+ srwi %r11,%r11,3
+ add %r3,%r0,%r11
+ blr
+END(strlen)
diff --git a/sysdeps/powerpc/strlen.s b/sysdeps/powerpc/strlen.s
deleted file mode 100644
index ea80977..0000000
--- a/sysdeps/powerpc/strlen.s
+++ /dev/null
@@ -1,144 +0,0 @@
- # Optimized strlen implementation for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # The algorithm here uses the following techniques:
- #
- # 1) Given a word 'x', we can test to see if it contains any 0 bytes
- # by subtracting 0x01010101, and seeing if any of the high bits of each
- # byte changed from 0 to 1. This works because the least significant
- # 0 byte must have had no incoming carry (otherwise it's not the least
- # significant), so it is 0x00 - 0x01 == 0xff. For all other
- # byte values, either they have the high bit set initially, or when
- # 1 is subtracted you get a value in the range 0x00-0x7f, none of which
- # have their high bit set. The expression here is
- # (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- # there were no 0x00 bytes in the word.
- #
- # 2) Given a word 'x', we can test to see _which_ byte was zero by
- # calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
- # This produces 0x80 in each byte that was zero, and 0x00 in all
- # the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
- # byte, and the '| x' part ensures that bytes with the high bit set
- # produce 0x00. The addition will carry into the high bit of each byte
- # iff that byte had one of its low 7 bits set. We can then just see
- # which was the most significant bit set and divide by 8 to find how
- # many to add to the index.
- # This is from the book 'The PowerPC Compiler Writer's Guide',
- # by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
- #
- # We deal with strings not aligned to a word boundary by taking the
- # first word and ensuring that bytes not part of the string
- # are treated as nonzero. To allow for memory latency, we unroll the
- # loop a few times, being careful to ensure that we do not read ahead
- # across cache line boundaries.
- #
- # Questions to answer:
- # 1) How long are strings passed to strlen? If they're often really long,
- # we should probably use cache management instructions and/or unroll the
- # loop more. If they're often quite short, it might be better to use
- # fact (2) in the inner loop than have to recalculate it.
- # 2) How popular are bytes with the high bit set? If they are very rare,
- # on some processors it might be useful to use the simpler expression
- # ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
- # ALU), but this fails when any character has its high bit set.
-
- # Some notes on register usage: Under the SVR4 ABI, we can use registers
- # 0 and 3 through 12 (so long as we don't call any procedures) without
- # saving them. We can also use registers 14 through 31 if we save them.
- # We can't use r1 (it's the stack pointer), r2 nor r13 because the user
- # program may expect them to hold their usual value if we get sent
- # a signal. Integer parameters are passed in r3 through r10.
- # We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
- # them, the others we must save.
-
- .section ".text"
- .align 2
- .globl strlen
- .type strlen,@function
-strlen:
- # On entry, r3 points to the string, and it's left that way.
- # We use r6 to store 0xfefefeff, and r7 to store 0x7f7f7f7f.
- # r4 is used to keep the current index into the string; r5 holds
- # the number of padding bits we prepend to the string to make it
- # start at a word boundary. r8 holds the 'current' word.
- # r9-12 are temporaries. r0 is used as a temporary and for discarded
- # results.
- clrrwi %r4,%r3,2
- lis %r7,0x7f7f
- rlwinm %r5,%r3,3,27,28
- lwz %r8,0(%r4)
- li %r9,-1
- addi %r7,%r7,0x7f7f
- # That's the setup done, now do the first pair of words.
- # We make an exception and use method (2) on the first two words, to reduce
- # overhead.
- srw %r9,%r9,%r5
- and %r0,%r7,%r8
- or %r10,%r7,%r8
- add %r0,%r0,%r7
- nor %r0,%r10,%r0
- and. %r8,%r0,%r9
- mtcrf 0x01,%r3
- bne done0
- lis %r6,0xfeff
- addi %r6,%r6,-0x101
- # Are we now aligned to a doubleword boundary?
- bt 29,loop
-
- # Handle second word of pair.
- lwzu %r8,4(%r4)
- and %r0,%r7,%r8
- or %r10,%r7,%r8
- add %r0,%r0,%r7
- nor. %r8,%r10,%r0
- bne done0
-
- # The loop.
-
-loop: lwz %r8,4(%r4)
- lwzu %r9,8(%r4)
- add %r0,%r6,%r8
- nor %r10,%r7,%r8
- and. %r0,%r0,%r10
- add %r11,%r6,%r9
- nor %r12,%r7,%r9
- bne done1
- and. %r0,%r11,%r12
- beq loop
-
- and %r0,%r7,%r9
- add %r0,%r0,%r7
- andc %r8,%r12,%r0
- b done0
-
-done1: and %r0,%r7,%r8
- subi %r4,%r4,4
- add %r0,%r0,%r7
- andc %r8,%r10,%r0
-
- # When we get to here, r4 points to the first word in the string that
- # contains a zero byte, and the most significant set bit in r8 is in that
- # byte.
-done0: cntlzw %r11,%r8
- subf %r0,%r3,%r4
- srwi %r11,%r11,3
- add %r3,%r0,%r11
- blr
-0:
- .size strlen,0b-strlen
diff --git a/sysdeps/powerpc/sub_n.S b/sysdeps/powerpc/sub_n.S
new file mode 100644
index 0000000..7af577d
--- /dev/null
+++ b/sysdeps/powerpc/sub_n.S
@@ -0,0 +1,68 @@
+/* Subtract two limb vectors of equal, non-zero length for PowerPC.
+ Copyright (C) 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
+ mp_size_t size)
+ Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1. */
+
+/* Note on optimisation: This code is optimal for the 601. Almost every other
+ possible 2-unrolled inner loop will not be. Also, watch out for the
+ alignment... */
+
+EALIGN(__mpn_sub_n,3,1)
+/* Set up for loop below. */
+ mtcrf 0x01,%r6
+ srwi. %r7,%r6,1
+ mtctr %r7
+ bt 31,2f
+
+/* Set the carry (clear the borrow). */
+ subfc %r0,%r0,%r0
+/* Adjust pointers for loop. */
+ addi %r3,%r3,-4
+ addi %r4,%r4,-4
+ addi %r5,%r5,-4
+ b 0f
+
+2: lwz %r7,0(%r5)
+ lwz %r6,0(%r4)
+ subfc %r6,%r7,%r6
+ stw %r6,0(%r3)
+ beq 1f
+
+/* Align start of loop to an odd word boundary to guarantee that the
+ last two words can be fetched in one access (for 601). This turns
+ out to be important. */
+0:
+ lwz %r9,4(%r4)
+ lwz %r8,4(%r5)
+ lwzu %r6,8(%r4)
+ lwzu %r7,8(%r5)
+ subfe %r8,%r8,%r9
+ stw %r8,4(%r3)
+ subfe %r6,%r7,%r6
+ stwu %r6,8(%r3)
+ bdnz 0b
+/* Return the borrow. */
+1: subfe %r3,%r3,%r3
+ neg %r3,%r3
+ blr
+END(__mpn_sub_n)
diff --git a/sysdeps/powerpc/sub_n.s b/sysdeps/powerpc/sub_n.s
deleted file mode 100644
index 8711bf9..0000000
--- a/sysdeps/powerpc/sub_n.s
+++ /dev/null
@@ -1,69 +0,0 @@
- # Subtract two limb vectors of equal, non-zero length for PowerPC.
- # Copyright (C) 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
- # mp_size_t size)
- # Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1.
-
- # Note on optimisation: This code is optimal for the 601. Almost every other
- # possible 2-unrolled inner loop will not be. Also, watch out for the
- # alignment...
-
- .align 3
- .globl __mpn_sub_n
- .type __mpn_sub_n,@function
- nop
-__mpn_sub_n:
- # Set up for loop below.
- mtcrf 0x01,%r6
- srwi. %r7,%r6,1
- mtctr %r7
- bt 31,2f
-
- # Set the carry (clear the borrow).
- subfc %r0,%r0,%r0
- # Adjust pointers for loop.
- addi %r3,%r3,-4
- addi %r4,%r4,-4
- addi %r5,%r5,-4
- b 0f
-
-2: lwz %r7,0(%r5)
- lwz %r6,0(%r4)
- subfc %r6,%r7,%r6
- stw %r6,0(%r3)
- beq 1f
-
- # Align start of loop to an odd word boundary to guarantee that the
- # last two words can be fetched in one access (for 601). This turns
- # out to be important.
-0:
- lwz %r9,4(%r4)
- lwz %r8,4(%r5)
- lwzu %r6,8(%r4)
- lwzu %r7,8(%r5)
- subfe %r8,%r8,%r9
- stw %r8,4(%r3)
- subfe %r6,%r7,%r6
- stwu %r6,8(%r3)
- bdnz 0b
- # return the borrow
-1: subfe %r3,%r3,%r3
- neg %r3,%r3
- blr
diff --git a/sysdeps/powerpc/submul_1.S b/sysdeps/powerpc/submul_1.S
new file mode 100644
index 0000000..80da8ec
--- /dev/null
+++ b/sysdeps/powerpc/submul_1.S
@@ -0,0 +1,52 @@
+/* Multiply a limb vector by a single limb, for PowerPC.
+ Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <sysdep.h>
+
+/* mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
+ mp_size_t s1_size, mp_limb_t s2_limb)
+ Calculate res-s1*s2 and put result back in res; return carry. */
+
+ENTRY(__mpn_submul_1)
+ mtctr %r5
+
+ lwz %r0,0(%r4)
+ mullw %r7,%r0,%r6
+ mulhwu %r10,%r0,%r6
+ lwz %r9,0(%r3)
+ subf %r8,%r7,%r9
+ addc %r7,%r7,%r8 # invert cy (r7 is junk)
+ addi %r3,%r3,-4 # adjust res_ptr
+ bdz 1f
+
+0: lwzu %r0,4(%r4)
+ stwu %r8,4(%r3)
+ mullw %r8,%r0,%r6
+ adde %r7,%r8,%r10
+ mulhwu %r10,%r0,%r6
+ lwz %r9,4(%r3)
+ addze %r10,%r10
+ subf %r8,%r7,%r9
+ addc %r7,%r7,%r8 # invert cy (r7 is junk)
+ bdnz 0b
+
+1: stw %r8,4(%r3)
+ addze %r3,%r10
+ blr
+END(__mpn_submul_1)
diff --git a/sysdeps/powerpc/submul_1.s b/sysdeps/powerpc/submul_1.s
deleted file mode 100644
index 999430d..0000000
--- a/sysdeps/powerpc/submul_1.s
+++ /dev/null
@@ -1,52 +0,0 @@
- # Multiply a limb vector by a single limb, for PowerPC.
- # Copyright (C) 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
- # This file is part of the GNU C Library.
- #
- # The GNU C Library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Library General Public License as
- # published by the Free Software Foundation; either version 2 of the
- # License, or (at your option) any later version.
- #
- # The GNU C Library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Library General Public License for more details.
- #
- # You should have received a copy of the GNU Library General Public
- # License along with the GNU C Library; see the file COPYING.LIB. If not,
- # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- # Boston, MA 02111-1307, USA.
-
- # mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr,
- # mp_size_t s1_size, mp_limb_t s2_limb)
- # Calculate res-s1*s2 and put result back in res; return carry.
-
- .align 2
- .globl __mpn_submul_1
- .type __mpn_submul_1,@function
-__mpn_submul_1:
- mtctr %r5
-
- lwz %r0,0(%r4)
- mullw %r7,%r0,%r6
- mulhwu %r10,%r0,%r6
- lwz %r9,0(%r3)
- subf %r8,%r7,%r9
- addc %r7,%r7,%r8 # invert cy (r7 is junk)
- addi %r3,%r3,-4 # adjust res_ptr
- bdz Lend
-
-Loop: lwzu %r0,4(%r4)
- stwu %r8,4(%r3)
- mullw %r8,%r0,%r6
- adde %r7,%r8,%r10
- mulhwu %r10,%r0,%r6
- lwz %r9,4(%r3)
- addze %r10,%r10
- subf %r8,%r7,%r9
- addc %r7,%r7,%r8 # invert cy (r7 is junk)
- bdnz Loop
-
-Lend: stw %r8,4(%r3)
- addze %r3,%r10
- blr
diff --git a/sysdeps/powerpc/test-arith.c b/sysdeps/powerpc/test-arith.c
index c846b0d..9e1be88 100644
--- a/sysdeps/powerpc/test-arith.c
+++ b/sysdeps/powerpc/test-arith.c
@@ -226,7 +226,7 @@ check_result(int line, const char *rm, tocheck_t expected, tocheck_t actual)
if (memcmp(&expected, &actual, sizeof(tocheck_t)) != 0)
{
unsigned char *ex, *ac;
- int i;
+ size_t i;
printf("%s:%d:round %s:result failed\n"
" expected result 0x", __FILE__, line, rm);
@@ -323,7 +323,7 @@ check_excepts(int line, const char *rm, int expected, int actual)
expected = expected & ~excepts_missing | FE_INVALID_SNAN;
if ((expected & all_exceptions) != actual)
{
- int i;
+ size_t i;
printf("%s:%d:round %s:exceptions failed\n"
" expected exceptions ", __FILE__, line,rm);
for (i = 0; i < sizeof(excepts)/sizeof(excepts[0]); i++)
@@ -419,7 +419,7 @@ static const optest_t optests[] = {
{__LINE__,B_NEG, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 },
{__LINE__,B_NEG, 0,P_Z,P_Z1, 0,0,0, R_ALL, 0, 1,P_Z,P_Z1 },
{__LINE__,B_NEG, 1,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 },
-
+
/* Absolute value. */
{__LINE__,B_ABS, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z },
{__LINE__,B_ABS, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z },
@@ -433,7 +433,7 @@ static const optest_t optests[] = {
{__LINE__,B_ABS, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 },
{__LINE__,B_ABS, 0,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 },
{__LINE__,B_ABS, 1,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 },
-
+
/* Square root. */
{__LINE__,B_SQRT, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z },
{__LINE__,B_SQRT, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 1,P_Z,P_Z },
@@ -459,7 +459,8 @@ static const optest_t optests[] = {
static void
check_op(void)
{
- int i, j;
+ size_t i;
+ int j;
tocheck_t r, a, b, x;
int raised;
@@ -497,7 +498,7 @@ static void
fail_xr(int line, const char *rm, tocheck_t x, tocheck_t r, tocheck_t xx,
int xflag)
{
- int i;
+ size_t i;
unsigned char *cx, *cr, *cxx;
printf("%s:%d:round %s:fail\n with x=0x", __FILE__, line,rm);
@@ -539,7 +540,7 @@ check_sqrt(tocheck_t a)
r0 = delta(r1,-1); r2 = delta(r1,1);
switch (1 << j)
{
- case R_NEAREST:
+ case R_NEAREST:
x0 = r0 * r0 - a; x2 = r2 * r2 - a;
ok = fabs(x0) >= fabs(x1) && fabs(x1) <= fabs(x2);
break;