author     Jakub Jelinek <jakub@redhat.com>   2007-07-12 18:26:36 +0000
committer  Jakub Jelinek <jakub@redhat.com>   2007-07-12 18:26:36 +0000
commit     0ecb606cb6cf65de1d9fc8a919bceb4be476c602
tree       2ea1f8305970753e4a657acb2ccc15ca3eec8e2c   /sysdeps/powerpc/powerpc64
parent     7d58530341304d403a6626d7f7a1913165fe2f32
2.5-18.1
Diffstat (limited to 'sysdeps/powerpc/powerpc64')
44 files changed, 1640 insertions, 246 deletions
diff --git a/sysdeps/powerpc/powerpc64/Dist b/sysdeps/powerpc/powerpc64/Dist deleted file mode 100644 index bbadfef..0000000 --- a/sysdeps/powerpc/powerpc64/Dist +++ /dev/null @@ -1,4 +0,0 @@ -dl-machine.c -ppc-mcount.S -elf/entry.h -bp-asm.h diff --git a/sysdeps/powerpc/powerpc64/Makefile b/sysdeps/powerpc/powerpc64/Makefile index 3ced656..78d4f07 100644 --- a/sysdeps/powerpc/powerpc64/Makefile +++ b/sysdeps/powerpc/powerpc64/Makefile @@ -10,11 +10,25 @@ ifeq (yes,$(build-shared)) pic-ccflag = -fpic endif +# These flags prevent FPU or Altivec registers from being used, +# for code called in contexts that is not allowed to touch those registers. +# Stupid GCC requires us to pass all these ridiculous switches. +no-special-regs := $(sort $(foreach n,40 41 50 51 60 61 62 63 \ + $(foreach m,2 3 4 5 6 7 8 9, \ + 3$m 4$m 5$m),\ + -ffixed-$n)) \ + $(sort $(foreach n,$(foreach m,0 1 2 3 4 5 6 7 8 9,\ + $m 1$m 2$m) 30 31,\ + -ffixed-v$n)) \ + -ffixed-vrsave -ffixed-vscr + ifeq ($(subdir),csu) +sysdep_routines += hp-timing +elide-routines.os += hp-timing ifneq ($(elf),no) -# The initfini generation code doesn't work in the presence of -g1 or -# higher, so we use -g0. -CFLAGS-initfini.s = -g0 -fpic -O1 +# The initfini generation code doesn't work in the presence of -fPIC, so +# we use -fpic instead which is much better. +CFLAGS-initfini.s += -fpic -O1 endif endif @@ -27,5 +41,5 @@ ifeq ($(subdir),gmon) # The assembly functions assume that fp arg regs are not trashed. # Compiling with -msoft-float ensures that fp regs are not used # for moving memory around. -CFLAGS-mcount.c += -msoft-float +CFLAGS-mcount.c += $(no-special-regs) endif diff --git a/sysdeps/powerpc/powerpc64/__longjmp-common.S b/sysdeps/powerpc/powerpc64/__longjmp-common.S index 30087d7..700a2a5 100644 --- a/sysdeps/powerpc/powerpc64/__longjmp-common.S +++ b/sysdeps/powerpc/powerpc64/__longjmp-common.S @@ -1,6 +1,6 @@ /* longjmp for PowerPC64. - Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004 - Free Software Foundation, Inc. + Copyright (C) 1995, 1996,1997,1999,2000,2001,2002,2003,2004,2005,2006 + Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -24,7 +24,7 @@ #ifdef __NO_VMX__ # include <novmxsetjmp.h> #else -# include <bits/setjmp.h> +# include <jmpbuf-offsets.h> #endif #include <bp-sym.h> #include <bp-asm.h> @@ -108,7 +108,12 @@ aligned_restore_vmx: lvx v31,0,r6 L(no_vmx): #endif +#ifdef PTR_DEMANGLE + ld r22,(JB_GPR1*8)(r3) + PTR_DEMANGLE3 (r1, r22, r25) +#else ld r1,(JB_GPR1*8)(r3) +#endif ld r2,(JB_GPR2*8)(r3) ld r0,(JB_LR*8)(r3) ld r14,((JB_GPRS+0)*8)(r3) @@ -128,6 +133,9 @@ L(no_vmx): lfd fp19,((JB_FPRS+5)*8)(r3) ld r20,((JB_GPRS+6)*8)(r3) lfd fp20,((JB_FPRS+6)*8)(r3) +#ifdef PTR_DEMANGLE + PTR_DEMANGLE2 (r0, r25) +#endif mtlr r0 /* std r2,40(r1) Restore the TOC save area. */ ld r21,((JB_GPRS+7)*8)(r3) diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c index 8669b6a..9c8ebbb 100644 --- a/sysdeps/powerpc/powerpc64/backtrace.c +++ b/sysdeps/powerpc/powerpc64/backtrace.c @@ -1,5 +1,5 @@ /* Return backtrace of current program state. - Copyright (C) 1998, 2000, 2002 Free Software Foundation, Inc. + Copyright (C) 1998, 2000, 2002, 2005 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -67,3 +67,4 @@ __backtrace (void **array, int size) return count; } weak_alias (__backtrace, backtrace) +libc_hidden_def (__backtrace) diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h index 3fcf77d..cec271b 100644 --- a/sysdeps/powerpc/powerpc64/dl-machine.h +++ b/sysdeps/powerpc/powerpc64/dl-machine.h @@ -1,6 +1,6 @@ /* Machine-dependent ELF dynamic relocation inline functions. PowerPC64 version. - Copyright 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 + Copyright 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -107,92 +107,6 @@ elf_machine_dynamic (void) /* The PLT uses Elf64_Rela relocs. */ #define elf_machine_relplt elf_machine_rela -/* This code gets called via a .glink stub which loads PLT0. It is - used in dl-runtime.c to call the `fixup' function and then redirect - to the address `fixup' returns. - - Enter with r0 = plt reloc index, - r2 = ld.so tocbase, - r11 = ld.so link map. */ - -#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name) \ - asm (".section \".text\"\n" \ -" .align 2\n" \ -" .type " BODY_PREFIX #tramp_name ",@function\n" \ -" .section \".opd\",\"aw\"\n" \ -" .align 3\n" \ -" .globl " #tramp_name "\n" \ -" " ENTRY_2(tramp_name) "\n" \ -#tramp_name ":\n" \ -" " OPD_ENT(tramp_name) "\n" \ -" .previous\n" \ -BODY_PREFIX #tramp_name ":\n" \ -/* We need to save the registers used to pass parameters, ie. r3 thru \ - r10; the registers are saved in a stack frame. */ \ -" stdu 1,-128(1)\n" \ -" std 3,48(1)\n" \ -" mr 3,11\n" \ -" std 4,56(1)\n" \ -" sldi 4,0,1\n" \ -" std 5,64(1)\n" \ -" add 4,4,0\n" \ -" std 6,72(1)\n" \ -" sldi 4,4,3\n" \ -" std 7,80(1)\n" \ -" mflr 0\n" \ -" std 8,88(1)\n" \ -/* Store the LR in the LR Save area of the previous frame. */ \ -" std 0,128+16(1)\n" \ -" mfcr 0\n" \ -" std 9,96(1)\n" \ -" std 10,104(1)\n" \ -/* I'm almost certain we don't have to save cr... be safe. */ \ -" std 0,8(1)\n" \ -" bl " DOT_PREFIX #fixup_name "\n" \ -/* Put the registers back. */ \ -" ld 0,128+16(1)\n" \ -" ld 10,104(1)\n" \ -" ld 9,96(1)\n" \ -" ld 8,88(1)\n" \ -" ld 7,80(1)\n" \ -" mtlr 0\n" \ -" ld 0,8(1)\n" \ -" ld 6,72(1)\n" \ -" ld 5,64(1)\n" \ -" ld 4,56(1)\n" \ -" mtcrf 0xFF,0\n" \ -/* Load the target address, toc and static chain reg from the function \ - descriptor returned by fixup. */ \ -" ld 0,0(3)\n" \ -" ld 2,8(3)\n" \ -" mtctr 0\n" \ -" ld 11,16(3)\n" \ -" ld 3,48(1)\n" \ -/* Unwind the stack frame, and jump. */ \ -" addi 1,1,128\n" \ -" bctr\n" \ -".LT_" #tramp_name ":\n" \ -" .long 0\n" \ -" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ -" .long .LT_" #tramp_name "-" BODY_PREFIX #tramp_name "\n" \ -" .short .LT_" #tramp_name "_name_end-.LT_" #tramp_name "_name_start\n" \ -".LT_" #tramp_name "_name_start:\n" \ -" .ascii \"" #tramp_name "\"\n" \ -".LT_" #tramp_name "_name_end:\n" \ -" .align 2\n" \ -" " END_2(tramp_name) "\n" \ -" .previous"); - -#ifndef PROF -#define ELF_MACHINE_RUNTIME_TRAMPOLINE \ - TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup); \ - TRAMPOLINE_TEMPLATE (_dl_profile_resolve, profile_fixup); -#else -#define ELF_MACHINE_RUNTIME_TRAMPOLINE \ - TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup); \ - void _dl_runtime_resolve (void); \ - strong_alias (_dl_runtime_resolve, _dl_profile_resolve); -#endif #ifdef HAVE_INLINED_SYSCALLS /* We do not need _dl_starting_up. 
*/ @@ -208,16 +122,16 @@ BODY_PREFIX #tramp_name ":\n" \ `_dl_start' is the real entry point; its return value is the user program's entry point. */ #define RTLD_START \ - asm (".section \".text\"\n" \ + asm (".pushsection \".text\"\n" \ " .align 2\n" \ " .type " BODY_PREFIX "_start,@function\n" \ -" .section \".opd\",\"aw\"\n" \ +" .pushsection \".opd\",\"aw\"\n" \ " .align 3\n" \ " .globl _start\n" \ " " ENTRY_2(_start) "\n" \ "_start:\n" \ " " OPD_ENT(_start) "\n" \ -" .previous\n" \ +" .popsection\n" \ BODY_PREFIX "_start:\n" \ /* We start with the following on the stack, from top: \ argc (4 bytes); \ @@ -243,11 +157,11 @@ BODY_PREFIX "_start:\n" \ " .align 2\n" \ " " END_2(_start) "\n" \ " .globl _dl_start_user\n" \ -" .section \".opd\",\"aw\"\n" \ +" .pushsection \".opd\",\"aw\"\n" \ "_dl_start_user:\n" \ " " OPD_ENT(_dl_start_user) "\n" \ -" .previous\n" \ -" .section \".toc\",\"aw\"\n" \ +" .popsection\n" \ +" .pushsection \".toc\",\"aw\"\n" \ DL_STARTING_UP_DEF \ ".LC__rtld_global:\n" \ " .tc _rtld_global[TC],_rtld_global\n" \ @@ -257,7 +171,7 @@ DL_STARTING_UP_DEF \ " .tc _dl_argv_internal[TC],_dl_argv_internal\n" \ ".LC__dl_fini:\n" \ " .tc _dl_fini[TC],_dl_fini\n" \ -" .previous\n" \ +" .popsection\n" \ " .type " BODY_PREFIX "_dl_start_user,@function\n" \ " " ENTRY_2(_dl_start_user) "\n" \ /* Now, we do our main work of calling initialisation procedures. \ @@ -331,7 +245,7 @@ BODY_PREFIX "_dl_start_user:\n" \ ".LT__dl_start_user_name_end:\n" \ " .align 2\n" \ " " END_2(_dl_start_user) "\n" \ -" .previous"); +" .popsection"); /* Nonzero iff TYPE should not be allowed to resolve to one of the main executable's symbols, as for a COPY reloc. */ @@ -420,7 +334,8 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) resolve_fd = (Elf64_FuncDesc *) (profile ? _dl_profile_resolve : _dl_runtime_resolve); - if (profile && _dl_name_match_p (GLRO(dl_profile), map)) + if (profile && GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), map)) /* This is the object we are looking for. Say that we really want profiling and the timers are started. */ GL(dl_profile_map) = map; @@ -545,6 +460,11 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, return value + reloc->r_addend; } + +/* Names of the architecture-specific auditing callback functions. */ +#define ARCH_LA_PLTENTER ppc64_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc64_gnu_pltexit + #endif /* dl_machine_h */ #ifdef RESOLVE_MAP @@ -567,7 +487,7 @@ extern void _dl_reloc_overflow (struct link_map *map, const Elf64_Sym *refsym) attribute_hidden; -static inline void +auto inline void __attribute__ ((always_inline)) elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, void *const reloc_addr_arg) { @@ -577,7 +497,7 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, #if defined USE_TLS && (!defined RTLD_BOOTSTRAP || USE___THREAD) /* This computes the value used by TPREL* relocs. */ -static Elf64_Addr __attribute__ ((const)) +auto inline Elf64_Addr __attribute__ ((always_inline, const)) elf_machine_tprel (struct link_map *map, struct link_map *sym_map, const Elf64_Sym *sym, @@ -598,7 +518,7 @@ elf_machine_tprel (struct link_map *map, /* Perform the relocation specified by RELOC and SYM (which is fully resolved). MAP is the object containing the reloc. 
*/ -static inline void +auto inline void __attribute__ ((always_inline)) elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, const Elf64_Sym *sym, @@ -883,11 +803,12 @@ elf_machine_rela (struct link_map *map, MODIFIED_CODE_NOQUEUE (reloc_addr); } -static inline void +auto inline void __attribute__ ((always_inline)) elf_machine_lazy_rel (struct link_map *map, Elf64_Addr l_addr, const Elf64_Rela *reloc) { /* elf_machine_runtime_setup handles this. */ } + #endif /* RESOLVE */ diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S new file mode 100644 index 0000000..9ca394d --- /dev/null +++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S @@ -0,0 +1,442 @@ +/* PLT trampolines. PPC64 version. + Copyright (C) 2005, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + + + .section ".text" +/* On entry r0 contains the index of the PLT entry we need to fixup + and r11 contains the link_map (from PLT0+16). The link_map becomes + parm1 (r3) and the index (r0) need to be converted to an offset + (index * 24) in parm2 (r4). */ + +EALIGN(_dl_runtime_resolve, 4, 0) +/* We need to save the registers used to pass parameters, ie. r3 thru + r10; the registers are saved in a stack frame. */ + stdu r1,-128(r1) + cfi_adjust_cfa_offset (128) + std r3,48(r1) + mr r3,r11 + std r4,56(r1) + sldi r4,r0,1 + std r5,64(r1) + add r4,r4,r0 + std r6,72(r1) + sldi r4,r4,3 + std r7,80(r1) + mflr r0 + std r8,88(r1) +/* Store the LR in the LR Save area of the previous frame. */ + std r0,128+16(r1) + cfi_offset (lr, 16) + mfcr r0 + std r9,96(r1) + std r10,104(r1) +/* I'm almost certain we don't have to save cr... be safe. */ + std r0,8(r1) + bl JUMPTARGET(_dl_fixup) +/* Put the registers back. */ + ld r0,128+16(r1) + ld r10,104(r1) + ld r9,96(r1) + ld r8,88(r1) + ld r7,80(r1) + mtlr r0 + ld r0,8(r1) + ld r6,72(r1) + ld r5,64(r1) + ld r4,56(r1) + mtcrf 0xFF,r0 +/* Load the target address, toc and static chain reg from the function + descriptor returned by fixup. */ + ld r0,0(r3) + ld r2,8(r3) + mtctr r0 + ld r11,16(r3) + ld r3,48(r1) +/* Unwind the stack frame, and jump. 
*/ + addi r1,r1,128 + bctr +END(_dl_runtime_resolve) + + /* Stack layout: + +592 previous backchain + +584 spill_r31 + +576 spill_r30 + +560 v1 + +552 fp4 + +544 fp3 + +536 fp2 + +528 fp1 + +520 r4 + +512 r3 + return values + +504 free + +496 stackframe + +488 lr + +480 r1 + +464 v13 + +448 v12 + +432 v11 + +416 v10 + +400 v9 + +384 v8 + +368 v7 + +352 v6 + +336 v5 + +320 v4 + +304 v3 + +288 v2 + * VMX Parms in V2-V13, V0-V1 are scratch + +284 vrsave + +280 free + +272 fp13 + +264 fp12 + +256 fp11 + +248 fp10 + +240 fp9 + +232 fp8 + +224 fp7 + +216 fp6 + +208 fp5 + +200 fp4 + +192 fp3 + +184 fp2 + +176 fp1 + * FP Parms in FP1-FP13, FP0 is a scratch register + +168 r10 + +160 r9 + +152 r8 + +144 r7 + +136 r6 + +128 r5 + +120 r4 + +112 r3 + * Integer parms in R3-R10, R0 is scratch, R1 SP, R2 is TOC + +104 parm8 + +96 parm7 + +88 parm6 + +80 parm5 + +72 parm4 + +64 parm3 + +56 parm2 + +48 parm1 + * Parameter save area, Allocated by the call, at least 8 double words + +40 TOC save area + +32 Reserved for linker + +24 Reserved for compiler + +16 LR save area + +8 CR save area + r1+0 stack back chain + */ +#define FRAME_SIZE 592 +#define INT_RTN 512 +#define FPR_RTN 528 +#define VR_RTN 560 +#define STACK_FRAME 496 +#define CALLING_LR 488 +#define CALLING_SP 480 +#define INT_PARMS 112 +#define FPR_PARMS 176 +#define VR_PARMS 288 +#define VR_VRSAVE 284 + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED + .tc _rtld_global_ro[TC],_rtld_global_ro +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" + + .machine "altivec" +/* On entry r0 contains the index of the PLT entry we need to fixup + and r11 contains the link_map (from PLT0+16). The link_map becomes + parm1 (r3) and the index (r0) needs to be converted to an offset + (index * 24) in parm2 (r4). */ +#ifndef PROF +EALIGN(_dl_profile_resolve, 4, 0) +/* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we + need to call _dl_call_pltexit. */ + std r31,-8(r1) + cfi_offset(r31,-8) + std r30,-16(r1) + cfi_offset(r30,-16) +/* We need to save the registers used to pass parameters, ie. r3 thru + r10; the registers are saved in a stack frame. */ + stdu r1,-FRAME_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_SIZE) + std r3,INT_PARMS+0(r1) + mr r3,r11 + std r4,INT_PARMS+8(r1) + sldi r4,r0,1 /* index * 2 */ + std r5,INT_PARMS+16(r1) + add r4,r4,r0 /* index * 3 */ + std r6,INT_PARMS+24(r1) + sldi r4,r4,3 /* index * 24 == PLT offset */ + mflr r5 + std r7,INT_PARMS+32(r1) + std r8,INT_PARMS+40(r1) +/* Store the LR in the LR Save area of the previous frame. */ +/* XXX Do we have to do this? */ + la r8,FRAME_SIZE(r1) + std r5,FRAME_SIZE+16(r1) + cfi_offset (lr, 16) + std r5,CALLING_LR(r1) + mfcr r0 + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + std r8,CALLING_SP(r1) +/* I'm almost certain we don't have to save cr... be safe. */ + std r0,8(r1) + ld r12,.LC__dl_hwcap@toc(r2) +#ifdef SHARED + /* Load _rtld-global._dl_hwcap. */ + ld r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12) +#else + ld r12,0(r12) /* Load extern _dl_hwcap. */ +#endif + andis. r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(saveFP) + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) + li r11,32 + li r12,64 + stvx v2,0,r10 + stvx v3,0,r9 + + stvx v4,r11,r10 + stvx v5,r11,r9 + addi r11,r11,64 + + stvx v6,r12,r10 + stvx v7,r12,r9 + addi r12,r12,64 + + stvx v8,r11,r10 + stvx v9,r11,r9 + addi r11,r11,64 + + stvx v10,r12,r10 + stvx v11,r12,r9 + mfspr r0,VRSAVE + + stvx v12,r11,r10 + stvx v13,r11,r9 +L(saveFP): + stw r0,VR_VRSAVE(r1) +/* Save floating registers. 
*/ + stfd fp1,FPR_PARMS+0(r1) + stfd fp2,FPR_PARMS+8(r1) + stfd fp3,FPR_PARMS+16(r1) + stfd fp4,FPR_PARMS+24(r1) + stfd fp5,FPR_PARMS+32(r1) + stfd fp6,FPR_PARMS+40(r1) + stfd fp7,FPR_PARMS+48(r1) + stfd fp8,FPR_PARMS+56(r1) + stfd fp9,FPR_PARMS+64(r1) + stfd fp10,FPR_PARMS+72(r1) + stfd fp11,FPR_PARMS+80(r1) + li r0,-1 + stfd fp12,FPR_PARMS+88(r1) + stfd fp13,FPR_PARMS+96(r1) +/* Load the extra parameters. */ + addi r6,r1,INT_PARMS + addi r7,r1,STACK_FRAME +/* Save link_map* and reloc_addr parms for later. */ + mr r31,r3 + mr r30,r4 + std r0,0(r7) + bl JUMPTARGET(_dl_profile_fixup) + nop +/* Test *framesizep > 0 to see if need to do pltexit processing. */ + ld r0,STACK_FRAME(r1) +/* Put the registers back. */ + lwz r12,VR_VRSAVE(r1) + cmpdi cr1,r0,0 + cmpdi cr0,r12,0 + bgt cr1,L(do_pltexit) + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) +/* VRSAVE must be non-zero if VMX is present and VRs are in use. */ + beq L(restoreFXR) + li r11,32 + li r12,64 + lvx v2,0,r10 + lvx v3,0,r9 + + lvx v4,r11,r10 + lvx v5,r11,r9 + addi r11,r11,64 + + lvx v6,r12,r10 + lvx v7,r12,r9 + addi r12,r12,64 + + lvx v8,r11,r10 + lvx v9,r11,r9 + addi r11,r11,64 + + lvx v10,r12,r10 + lvx v11,r12,r9 + + lvx v12,r11,r10 + lvx v13,r11,r9 +L(restoreFXR): + ld r0,FRAME_SIZE+16(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r0,8(r1) + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) + mtcrf 0xFF,r0 +/* Load the target address, toc and static chain reg from the function + descriptor returned by fixup. */ + ld r0,0(r3) + ld r2,8(r3) + ld r11,16(r3) + ld r3,INT_PARMS+0(r1) + mtctr r0 +/* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + lfd fp5,FPR_PARMS+32(r1) + lfd fp6,FPR_PARMS+40(r1) + lfd fp7,FPR_PARMS+48(r1) + lfd fp8,FPR_PARMS+56(r1) + lfd fp9,FPR_PARMS+64(r1) + lfd fp10,FPR_PARMS+72(r1) + lfd fp11,FPR_PARMS+80(r1) + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) +/* Unwind the stack frame, and jump. */ + ld r31,584(r1) + ld r30,576(r1) + addi r1,r1,FRAME_SIZE + bctr +L(do_pltexit): + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) + beq L(restoreFXR2) + li r11,32 + li r12,64 + lvx v2,0,r10 + lvx v3,0,r9 + + lvx v4,r11,r10 + lvx v5,r11,r9 + addi r11,r11,64 + + lvx v6,r12,r10 + lvx v7,r12,r9 + addi r12,r12,64 + + lvx v8,r11,r10 + lvx v9,r11,r9 + addi r11,r11,64 + + lvx v10,r12,r10 + lvx v11,r12,r9 + + lvx v12,r11,r10 + lvx v13,r11,r9 +L(restoreFXR2): + ld r0,FRAME_SIZE+16(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r0,8(r1) + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) + mtcrf 0xFF,r0 +/* Load the target address, toc and static chain reg from the function + descriptor returned by fixup. */ + ld r0,0(r3) + std r2,40(r1) + ld r2,8(r3) + ld r11,16(r3) + ld r3,INT_PARMS+0(r1) + mtctr r0 +/* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + lfd fp5,FPR_PARMS+32(r1) + lfd fp6,FPR_PARMS+40(r1) + lfd fp7,FPR_PARMS+48(r1) + lfd fp8,FPR_PARMS+56(r1) + lfd fp9,FPR_PARMS+64(r1) + lfd fp10,FPR_PARMS+72(r1) + lfd fp11,FPR_PARMS+80(r1) + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) +/* Call the target function. */ + bctrl + ld r2,40(r1) + lwz r12,VR_VRSAVE(r1) +/* But return here and store the return values. 
*/ + std r3,INT_RTN(r1) + std r4,INT_RTN+8(r1) + stfd fp1,FPR_PARMS+0(r1) + stfd fp2,FPR_PARMS+8(r1) + cmpdi cr0,r12,0 + la r10,VR_RTN(r1) + stfd fp3,FPR_PARMS+16(r1) + stfd fp4,FPR_PARMS+24(r1) + mr r3,r31 + mr r4,r30 + beq L(callpltexit) + stvx v2,0,r10 +L(callpltexit): + addi r5,r1,INT_PARMS + addi r6,r1,INT_RTN + bl JUMPTARGET(_dl_call_pltexit) + nop +/* Restore the return values from target function. */ + lwz r12,VR_VRSAVE(r1) + ld r3,INT_RTN(r1) + ld r4,INT_RTN+8(r1) + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + cmpdi cr0,r12,0 + la r10,VR_RTN(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + beq L(pltexitreturn) + lvx v2,0,r10 +L(pltexitreturn): + ld r0,FRAME_SIZE+16(r1) + ld r31,584(r1) + ld r30,576(r1) + mtlr r0 + ld r1,0(r1) + blr +END(_dl_profile_resolve) +#endif diff --git a/sysdeps/powerpc/powerpc64/elf/Makefile b/sysdeps/powerpc/powerpc64/elf/Makefile index bd0580e..6a77e11 100644 --- a/sysdeps/powerpc/powerpc64/elf/Makefile +++ b/sysdeps/powerpc/powerpc64/elf/Makefile @@ -2,10 +2,10 @@ # Need to prevent gcc from using fprs in code used during dynamic linking. -CFLAGS-dl-runtime.os := -msoft-float -CFLAGS-dl-lookup.os := -msoft-float -CFLAGS-dl-misc.os := -msoft-float -CFLAGS-rtld-mempcpy.os := -msoft-float -CFLAGS-rtld-memmove.os := -msoft-float -CFLAGS-rtld-memchr.os := -msoft-float -CFLAGS-rtld-strnlen.os := -msoft-float +CFLAGS-dl-runtime.os = $(no-special-regs) +CFLAGS-dl-lookup.os = $(no-special-regs) +CFLAGS-dl-misc.os = $(no-special-regs) +CFLAGS-rtld-mempcpy.os = $(no-special-regs) +CFLAGS-rtld-memmove.os = $(no-special-regs) +CFLAGS-rtld-memchr.os = $(no-special-regs) +CFLAGS-rtld-strnlen.os = $(no-special-regs) diff --git a/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c new file mode 100644 index 0000000..0a229cb --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrt.c @@ -0,0 +1,29 @@ +/* Double-precision floating point square root. + Copyright (C) 1997, 2002, 2003, 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <math.h> +#include <math_private.h> + +double +__ieee754_sqrt (double x) +{ + double z; + __asm __volatile ("fsqrt %0,%1" : "=f" (z) : "f" (x)); + return z; +} diff --git a/sysdeps/powerpc/powerpc64/dl-lookupcfg.h b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c index e502941..0f17a64 100644 --- a/sysdeps/powerpc/powerpc64/dl-lookupcfg.h +++ b/sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c @@ -1,5 +1,5 @@ -/* Configuration of lookup functions. PowerPC64 version. - Copyright (C) 2002 Free Software Foundation, Inc. +/* Single-precision floating point square root. + Copyright (C) 1997, 2003, 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -17,6 +17,13 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ -/* Return the symbol map from the symbol lookup function. */ +#include <math.h> +#include <math_private.h> -#define DL_LOOKUP_RETURNS_MAP 1 +float +__ieee754_sqrtf (float x) +{ + double z; + __asm ("fsqrts %0,%1" : "=f" (z) : "f" (x)); + return z; +} diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceil.S b/sysdeps/powerpc/powerpc64/fpu/s_ceil.S index a1bfaa7..02b7094 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_ceil.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_ceil.S @@ -1,5 +1,5 @@ /* ceil function. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,15 +18,14 @@ 02111-1307 USA. */ #include <sysdep.h> +#include <math_ldbl_opt.h> .section ".toc","aw" .LC0: /* 2**52 */ .tc FD_43300000_0[TC],0x4330000000000000 -.LC1: /* -0.0 */ - .tc FD_80000000_0[TC],0x8000000000000000 .section ".text" -ENTRY (__ceil) +EALIGN (__ceil, 4, 0) CALL_MCOUNT 0 mffs fp11 /* Save current FPU rounding mode. */ lfd fp13,.LC0@toc(2) @@ -39,17 +38,18 @@ ENTRY (__ceil) ble- cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ -.L9: + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr .L4: bge- cr6,.L9 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: mtfsf 0x01,fp11 /* restore previous rounding mode. */ - bnelr+ cr5 - lfd fp1,.LC1@toc(2) /* x must be -0.0 for the 0.0 case. */ blr END (__ceil) @@ -59,3 +59,6 @@ weak_alias (__ceil, ceil) weak_alias (__ceil, ceill) strong_alias (__ceil, __ceill) #endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S index 42eb274..1ccd133 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S @@ -21,15 +21,13 @@ .section ".toc","aw" .LC0: /* 2**23 */ - .tc FD_41600000_0[TC],0x4160000000000000 -.LC1: /* -0.0 */ - .tc FD_80000000_0[TC],0x8000000000000000 + .tc FD_4b000000_0[TC],0x4b00000000000000 .section ".text" -ENTRY (__ceilf) +EALIGN (__ceilf, 4, 0) CALL_MCOUNT 0 mffs fp11 /* Save current FPU rounding mode. */ - lfd fp13,.LC0@toc(2) + lfs fp13,.LC0@toc(2) fabs fp0,fp1 fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ @@ -39,17 +37,18 @@ ENTRY (__ceilf) ble- cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ -.L9: + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr .L4: bge- cr6,.L9 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: mtfsf 0x01,fp11 /* restore previous rounding mode. */ - bnelr+ cr5 - lfd fp1,.LC1@toc(2) /* x must be -0.0 for the 0.0 case. 
*/ blr END (__ceilf) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceill.S b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S new file mode 100644 index 0000000..a8f8a0a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_ceill.S @@ -0,0 +1,133 @@ +/* s_ceill.S IBM extended format long double version. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + + .section ".text" + +/* long double [fp1,fp2] ceill (long double x [fp1,fp2]) + IEEE 1003.1 ceil function. + + PowerPC64 long double uses the IBM extended format which is + represented two 64-floating point double values. The values are + non-overlapping giving an effective precision of 106 bits. The first + double contains the high order bits of mantisa and is always ceiled + to represent a normal ceiling of long double to double. Since the + long double value is sum of the high and low values, the low double + normally has the opposite sign to compensate for the this ceiling. + + For long double there are two cases: + 1) |x| < 2**52, all the integer bits are in the high double. + ceil the high double and set the low double to -0.0. + 2) |x| >= 2**52, ceiling involves both doubles. + See the comment before lable .L2 for details. + */ + +ENTRY (__ceill) + mffs fp11 /* Save current FPU rounding mode. */ + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fabs fp9,fp2 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L2 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + fneg fp2,fp12 + ble- cr6,.L1 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ +.L0: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + blr /* x = 0.0; */ +.L1: + bge- cr6,.L0 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ + +/* The high double is > TWO52 so we need to round the low double and + perhaps the high double. In this case we have to round the low + double and handle any adjustment to the high double that may be + caused by rounding (up). 
This is complicated by the fact that the + high double may already be rounded and the low double may have the + opposite sign to compensate.This gets a bit tricky so we use the + following algorithm: + + tau = floor(x_high/TWO52); + x0 = x_high - tau; + x1 = x_low + tau; + r1 = rint(x1); + y_high = x0 + r1; + y_low = x0 - y_high + r1; + return y; */ +.L2: + fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ + fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ + fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ + bgelr- cr7 /* return x; */ + beqlr- cr0 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + fdiv fp8,fp1,fp13 /* x_high/TWO52 */ + + bng- cr6,.L6 /* if (x > 0.0) */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + bng cr5,.L4 /* if (x_low > 0.0) */ + fmr fp3,fp1 + fmr fp4,fp2 + b .L5 +.L4: /* if (x_low < 0.0) */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +.L5: + fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */ + fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ + b .L9 +.L6: /* if (x < 0.0) */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + bnl cr5,.L7 /* if (x_low < 0.0) */ + fmr fp3,fp1 + fmr fp4,fp2 + b .L8 +.L7: /* if (x_low > 0.0) */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +.L8: + fsub fp5,fp4,fp13 /* r1-= TWO52; */ + fadd fp5,fp5,fp13 /* r1+= TWO52; */ +.L9: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ + fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ + fadd fp2,fp2,fp5 + blr +END (__ceill) + +long_double_symbol (libm, __ceill, ceill) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_copysign.S b/sysdeps/powerpc/powerpc64/fpu/s_copysign.S index a43ed12..38171e3 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_copysign.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_copysign.S @@ -1,5 +1,5 @@ /* Copy a sign bit between floating-point values. PowerPC64 version. - Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + Copyright (C) 1997, 1999, 2000, 2002, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -21,6 +21,7 @@ when it's coded in C. */ #include <sysdep.h> +#include <math_ldbl_opt.h> ENTRY(__copysign) CALL_MCOUNT 0 @@ -28,7 +29,11 @@ ENTRY(__copysign) copysign(x,y) returns a value with the magnitude of x and with the sign bit of y. */ stdu r1,-48(r1) + cfi_adjust_cfa_offset (48) stfd fp2,24(r1) + nop + nop + nop ld r3,24(r1) cmpdi r3,0 addi r1,r1,48 @@ -39,13 +44,20 @@ L(0): fnabs fp1,fp1 blr END (__copysign) -weak_alias(__copysign,copysign) +weak_alias (__copysign,copysign) /* It turns out that it's safe to use this code even for single-precision. */ -weak_alias(__copysign,copysignf) +weak_alias (__copysign,copysignf) strong_alias(__copysign,__copysignf) #ifdef NO_LONG_DOUBLE -weak_alias(__copysign,copysignl) +weak_alias (__copysign,copysignl) strong_alias(__copysign,__copysignl) #endif +#ifdef IS_IN_libm +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0) +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S b/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S new file mode 100644 index 0000000..b2c62ea --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S @@ -0,0 +1,51 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. 
+ Copyright (C) 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysignl) +/* long double [f1,f2] copysign (long double [f1,f2] x, long double [f3,f4] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stfd fp3,-16(r1) + ld r3,-16(r1) + cmpdi r3,0 + blt L(0) + fmr fp0,fp1 + fabs fp1,fp1 + fcmpu cr1,fp0,fp1 + beqlr cr1 + fneg fp2,fp2 + blr +L(0): + fmr fp0,fp1 + fnabs fp1,fp1 + fcmpu cr1,fp0,fp1 + beqlr cr1 + fneg fp2,fp2 + blr +END (__copysignl) + +#ifdef IS_IN_libm +long_double_symbol (libm, __copysignl, copysignl) +#else +long_double_symbol (libc, __copysignl, copysignl) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_fabs.S b/sysdeps/powerpc/powerpc64/fpu/s_fabs.S new file mode 100644 index 0000000..53d2130 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_fabs.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fabs.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __fabs, fabsl, GLIBC_2_0) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S b/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S new file mode 100644 index 0000000..3655e5b --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S @@ -0,0 +1,36 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__fabsl) +/* long double [f1,f2] fabs (long double [f1,f2] x); + fabs(x,y) returns a value with the magnitude of x and + with the sign bit of y. 
*/ + fmr fp0,fp1 + fabs fp1,fp1 + fcmpu cr1,fp0,fp1 + beqlr cr1 + fneg fp2,fp2 + blr +END (__fabsl) + +long_double_symbol (libm, __fabsl, fabsl) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_fdim.c b/sysdeps/powerpc/powerpc64/fpu/s_fdim.c new file mode 100644 index 0000000..e34b51e --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_fdim.c @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fdim.c> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fdim, fdiml, GLIBC_2_1); +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floor.S b/sysdeps/powerpc/powerpc64/fpu/s_floor.S index 80cbdc5..65a2848 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_floor.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_floor.S @@ -1,5 +1,5 @@ /* Floor function. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,13 +18,14 @@ 02111-1307 USA. */ #include <sysdep.h> +#include <math_ldbl_opt.h> .section ".toc","aw" .LC0: /* 2**52 */ .tc FD_43300000_0[TC],0x4330000000000000 .section ".text" -ENTRY (__floor) +EALIGN (__floor, 4, 0) CALL_MCOUNT 0 mffs fp11 /* Save current FPU rounding mode. */ lfd fp13,.LC0@toc(2) @@ -37,15 +38,16 @@ ENTRY (__floor) ble- cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ mtfsf 0x01,fp11 /* restore previous rounding mode. */ - bnelr+ cr5 - fmr fp1,fp12 /* x must be +0.0 for the 0.0 case. */ blr .L4: bge- cr6,.L9 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ .L9: mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr @@ -57,3 +59,6 @@ weak_alias (__floor, floor) weak_alias (__floor, floorl) strong_alias (__floor, __floorl) #endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/sysdeps/powerpc/powerpc64/fpu/s_floorf.S index 20cbb15..bcdbf78 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_floorf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_floorf.S @@ -21,13 +21,13 @@ .section ".toc","aw" .LC0: /* 2**23 */ - .tc FD_41600000_0[TC],0x4160000000000000 + .tc FD_4b000000_0[TC],0x4b00000000000000 .section ".text" -ENTRY (__floorf) +EALIGN (__floorf, 4, 0) CALL_MCOUNT 0 mffs fp11 /* Save current FPU rounding mode. */ - lfd fp13,.LC0@toc(2) + lfs fp13,.LC0@toc(2) fabs fp0,fp1 fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ @@ -37,15 +37,16 @@ ENTRY (__floorf) ble- cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ mtfsf 0x01,fp11 /* restore previous rounding mode. */ - bnelr+ cr5 - fmr fp1,fp12 /* x must be +0.0 for the 0.0 case. */ blr .L4: bge- cr6,.L9 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ .L9: mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floorl.S b/sysdeps/powerpc/powerpc64/fpu/s_floorl.S new file mode 100644 index 0000000..01b3c21 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_floorl.S @@ -0,0 +1,134 @@ +/* long double floor function. 
+ IBM extended format long double version. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + + .section ".text" +/* long double [fp1,fp2] floorl (long double x [fp1,fp2]) + IEEE 1003.1 floor function. + + PowerPC64 long double uses the IBM extended format which is + represented two 64-floating point double values. The values are + non-overlapping giving an effective precision of 106 bits. The first + double contains the high order bits of mantisa and is always rounded + to represent a normal rounding of long double to double. Since the + long double value is sum of the high and low values, the low double + normally has the opposite sign to compensate for the this rounding. + + For long double there are two cases: + 1) |x| < 2**52, all the integer bits are in the high double. + floor the high double and set the low double to -0.0. + 2) |x| >= 2**52, Rounding involves both doubles. + See the comment before lable .L2 for details. + */ + +ENTRY (__floorl) + mffs fp11 /* Save current FPU rounding mode. */ + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fabs fp9,fp2 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L2 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + fneg fp2,fp12 /* set low double to -0.0. */ + ble- cr6,.L0 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + bnelr+ cr5 + fmr fp1,fp12 /* x must be +0.0 for the 0.0 case. */ + blr +.L0: + bge- cr6,.L1 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ +.L1: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + blr + + +/* The high double is > TWO52 so we need to round the low double and + perhaps the high double. In this case we have to round the low + double and handle any adjustment to the high double that may be + caused by rounding (up). This is complicated by the fact that the + high double may already be rounded and the low double may have the + opposite sign to compensate.This gets a bit tricky so we use the + following algorithm: + + tau = floor(x_high/TWO52); + x0 = x_high - tau; + x1 = x_low + tau; + r1 = rint(x1); + y_high = x0 + r1; + y_low = x0 - y_high + r1; + return y; */ +.L2: + fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ + fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ + fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ + bgelr- cr7 /* return x; */ + beqlr- cr0 + mtfsfi 7,3 /* Set rounding mode toward -inf. 
*/ + fdiv fp8,fp1,fp13 /* x_high/TWO52 */ + + bng- cr6,.L6 /* if (x > 0.0) */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + bng cr5,.L4 /* if (x_low > 0.0) */ + fmr fp3,fp1 + fmr fp4,fp2 + b .L5 +.L4: /* if (x_low < 0.0) */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +.L5: + fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */ + fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ + b .L9 +.L6: /* if (x < 0.0) */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + bnl cr5,.L7 /* if (x_low < 0.0) */ + fmr fp3,fp1 + fmr fp4,fp2 + b .L8 +.L7: /* if (x_low > 0.0) */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +.L8: + fsub fp5,fp4,fp13 /* r1-= TWO52; */ + fadd fp5,fp5,fp13 /* r1+= TWO52; */ +.L9: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ + fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ + fadd fp2,fp2,fp5 + blr +END (__floorl) + +long_double_symbol (libm, __floorl, floorl) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_fmax.S b/sysdeps/powerpc/powerpc64/fpu/s_fmax.S new file mode 100644 index 0000000..6973576 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_fmax.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fmax.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fmax, fmaxl, GLIBC_2_1) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_fmin.S b/sysdeps/powerpc/powerpc64/fpu/s_fmin.S new file mode 100644 index 0000000..6d4a0a9 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_fmin.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fmin.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fmin, fminl, GLIBC_2_1) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_isnan.c b/sysdeps/powerpc/powerpc64/fpu/s_isnan.c new file mode 100644 index 0000000..397717b --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_isnan.c @@ -0,0 +1,7 @@ +#include <sysdeps/powerpc/fpu/s_isnan.c> +#ifndef IS_IN_libm +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llrint.S b/sysdeps/powerpc/powerpc64/fpu/s_llrint.S index 610b561..ff0ba94 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_llrint.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_llrint.S @@ -1,5 +1,5 @@ /* Round double to long int. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,6 +18,7 @@ 02111-1307 USA. */ #include <sysdep.h> +#include <math_ldbl_opt.h> /* long long int[r3] __llrint (double x[fp1]) */ ENTRY (__llrint) @@ -41,3 +42,7 @@ weak_alias (__llrint, llrintl) strong_alias (__lrint, __lrintl) weak_alias (__lrint, lrintl) #endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llround.S b/sysdeps/powerpc/powerpc64/fpu/s_llround.S index a3dcd4c..d023b8f 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_llround.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_llround.S @@ -1,5 +1,5 @@ /* llround function. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. 
This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,6 +18,7 @@ 02111-1307 USA. */ #include <sysdep.h> +#include <math_ldbl_opt.h> .section ".toc","aw" .LC0: /* -0.0 */ @@ -66,3 +67,7 @@ strong_alias (__llround, __llroundl) weak_alias (__lround, lroundl) strong_alias (__lround, __lroundl) #endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S b/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S index b5ca43b..bbbd054 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S @@ -1,5 +1,5 @@ /* llroundf function. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -28,8 +28,8 @@ /* long long [r3] llroundf (float x [fp1]) IEEE 1003.1 llroundf function. IEEE specifies "roundf to the nearest - integer value, roundfing halfway cases away from zero, regardless of - the current roundfing mode." However PowerPC Architecture defines + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines "roundf to Nearest" as "Choose the best approximation. In case of a tie, choose the one that is even (least significant bit o).". So we can't use the PowerPC "round to Nearest" mode. Instead we set diff --git a/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S new file mode 100644 index 0000000..0d0eb36 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_nearbyintl.S @@ -0,0 +1,114 @@ +/* nearbyint long double. + IBM extended format long double version. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +/* long double [fp1,fp2] nearbyintl (long double x [fp1,fp2]) + IEEE 1003.1 nearbyintl function. nearbyintl is simular to the rintl + but does raise the "inexact" exception. This implementation is + based on rintl but explicitly maskes the inexact exception on entry + and clears any pending inexact before restoring the exception mask + on exit. + + PowerPC64 long double uses the IBM extended format which is + represented two 64-floating point double values. The values are + non-overlapping giving an effective precision of 106 bits. 
The first + double contains the high order bits of mantisa and is always rounded + to represent a normal rounding of long double to double. Since the + long double value is sum of the high and low values, the low double + normally has the opposite sign to compensate for the this rounding. + + For long double there are two cases: + 1) |x| < 2**52, all the integer bits are in the high double. + floor the high double and set the low double to -0.0. + 2) |x| >= 2**52, Rounding involves both doubles. + See the comment before lable .L2 for details. + */ +ENTRY (__nearbyintl) + mffs fp11 /* Save current FPSCR. */ + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + mtfsb0 28 /* Disable "inexact" exceptions. */ + fsub fp12,fp13,fp13 /* generate 0.0 */ + fabs fp9,fp2 + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L2 + fmr fp2,fp12 + bng- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + b .L9 +.L4: + bnl- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp13,fp1 /* x = TWO52 - x; */ + fsub fp0,fp1,fp13 /* x = - (x - TWO52); */ + fneg fp1,fp0 +.L9: + mtfsb0 6 /* Clear any pending "inexact" exceptions. */ + mtfsf 0x01,fp11 /* restore exception mask. */ + blr + +/* The high double is > TWO52 so we need to round the low double and + perhaps the high double. This gets a bit tricky so we use the + following algorithm: + + tau = floor(x_high/TWO52); + x0 = x_high - tau; + x1 = x_low + tau; + r1 = nearbyint(x1); + y_high = x0 + r1; + y_low = r1 - tau; + return y; */ +.L2: + fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ + fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ + bge- cr7,.L9 /* return x; */ + beq- cr0,.L9 + fdiv fp8,fp1,fp13 /* x_high/TWO52 */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ + + fcmpu cr6,fp4,fp12 /* if (x1 > 0.0) */ + bng- cr6,.L8 + fadd fp5,fp4,fp13 /* r1 = x1 + TWO52; */ + fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ + b .L6 +.L8: + fmr fp5,fp4 + bge- cr6,.L6 /* if (x1 < 0.0) */ + fsub fp5,fp13,fp4 /* r1 = TWO52 - x1; */ + fsub fp0,fp5,fp13 /* r1 = - (r1 - TWO52); */ + fneg fp5,fp0 +.L6: + fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ + fsub fp2,fp5,fp8 /* y_low = r1 - tau; */ + b .L9 +END (__nearbyintl) + +long_double_symbol (libm, __nearbyintl, nearbyintl) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S index 79e8072..b4fbc0b 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S @@ -1,5 +1,5 @@ /* Round to int floating-point values. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -21,13 +21,14 @@ when it's coded in C. 
*/ #include <sysdep.h> +#include <math_ldbl_opt.h> .section ".toc","aw" .LC0: /* 2**52 */ .tc FD_43300000_0[TC],0x4330000000000000 .section ".text" -ENTRY (__rint) +EALIGN (__rint, 4, 0) CALL_MCOUNT 0 lfd fp13,.LC0@toc(2) fabs fp0,fp1 @@ -38,13 +39,14 @@ ENTRY (__rint) bng- cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ - blr + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ .L4: bnllr- cr6 /* if (x < 0.0) */ - fsub fp1,fp13,fp1 /* x = TWO52 - x; */ - fsub fp0,fp1,fp13 /* x = - (x - TWO52); */ - fneg fp1,fp0 - blr + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ END (__rint) weak_alias (__rint, rint) @@ -53,3 +55,6 @@ weak_alias (__rint, rint) weak_alias (__rint, rintl) strong_alias (__rint, __rintl) #endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __rint, rintl, GLIBC_2_0) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S index eb34dd5..e4fa9ba 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S @@ -21,12 +21,12 @@ .section ".toc","aw" .LC0: /* 2**23 */ - .tc FD_41600000_0[TC],0x4160000000000000 + .tc FD_4b000000_0[TC],0x4b00000000000000 .section ".text" -ENTRY (__rintf) +EALIGN (__rintf, 4, 0) CALL_MCOUNT 0 - lfd fp13,.LC0@toc(2) + lfs fp13,.LC0@toc(2) fabs fp0,fp1 fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ @@ -35,13 +35,14 @@ ENTRY (__rintf) bng- cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ - blr + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ .L4: bnllr- cr6 /* if (x < 0.0) */ - fsubs fp1,fp13,fp1 /* x = TWO23 - x; */ - fsubs fp0,fp1,fp13 /* x = - (x - TWO23); */ - fneg fp1,fp0 - blr + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ END (__rintf) weak_alias (__rintf, rintf) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_round.S b/sysdeps/powerpc/powerpc64/fpu/s_round.S index c0b6d46..15afca1 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_round.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_round.S @@ -1,5 +1,5 @@ /* round function. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,14 +18,13 @@ 02111-1307 USA. */ #include <sysdep.h> +#include <math_ldbl_opt.h> .section ".toc","aw" .LC0: /* 2**52 */ .tc FD_43300000_0[TC],0x4330000000000000 .LC1: /* 0.5 */ .tc FD_3fe00000_0[TC],0x3fe0000000000000 -.LC2: /* -0.0 */ - .tc FD_80000000_0[TC],0x8000000000000000 .section ".text" /* double [fp1] round (double x [fp1]) @@ -38,7 +37,7 @@ "Round toward Zero" mode and round by adding +-0.5 before rounding to the integer value. */ -ENTRY (__round) +EALIGN (__round, 4, 0) CALL_MCOUNT 0 mffs fp11 /* Save current FPU rounding mode. */ lfd fp13,.LC0@toc(2) @@ -53,7 +52,8 @@ ENTRY (__round) fadd fp1,fp1,fp10 /* x+= 0.5; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ -.L9: + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ mtfsf 0x01,fp11 /* restore previous rounding mode. 
*/ blr .L4: @@ -61,10 +61,10 @@ ENTRY (__round) bge- cr6,.L9 /* if (x < 0.0) */ fsub fp1,fp9,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ - mtfsf 0x01,fp11 /* restore previous rounding mode. */ - bnelr+ cr5 - lfd fp1,.LC2@toc(2) /* x must be -0.0 for the 0.0 case. */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr END (__round) @@ -74,3 +74,6 @@ weak_alias (__round, round) weak_alias (__round, roundl) strong_alias (__round, __roundl) #endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/sysdeps/powerpc/powerpc64/fpu/s_roundf.S index 23ee4c0..d2e29fd 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_roundf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_roundf.S @@ -21,11 +21,9 @@ .section ".toc","aw" .LC0: /* 2**23 */ - .tc FD_41600000_0[TC],0x4160000000000000 + .tc FD_4b000000_0[TC],0x4b00000000000000 .LC1: /* 0.5 */ - .tc FD_3fe00000_0[TC],0x3fe0000000000000 -.LC2: /* -0.0 */ - .tc FD_80000000_0[TC],0x8000000000000000 + .tc FD_3f000000_0[TC],0x3f00000000000000 .section ".text" /* float [fp1] roundf (float x [fp1]) @@ -38,22 +36,23 @@ "Round toward Zero" mode and round by adding +-0.5 before rounding to the integer value. */ -ENTRY (__roundf ) +EALIGN (__roundf, 4, 0) CALL_MCOUNT 0 mffs fp11 /* Save current FPU rounding mode. */ - lfd fp13,.LC0@toc(2) + lfs fp13,.LC0@toc(2) fabs fp0,fp1 fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ bnllr- cr7 mtfsfi 7,1 /* Set rounding mode toward 0. */ - lfd fp10,.LC1@toc(2) + lfs fp10,.LC1@toc(2) ble- cr6,.L4 fadds fp1,fp1,fp10 /* x+= 0.5; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ -.L9: + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr .L4: @@ -61,10 +60,10 @@ ENTRY (__roundf ) bge- cr6,.L9 /* if (x < 0.0) */ fsubs fp1,fp9,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ - mtfsf 0x01,fp11 /* restore previous rounding mode. */ - bnelr+ cr5 - lfd fp1,.LC2@toc(2) /* x must be -0.0 for the 0.0 case. */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr END (__roundf) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_roundl.S b/sysdeps/powerpc/powerpc64/fpu/s_roundl.S new file mode 100644 index 0000000..20da828 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_roundl.S @@ -0,0 +1,133 @@ +/* long double round function. + IBM extended format long double version. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
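Both __round and __roundf above follow the recipe from their header comments: IEEE round() must send halfway cases away from zero, which the PowerPC ties-to-even mode cannot do, so the code temporarily selects round-toward-zero (mtfsfi 7,1) and biases the value by +-0.5 first. A stand-alone C sketch of the same idea (illustrative only, name made up; strict C would also want FENV_ACCESS enabled):

    #include <fenv.h>
    #include <math.h>

    static const double TWO52 = 0x1p52;

    double round_sketch (double x)
    {
      if (fabs (x) < TWO52)
        {
          int save = fegetround ();
          fesetround (FE_TOWARDZERO);          /* like mtfsfi 7,1 */
          if (x > 0.0)
            {
              volatile double t = (x + 0.5) + TWO52;
              x = fabs (t - TWO52);            /* zero result stays +0.0 */
            }
          else if (x < 0.0)
            {
              volatile double t = (x - 0.5) - TWO52;
              x = -fabs (t + TWO52);           /* zero result stays -0.0 */
            }
          fesetround (save);                   /* like mtfsf 0x01,fp11 */
        }
      return x;
    }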
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+ .section ".toc","aw"
+.LC0: /* 2**52 */
+ .tc FD_43300000_0[TC],0x4330000000000000
+.LC1: /* 0.5 */
+ .tc FD_3fe00000_0[TC],0x3fe0000000000000
+ .section ".text"
+
+/* long double [fp1,fp2] roundl (long double x [fp1,fp2])
+ IEEE 1003.1 round function. IEEE specifies "round to the nearest
+ integer value, rounding halfway cases away from zero, regardless of
+ the current rounding mode." However PowerPC Architecture defines
+ "Round to Nearest" as "Choose the best approximation. In case of a
+ tie, choose the one that is even (least significant bit 0).".
+ So we can't use the PowerPC "Round to Nearest" mode. Instead we set
+ "Round toward Zero" mode and round by adding +-0.5 before rounding
+ to the integer value. */
+
+ENTRY (__roundl)
+ mffs fp11 /* Save current FPU rounding mode. */
+ lfd fp13,.LC0@toc(2)
+ fabs fp0,fp1
+ fabs fp9,fp2
+ fsub fp12,fp13,fp13 /* generate 0.0 */
+ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
+ fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
+ bnl- cr7,.L2
+ mtfsfi 7,1 /* Set rounding mode toward 0. */
+ lfd fp10,.LC1@toc(2)
+ ble- cr6,.L1
+ fneg fp2,fp12
+ fadd fp1,fp1,fp10 /* x+= 0.5; */
+ fadd fp1,fp1,fp13 /* x+= TWO52; */
+ fsub fp1,fp1,fp13 /* x-= TWO52; */
+ fabs fp1,fp1 /* if (x == 0.0) x = 0.0; */
+.L0:
+ mtfsf 0x01,fp11 /* restore previous rounding mode. */
+ blr
+.L1:
+ fsub fp9,fp1,fp10 /* x-= 0.5; */
+ fneg fp2,fp12
+ bge- cr6,.L0 /* if (x < 0.0) */
+ fsub fp1,fp9,fp13 /* x-= TWO52; */
+ fadd fp1,fp1,fp13 /* x+= TWO52; */
+ fnabs fp1,fp1 /* if (x == 0.0) x = -0.0; */
+ mtfsf 0x01,fp11 /* restore previous rounding mode. */
+ blr
+
+/* The high double is > TWO52 so we need to round the low double and
+ perhaps the high double. In this case we have to round the low
+ double and handle any adjustment to the high double that may be
+ caused by rounding (up). This is complicated by the fact that the
+ high double may already be rounded and the low double may have the
+ opposite sign to compensate. This gets a bit tricky so we use the
+ following algorithm:
+
+ tau = floor(x_high/TWO52);
+ x0 = x_high - tau;
+ x1 = x_low + tau;
+ r1 = rint(x1);
+ y_high = x0 + r1;
+ y_low = x0 - y_high + r1;
+ return y; */
+.L2:
+ fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */
+ fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */
+ fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */
+ lfd fp10,.LC1@toc(2)
+ bgelr- cr7 /* return x; */
+ beqlr- cr0
+ mtfsfi 7,1 /* Set rounding mode toward 0.
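For the double-double tail that starts at .L2, the pseudocode in the comment just above can be written out in C. The assembly computes tau with fctidz (truncation toward zero) and skips the tau shuffle for some sign combinations, so this is a sketch of the idea rather than a transcription; the struct and function names are made up:

    #include <math.h>

    struct dd { double hi, lo; };          /* hypothetical double-double pair */

    static const double TWO52 = 0x1p52;

    /* Assumes |x.hi| >= TWO52 and 0 < |x.lo| < TWO52, as the assembly checks.  */
    struct dd roundl_tail_sketch (struct dd x)
    {
      double tau = floor (x.hi / TWO52);   /* small integer borrowed from hi */
      double x0  = x.hi - tau;
      double x1  = x.lo + tau;             /* x0 + x1 still represents x */
      double r1  = round (x1);             /* the only part with fraction bits */
      double yhi = x0 + r1;
      double ylo = (x0 - yhi) + r1;        /* recover what the sum dropped */
      return (struct dd) { yhi, ylo };
    }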
*/ + fdiv fp8,fp1,fp13 /* x_high/TWO52 */ + + bng- cr6,.L6 /* if (x > 0.0) */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + bng cr5,.L4 /* if (x_low > 0.0) */ + fmr fp3,fp1 + fmr fp4,fp2 + b .L5 +.L4: /* if (x_low < 0.0) */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +.L5: + fadd fp5,fp4,fp10 /* r1 = x1 + 0.5; */ + fadd fp5,fp5,fp13 /* r1 = r1 + TWO52; */ + fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ + b .L9 +.L6: /* if (x < 0.0) */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + bnl cr5,.L7 /* if (x_low < 0.0) */ + fmr fp3,fp1 + fmr fp4,fp2 + b .L8 +.L7: /* if (x_low > 0.0) */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +.L8: + fsub fp5,fp4,fp10 /* r1 = x1 - 0.5; */ + fsub fp5,fp5,fp13 /* r1-= TWO52; */ + fadd fp5,fp5,fp13 /* r1+= TWO52; */ +.L9: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ + fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ + fadd fp2,fp2,fp5 + blr +END (__roundl) + +long_double_symbol (libm, __roundl, roundl) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_trunc.S b/sysdeps/powerpc/powerpc64/fpu/s_trunc.S index 3ddd298..086ed00 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_trunc.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_trunc.S @@ -1,5 +1,5 @@ /* trunc function. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,12 +18,11 @@ 02111-1307 USA. */ #include <sysdep.h> +#include <math_ldbl_opt.h> .section ".toc","aw" .LC0: /* 2**52 */ .tc FD_43300000_0[TC],0x4330000000000000 -.LC2: /* -0.0 */ - .tc FD_80000000_0[TC],0x8000000000000000 .section ".text" /* double [fp1] trunc (double x [fp1]) @@ -33,7 +32,7 @@ We set "round toward Zero" mode and trunc by adding +-2**52 then subtracting +-2**52. */ -ENTRY (__trunc) +EALIGN (__trunc, 4, 0) CALL_MCOUNT 0 mffs fp11 /* Save current FPU rounding mode. */ lfd fp13,.LC0@toc(2) @@ -46,17 +45,18 @@ ENTRY (__trunc) ble- cr6,.L4 fadd fp1,fp1,fp13 /* x+= TWO52; */ fsub fp1,fp1,fp13 /* x-= TWO52; */ -.L9: - mtfsf 0x01,fp11 /* restore previous truncing mode. */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr .L4: bge- cr6,.L9 /* if (x < 0.0) */ fsub fp1,fp1,fp13 /* x-= TWO52; */ fadd fp1,fp1,fp13 /* x+= TWO52; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: mtfsf 0x01,fp11 /* restore previous rounding mode. */ - bnelr+ cr5 - lfd fp1,.LC2@toc(2) /* x must be -0.0 for the 0.0 case. */ blr END (__trunc) @@ -66,3 +66,6 @@ weak_alias (__trunc, trunc) weak_alias (__trunc, truncl) strong_alias (__trunc, __truncl) #endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/sysdeps/powerpc/powerpc64/fpu/s_truncf.S index b38b722..1456e7f 100644 --- a/sysdeps/powerpc/powerpc64/fpu/s_truncf.S +++ b/sysdeps/powerpc/powerpc64/fpu/s_truncf.S @@ -1,5 +1,5 @@ /* truncf function. PowerPC64 version. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. 
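s_trunc.S above and s_truncf.S just below use the same machinery as round, only without the +-0.5 bias: once the rounding mode is forced to round-toward-zero, adding and then subtracting 2**52 (2**23 for float) simply drops the fraction bits. As a C sketch (illustrative name, not the glibc code), it is the round sketch shown earlier minus the bias:

    #include <fenv.h>
    #include <math.h>

    static const double TWO52 = 0x1p52;

    double trunc_sketch (double x)
    {
      if (fabs (x) < TWO52)
        {
          int save = fegetround ();
          fesetround (FE_TOWARDZERO);
          if (x > 0.0)
            {
              volatile double t = x + TWO52;
              x = fabs (t - TWO52);          /* +0.0 when 0 < x < 1 */
            }
          else if (x < 0.0)
            {
              volatile double t = x - TWO52;
              x = -fabs (t + TWO52);         /* -0.0 when -1 < x < 0 */
            }
          fesetround (save);
        }
      return x;
    }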
The GNU C Library is free software; you can redistribute it and/or @@ -21,9 +21,7 @@ .section ".toc","aw" .LC0: /* 2**23 */ - .tc FD_41600000_0[TC],0x4160000000000000 -.LC2: /* -0.0 */ - .tc FD_80000000_0[TC],0x8000000000000000 + .tc FD_4b000000_0[TC],0x4b00000000000000 .section ".text" /* float [fp1] truncf (float x [fp1]) @@ -33,10 +31,10 @@ We set "round toward Zero" mode and trunc by adding +-2**23 then subtracting +-2**23. */ -ENTRY (__truncf) +EALIGN (__truncf, 4, 0) CALL_MCOUNT 0 mffs fp11 /* Save current FPU rounding mode. */ - lfd fp13,.LC0@toc(2) + lfs fp13,.LC0@toc(2) fabs fp0,fp1 fsubs fp12,fp13,fp13 /* generate 0.0 */ fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ @@ -46,17 +44,18 @@ ENTRY (__truncf) ble- cr6,.L4 fadds fp1,fp1,fp13 /* x+= TWO23; */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ -.L9: - mtfsf 0x01,fp11 /* restore previous truncing mode. */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0x01,fp11 /* restore previous rounding mode. */ blr .L4: bge- cr6,.L9 /* if (x < 0.0) */ fsubs fp1,fp1,fp13 /* x-= TWO23; */ fadds fp1,fp1,fp13 /* x+= TWO23; */ - fcmpu cr5,fp1,fp12 /* if (x > 0.0) */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: mtfsf 0x01,fp11 /* restore previous rounding mode. */ - bnelr+ cr5 - lfd fp1,.LC2@toc(2) /* x must be -0.0 for the 0.0 case. */ blr END (__truncf) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_truncl.S b/sysdeps/powerpc/powerpc64/fpu/s_truncl.S new file mode 100644 index 0000000..1864fc1 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_truncl.S @@ -0,0 +1,121 @@ +/* long double trunc function. + IBM extended format long double version. + Copyright (C) 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 + .section ".text" + +/* long double [fp1,fp2] truncl (long double x [fp1,fp2]) */ + +ENTRY (__truncl) + mffs fp11 /* Save current FPU rounding mode. */ + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fabs fp9,fp2 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L2 + mtfsfi 7,1 /* Set rounding mode toward 0. */ + ble- cr6,.L1 + fneg fp2,fp12 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) x = 0.0; */ +.L0: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + blr +.L1: + fneg fp2,fp12 + bge- cr6,.L0 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) x = -0.0; */ + mtfsf 0x01,fp11 /* restore previous rounding mode. 
*/ + blr + +/* The high double is > TWO52 so we need to round the low double and + perhaps the high double. In this case we have to round the low + double and handle any adjustment to the high double that may be + caused by rounding (up). This is complicated by the fact that the + high double may already be rounded and the low double may have the + opposite sign to compensate.This gets a bit tricky so we use the + following algorithm: + + tau = floor(x_high/TWO52); + x0 = x_high - tau; + x1 = x_low + tau; + r1 = rint(x1); + y_high = x0 + r1; + y_low = x0 - y_high + r1; + return y; */ +.L2: + fcmpu cr7,fp9,fp13 /* if (|x_low| > TWO52) */ + fcmpu cr0,fp9,fp12 /* || (|x_low| == 0.0) */ + fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */ + bgelr- cr7 /* return x; */ + beqlr- cr0 + mtfsfi 7,1 /* Set rounding mode toward 0. */ + fdiv fp8,fp1,fp13 /* x_high/TWO52 */ + + bng- cr6,.L6 /* if (x > 0.0) */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + fadd fp8,fp8,fp8 /* tau++; Make tau even */ + bng cr5,.L4 /* if (x_low > 0.0) */ + fmr fp3,fp1 + fmr fp4,fp2 + b .L5 +.L4: /* if (x_low < 0.0) */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +.L5: + fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */ + fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */ + b .L9 +.L6: /* if (x < 0.0) */ + fctidz fp0,fp8 + fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */ + fadd fp8,fp8,fp8 /* tau++; Make tau even */ + bnl cr5,.L7 /* if (x_low < 0.0) */ + fmr fp3,fp1 + fmr fp4,fp2 + b .L8 +.L7: /* if (x_low > 0.0) */ + fsub fp3,fp1,fp8 /* x0 = x_high - tau; */ + fadd fp4,fp2,fp8 /* x1 = x_low + tau; */ +.L8: + fsub fp5,fp4,fp13 /* r1-= TWO52; */ + fadd fp5,fp5,fp13 /* r1+= TWO52; */ +.L9: + mtfsf 0x01,fp11 /* restore previous rounding mode. */ + fadd fp1,fp3,fp5 /* y_high = x0 + r1; */ + fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */ + fadd fp2,fp2,fp5 + blr +END (__truncl) + +long_double_symbol (libm, __truncl, truncl) diff --git a/sysdeps/powerpc/powerpc64/hp-timing.c b/sysdeps/powerpc/powerpc64/hp-timing.c new file mode 100644 index 0000000..4e54e66 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/hp-timing.c @@ -0,0 +1,25 @@ +/* Support for high precision, low overhead timing functions. + powerpc64 version. + Copyright (C) 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <hp-timing.h> + +/* We have to define the variable for the overhead. */ +hp_timing_t _dl_hp_timing_overhead; diff --git a/sysdeps/powerpc/powerpc64/hp-timing.h b/sysdeps/powerpc/powerpc64/hp-timing.h new file mode 100644 index 0000000..b58cca9 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/hp-timing.h @@ -0,0 +1,136 @@ +/* High precision, low overhead timing functions. powerpc64 version. 
+ Copyright (C) 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _HP_TIMING_H
+#define _HP_TIMING_H 1
+
+#include <string.h>
+#include <sys/param.h>
+#include <stdio-common/_itoa.h>
+#include <atomic.h>
+
+/* The macros defined here use the powerpc 64-bit time base register.
+ The time base is nominally clocked at 1/8th the CPU clock, but this
+ can vary.
+
+ The list of macros we need includes the following:
+
+ - HP_TIMING_AVAIL: test for availability.
+
+ - HP_TIMING_INLINE: this macro is non-zero if the functionality is not
+ implemented using function calls but instead uses some inlined code
+ which might simply consist of a few assembler instructions. We have to
+ know this since we might want to use the macros here in places where we
+ cannot make function calls.
+
+ - hp_timing_t: This is the type for variables used to store the time
+ values.
+
+ - HP_TIMING_ZERO: clear `hp_timing_t' object.
+
+ - HP_TIMING_NOW: place timestamp for current time in variable given as
+ parameter.
+
+ - HP_TIMING_DIFF_INIT: do whatever is necessary to be able to use the
+ HP_TIMING_DIFF macro.
+
+ - HP_TIMING_DIFF: compute difference between two times and store it
+ in a third. Source and destination might overlap.
+
+ - HP_TIMING_ACCUM: add time difference to another variable. This might
+ be a bit more complicated to implement for some platforms as the
+ operation should be thread-safe and 64bit arithmetic on 32bit platforms
+ is not.
+
+ - HP_TIMING_ACCUM_NT: this is the variant for situations where we know
+ there are no threads involved.
+
+ - HP_TIMING_PRINT: write decimal representation of the timing value into
+ the given string. This operation need not be inline even though
+ HP_TIMING_INLINE is specified.
+
+*/
+
+/* We always assume having the timestamp register. */
+#define HP_TIMING_AVAIL (1)
+
+/* We indeed have inlined functions. */
+#define HP_TIMING_INLINE (1)
+
+/* We use 64bit values for the times. */
+typedef unsigned long long int hp_timing_t;
+
+/* Set timestamp value to zero. */
+#define HP_TIMING_ZERO(Var) (Var) = (0)
+
+/* That's quite simple. Use the `mftb' instruction. Note that the value
+ might not be 100% accurate since there might be some more instructions
+ running at this moment. This could be changed by using a barrier like
+ 'lwsync' right before the `mftb' instruction. But we are not interested
+ in accurate clock cycles here so we don't do this. */
+#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mftb %0" : "=r" (Var))
+
+/* Use two 'mftb' instructions in a row to find out how long it takes.
+ On current POWER4, POWER5, and 970 processors mftb takes ~10 cycles.
*/ +#define HP_TIMING_DIFF_INIT() \ + do { \ + if (GLRO(dl_hp_timing_overhead) == 0) \ + { \ + int __cnt = 5; \ + GLRO(dl_hp_timing_overhead) = ~0ull; \ + do \ + { \ + hp_timing_t __t1, __t2; \ + HP_TIMING_NOW (__t1); \ + HP_TIMING_NOW (__t2); \ + if (__t2 - __t1 < GLRO(dl_hp_timing_overhead)) \ + GLRO(dl_hp_timing_overhead) = __t2 - __t1; \ + } \ + while (--__cnt > 0); \ + } \ + } while (0) + +/* It's simple arithmetic in 64-bit. */ +#define HP_TIMING_DIFF(Diff, Start, End) (Diff) = ((End) - (Start)) + +/* We need to insure that this add is atomic in threaded environments. We use + __arch_atomic_exchange_and_add_64 from atomic.h to get thread safety. */ +#define HP_TIMING_ACCUM(Sum, Diff) \ + do { \ + hp_timing_t __diff = (Diff) - GLRO(dl_hp_timing_overhead); \ + __arch_atomic_exchange_and_add_64 (&(Sum), __diff); \ + } while (0) + +/* No threads, no extra work. */ +#define HP_TIMING_ACCUM_NT(Sum, Diff) (Sum) += (Diff) + +/* Print the time value. */ +#define HP_TIMING_PRINT(Buf, Len, Val) \ + do { \ + char __buf[20]; \ + char *__cp = _itoa (Val, __buf + sizeof (__buf), 10, 0); \ + size_t __len = (Len); \ + char *__dest = (Buf); \ + while (__len-- > 0 && __cp < __buf + sizeof (__buf)) \ + *__dest++ = *__cp++; \ + memcpy (__dest, " ticks", MIN (__len, sizeof (" ticks"))); \ + } while (0) + +#endif /* hp-timing.h */ diff --git a/sysdeps/powerpc/powerpc64/memcpy.S b/sysdeps/powerpc/powerpc64/memcpy.S index 9df5bb4..f395de9 100644 --- a/sysdeps/powerpc/powerpc64/memcpy.S +++ b/sysdeps/powerpc/powerpc64/memcpy.S @@ -1,5 +1,5 @@ /* Optimized memcpy implementation for PowerPC64. - Copyright (C) 2003 Free Software Foundation, Inc. + Copyright (C) 2003, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -43,6 +43,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) neg 0,3 std 3,-16(1) std 31,-8(1) + cfi_offset(31,-8) andi. 11,3,7 /* check alignement of dst. */ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ clrldi 10,4,61 /* check alignement of src. */ diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S index 1abc59b..09c79fc 100644 --- a/sysdeps/powerpc/powerpc64/memset.S +++ b/sysdeps/powerpc/powerpc64/memset.S @@ -62,8 +62,7 @@ EALIGN (BP_SYM (memset), 5, 0) #define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ #define rCLS r8 /* Cache line size obtained from static. */ #define rCLM r9 /* Cache line size mask to check for cache alignment. */ - -___memset: +L(_memset): #if __BOUNDED_POINTERS__ cmpldi cr1, rRTN, 0 CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN) @@ -282,11 +281,11 @@ ENTRY (BP_SYM (__bzero)) mr r4,r3 /* Tell memset that we don't want a return value. */ li r3,0 - b ___memset + b L(_memset) #else mr r5,r4 li r4,0 - b ___memset + b L(_memset) #endif END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS) diff --git a/sysdeps/powerpc/powerpc64/ppc-mcount.S b/sysdeps/powerpc/powerpc64/ppc-mcount.S index 46cb9fd..5df9650 100644 --- a/sysdeps/powerpc/powerpc64/ppc-mcount.S +++ b/sysdeps/powerpc/powerpc64/ppc-mcount.S @@ -1,5 +1,5 @@ /* PowerPC64-specific implementation of profiling support. - Copyright (C) 1997, 1999, 2002 Free Software Foundation, Inc. + Copyright (C) 1997, 1999, 2002, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. 
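For reference, the hp-timing.h macros added above are meant to be used as a pair of timestamps plus a subtraction. Outside glibc the internal header is not installed, so this PowerPC64-only sketch open-codes the same mftb read that HP_TIMING_NOW performs (the TB_NOW name is made up):

    #include <stdio.h>

    typedef unsigned long long int hp_timing_t;

    /* Same instruction HP_TIMING_NOW uses; one read gives the 64-bit time base.  */
    #define TB_NOW(Var) __asm__ __volatile__ ("mftb %0" : "=r" (Var))

    int main (void)
    {
      hp_timing_t start, end;

      TB_NOW (start);
      /* ... code being measured ... */
      TB_NOW (end);

      printf ("%llu time base ticks\n", end - start);   /* cf. HP_TIMING_DIFF */
      return 0;
    }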
The GNU C Library is free software; you can redistribute it and/or @@ -14,8 +14,8 @@ You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ #include <sysdep.h> /* We don't need to save the parameter-passing registers as gcc takes @@ -26,7 +26,9 @@ ENTRY(_mcount) mflr r4 ld r11, 0(r1) stdu r1,-112(r1) + cfi_adjust_cfa_offset (112) std r4, 128(r1) + cfi_offset (lr, 16) ld r3, 16(r11) bl JUMPTARGET(__mcount_internal) nop diff --git a/sysdeps/powerpc/powerpc64/register-dump.h b/sysdeps/powerpc/powerpc64/register-dump.h index dd69af3..fc27dca 100644 --- a/sysdeps/powerpc/powerpc64/register-dump.h +++ b/sysdeps/powerpc/powerpc64/register-dump.h @@ -1,5 +1,5 @@ /* Dump registers. - Copyright (C) 1998, 2002 Free Software Foundation, Inc. + Copyright (C) 1998, 2002, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -34,7 +34,7 @@ gr16-19: 000000000000010% 000000000000011% 000000000000012% 000000000000013%\n\ gr20-23: 000000000000014% 000000000000015% 000000000000016% 000000000000017%\n\ gr24-27: 000000000000018% 000000000000019% 00000000000001a% 00000000000001b%\n\ gr28-31: 00000000000001c% 00000000000001d% 00000000000001e% 00000000000001f%\n\ -fscr=0000071%\n\ +fscr=000000000000050%\n\ fp0-3: 000000000000030% 000000000000031% 000000000000032% 000000000000033%\n\ fp4-7: 000000000000034% 000000000000035% 000000000000036% 000000000000037%\n\ fp8-11: 000000000000038% 000000000000038% 00000000000003a% 00000000000003b%\n\ @@ -104,7 +104,7 @@ register_dump (int fd, struct sigcontext *ctx) char buffer[sizeof(dumpform)]; char *bufferpos; unsigned regno; - unsigned *regs = (unsigned *)(ctx->regs); + unsigned long *regs = (unsigned long *)(ctx->regs); memcpy(buffer, dumpform, sizeof(dumpform)); @@ -117,7 +117,7 @@ register_dump (int fd, struct sigcontext *ctx) } /* Write the output. */ - write (fd, buffer, sizeof(buffer)); + write (fd, buffer, sizeof(buffer) - 1); } diff --git a/sysdeps/powerpc/powerpc64/setjmp-common.S b/sysdeps/powerpc/powerpc64/setjmp-common.S index 0de07a8..606eef5 100644 --- a/sysdeps/powerpc/powerpc64/setjmp-common.S +++ b/sysdeps/powerpc/powerpc64/setjmp-common.S @@ -1,5 +1,5 @@ /* setjmp for PowerPC64. - Copyright (C) 1995-2003, 2004 Free Software Foundation, Inc. + Copyright (C) 1995-2003, 2004, 2005, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -19,11 +19,10 @@ #include <sysdep.h> #define _ASM -#define _SETJMP_H #ifdef __NO_VMX__ #include <novmxsetjmp.h> #else -#include <bits/setjmp.h> +#include <jmpbuf-offsets.h> #endif #include <bp-sym.h> #include <bp-asm.h> @@ -46,7 +45,13 @@ ENTRY (BP_SYM (__sigsetjmp)) .hidden JUMPTARGET(GLUE(__sigsetjmp,_ent)) JUMPTARGET(GLUE(__sigsetjmp,_ent)): CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE) +#ifdef PTR_MANGLE + mr r5, r1 + PTR_MANGLE (r5, r6) + std r5,(JB_GPR1*8)(3) +#else std r1,(JB_GPR1*8)(3) +#endif mflr r0 #if defined SHARED && !defined IS_IN_rtld ld r5,40(r1) /* Retrieve the callers TOC. 
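The PTR_MANGLE additions in __sigsetjmp above (the saved stack pointer here, the saved link register a few stores later) scramble security-sensitive pointers before they are written to the jmp_buf. The exact transformation is per-port and uses a secret guard value; conceptually it is just a reversible scrambling, something like this deliberately simplified C sketch (the guard variable and the plain XOR are illustrative, not glibc's actual scheme):

    #include <stdint.h>

    /* Illustrative per-process secret, normally initialised at startup.  */
    static uintptr_t pointer_guard;

    static inline uintptr_t mangle   (uintptr_t p) { return p ^ pointer_guard; }
    static inline uintptr_t demangle (uintptr_t p) { return p ^ pointer_guard; }

    /* A setjmp-like routine stores mangle(sp) and mangle(return_address) in
       the jmp_buf; the longjmp side demangles them before use, so overwriting
       the jmp_buf yields a garbage address rather than an attacker-chosen one.  */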
*/ @@ -56,6 +61,9 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)): #endif std r14,((JB_GPRS+0)*8)(3) stfd fp14,((JB_FPRS+0)*8)(3) +#ifdef PTR_MANGLE + PTR_MANGLE2 (r0, r6) +#endif std r0,(JB_LR*8)(3) std r15,((JB_GPRS+1)*8)(3) stfd fp15,((JB_FPRS+1)*8)(3) @@ -94,14 +102,14 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)): std r31,((JB_GPRS+17)*8)(3) stfd fp31,((JB_FPRS+17)*8)(3) #ifndef __NO_VMX__ - ld r5,.LC__dl_hwcap@toc(r2) + ld r6,.LC__dl_hwcap@toc(r2) # ifdef SHARED /* Load _rtld-global._dl_hwcap. */ - ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) + ld r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6) # else - ld r5,0(r5) /* Load extern _dl_hwcap. */ + ld r6,0(r6) /* Load extern _dl_hwcap. */ # endif - andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + andis. r6,r6,(PPC_FEATURE_HAS_ALTIVEC >> 16) beq L(no_vmx) la r5,((JB_VRS)*8)(3) andi. r6,r5,0xf @@ -176,6 +184,13 @@ L(aligned_save_vmx): stvx 30,0,r5 stvx 31,0,r6 L(no_vmx): +#else + li r6,0 #endif +#if defined NOT_IN_libc && defined IS_IN_rtld + li r3,0 + blr +#else b JUMPTARGET (BP_SYM (__sigjmp_save)) +#endif END (BP_SYM (__sigsetjmp)) diff --git a/sysdeps/powerpc/powerpc64/strncmp.S b/sysdeps/powerpc/powerpc64/strncmp.S index 04bdc2f..34479e2 100644 --- a/sysdeps/powerpc/powerpc64/strncmp.S +++ b/sysdeps/powerpc/powerpc64/strncmp.S @@ -48,6 +48,7 @@ EALIGN (BP_SYM(strncmp), 4, 0) lis r7F7F, 0x7f7f dcbt 0,rSTR2 clrldi. rTMP, rTMP, 61 + cmpldi cr1, rN, 0 lis rFEFE, -0x101 bne L(unaligned) /* We are doubleword alligned so set up for two loops. first a double word @@ -55,7 +56,8 @@ EALIGN (BP_SYM(strncmp), 4, 0) srdi. rTMP, rN, 3 clrldi rN, rN, 61 addi rFEFE, rFEFE, -0x101 - addi r7F7F, r7F7F, 0x7f7f + addi r7F7F, r7F7F, 0x7f7f + cmpldi cr1, rN, 0 beq L(unaligned) mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ @@ -126,16 +128,19 @@ L(tail): addi rSTR1, rSTR1, 8 bne- cr1, L(different) addi rSTR2, rSTR2, 8 + cmpldi cr1, rN, 0 L(unaligned): mtctr rN /* Power4 wants mtctr 1st in dispatch group */ - cmpdi rN,0 - lbz rWORD1, 0(rSTR1) - lbz rWORD2, 0(rSTR2) - bgt L(u1) + bgt cr1, L(uz) L(ux): li rRTN, 0 blr - + .align 4 +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + nop + b L(u1) L(u0): lbzu rWORD2, 1(rSTR2) L(u1): diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h index fead0b5..2745d7e 100644 --- a/sysdeps/powerpc/powerpc64/sysdep.h +++ b/sysdeps/powerpc/powerpc64/sysdep.h @@ -1,5 +1,5 @@ /* Assembly macros for 64-bit PowerPC. - Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc. + Copyright (C) 2002, 2003, 2004, 2006 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -14,8 +14,8 @@ You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA + 02110-1301 USA. */ #include <sysdeps/powerpc/sysdep.h> @@ -92,7 +92,8 @@ name##: OPD_ENT (name); \ #define ENTRY(name) \ ENTRY_2(name) \ .align ALIGNARG(2); \ -BODY_LABEL(name): +BODY_LABEL(name): \ + cfi_startproc; #define EALIGN_W_0 /* No words to insert. */ #define EALIGN_W_1 nop @@ -109,7 +110,8 @@ BODY_LABEL(name): ENTRY_2(name) \ .align ALIGNARG(alignt); \ EALIGN_W_##words; \ -BODY_LABEL(name): +BODY_LABEL(name): \ + cfi_startproc; /* Local labels stripped out by the linker. 
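The strncmp hunks above keep using the r7F7F (0x7f7f...7f) and rFEFE (built from -0x101, i.e. 0xfe...feff) constants. These implement the classic "does this word contain a zero byte?" test: adding 0xfe...feff is the same as subtracting 0x0101...01 modulo 2**64, and combining with the complement masks off bytes that already had their high bit set. In C, for a 64-bit word:

    #include <stdint.h>

    /* Nonzero iff some byte of v is 0: subtract 1 from every byte, then keep
       only high bits that were not already set in v.  */
    static inline int has_zero_byte (uint64_t v)
    {
      return ((v - 0x0101010101010101ULL) & ~v & 0x8080808080808080ULL) != 0;
    }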
*/
@@ -173,11 +175,13 @@ LT_LABELSUFFIX(name,_name_end): ; \
 /* END generates Traceback tables */
 #undef END
 #define END(name) \
+ cfi_endproc; \
 TRACEBACK(name) \
 END_2(name)
 /* This form supports more informative traceback tables */
 #define END_GEN_TB(name,mask) \
+ cfi_endproc; \
 TRACEBACK_MASK(name,mask) \
 END_2(name)