diff options
author | Roland McGrath <roland@gnu.org> | 2002-09-17 23:50:03 +0000 |
---|---|---|
committer | Roland McGrath <roland@gnu.org> | 2002-09-17 23:50:03 +0000 |
commit | cfc91acd418d2ea62a4520b42406a16ef07257c3 (patch) | |
tree | c7d291182781c1534a03528b33d417983b8d2e54 /sysdeps/powerpc | |
parent | 81cb0d8214c425e356c3f79b4b024caab36540b8 (diff) | |
download | glibc-cfc91acd418d2ea62a4520b42406a16ef07257c3.zip glibc-cfc91acd418d2ea62a4520b42406a16ef07257c3.tar.gz glibc-cfc91acd418d2ea62a4520b42406a16ef07257c3.tar.bz2 |
2002-09-17 Steven Munroe <sjmunroe@us.ibm.com>
Ported to PowerPC64 running Linux.
* sysdeps/powerpc/powerpc64/Dist: New file.
* sysdeps/powerpc/powerpc64/Implies: New file.
* sysdeps/powerpc/powerpc64/Makefile: New file.
* sysdeps/powerpc/powerpc64/__longjmp.S: New file.
* sysdeps/powerpc/powerpc64/atomicity.h: New file.
* sysdeps/powerpc/powerpc64/backtrace.c: New file.
* sysdeps/powerpc/powerpc64/bp-asm.h: New file.
* sysdeps/powerpc/powerpc64/bsd-_setjmp.S: New file.
* sysdeps/powerpc/powerpc64/bsd-setjmp.S: New file.
* sysdeps/powerpc/powerpc64/dl-dtprocnum.h: New file.
* sysdeps/powerpc/powerpc64/dl-lookupcfg.h: New file.
* sysdeps/powerpc/powerpc64/dl-machine.c: New file.
* sysdeps/powerpc/powerpc64/dl-machine.h: New file.
* sysdeps/powerpc/powerpc64/memset.S: New file.
* sysdeps/powerpc/powerpc64/ppc-mcount.S: New file.
* sysdeps/powerpc/powerpc64/register-dump.h: New file.
* sysdeps/powerpc/powerpc64/setjmp.S: New file.
* sysdeps/powerpc/powerpc64/stpcpy.S: New file.
* sysdeps/powerpc/powerpc64/strchr.S: New file.
* sysdeps/powerpc/powerpc64/strcmp.S: New file.
* sysdeps/powerpc/powerpc64/strcpy.S: New file.
* sysdeps/powerpc/powerpc64/strlen.S: New file.
* sysdeps/powerpc/powerpc64/elf/bzero.S: New file.
* sysdeps/powerpc/powerpc64/elf/start.S: New file.
* sysdeps/powerpc/powerpc64/fpu/s_copysign.S: New file.
* sysdeps/powerpc/powerpc64/fpu/s_copysignf.S: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/Versions: New File.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/glob64.c: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/syscalls.list: New file.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h: New file.
Diffstat (limited to 'sysdeps/powerpc')
26 files changed, 2660 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc64/Dist b/sysdeps/powerpc/powerpc64/Dist new file mode 100644 index 0000000..bbadfef --- /dev/null +++ b/sysdeps/powerpc/powerpc64/Dist @@ -0,0 +1,4 @@ +dl-machine.c +ppc-mcount.S +elf/entry.h +bp-asm.h diff --git a/sysdeps/powerpc/powerpc64/Implies b/sysdeps/powerpc/powerpc64/Implies new file mode 100644 index 0000000..a8cae95 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/Implies @@ -0,0 +1 @@ +wordsize-64 diff --git a/sysdeps/powerpc/powerpc64/Makefile b/sysdeps/powerpc/powerpc64/Makefile new file mode 100644 index 0000000..ff6819a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/Makefile @@ -0,0 +1,23 @@ +# Powerpc64 specific build options. +# this is ./sysdeps/powerpc/powerpc64/Makefile + ++cflags += -Wa,-mppc64 -mpowerpc64 +asm-CPPFLAGS += -Wa,-mppc64 + +# On PPC64, -fpic is the default so we don't need to specify it. +# Also early compilers would issue a warning if -fpic was specified. +# Each TOC entry takes 8 bytes and the TOC holds up to 2^16 bytes, +# or 8192 entries. If the TOC fills up try -minimal-toc. + +ifeq ($(subdir),csu) +ifneq ($(elf),no) +# The initfini generation code doesn't work in the presence of -g1 or +# higher, so we use -g0. +CFLAGS-initfini.s = -g0 -O1 +endif +endif + +ifeq ($(subdir),elf) +# help gcc inline asm code from dl-machine.h ++cflags += --param max-inline-insns=2000 +endif diff --git a/sysdeps/powerpc/powerpc64/__longjmp.S b/sysdeps/powerpc/powerpc64/__longjmp.S new file mode 100644 index 0000000..e415212 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/__longjmp.S @@ -0,0 +1,74 @@ +/* longjmp for PowerPC64. + Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2002 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#define _ASM +#define _SETJMP_H +#include <bits/setjmp.h> +#include <bp-sym.h> +#include <bp-asm.h> + +ENTRY (BP_SYM (__longjmp)) + CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE) + ld r1,(JB_GPR1*8)(r3) + ld r2,(JB_GPR2*8)(r3) + ld r0,(JB_LR*8)(r3) + ld r14,((JB_GPRS+0)*8)(r3) + lfd fp14,((JB_FPRS+0)*8)(r3) + ld r15,((JB_GPRS+1)*8)(r3) + lfd fp15,((JB_FPRS+1)*8)(r3) + ld r16,((JB_GPRS+2)*8)(r3) + lfd fp16,((JB_FPRS+2)*8)(r3) + ld r17,((JB_GPRS+3)*8)(r3) + lfd fp17,((JB_FPRS+3)*8)(r3) + ld r18,((JB_GPRS+4)*8)(r3) + lfd fp18,((JB_FPRS+4)*8)(r3) + ld r19,((JB_GPRS+5)*8)(r3) + lfd fp19,((JB_FPRS+5)*8)(r3) + ld r20,((JB_GPRS+6)*8)(r3) + lfd fp20,((JB_FPRS+6)*8)(r3) + mtlr r0 + ld r21,((JB_GPRS+7)*8)(r3) + lfd fp21,((JB_FPRS+7)*8)(r3) + ld r22,((JB_GPRS+8)*8)(r3) + lfd fp22,((JB_FPRS+8)*8)(r3) + ld r0,(JB_CR*8)(r3) + ld r23,((JB_GPRS+9)*8)(r3) + lfd fp23,((JB_FPRS+9)*8)(r3) + ld r24,((JB_GPRS+10)*8)(r3) + lfd fp24,((JB_FPRS+10)*8)(r3) + ld r25,((JB_GPRS+11)*8)(r3) + lfd fp25,((JB_FPRS+11)*8)(r3) + mtcrf 0xFF,r0 + ld r26,((JB_GPRS+12)*8)(r3) + lfd fp26,((JB_FPRS+12)*8)(r3) + ld r27,((JB_GPRS+13)*8)(r3) + lfd fp27,((JB_FPRS+13)*8)(r3) + ld r28,((JB_GPRS+14)*8)(r3) + lfd fp28,((JB_FPRS+14)*8)(r3) + ld r29,((JB_GPRS+15)*8)(r3) + lfd fp29,((JB_FPRS+15)*8)(r3) + ld r30,((JB_GPRS+16)*8)(r3) + lfd fp30,((JB_FPRS+16)*8)(r3) + ld r31,((JB_GPRS+17)*8)(r3) + lfd fp31,((JB_FPRS+17)*8)(r3) + mr r3,r4 + blr +END (BP_SYM (__longjmp)) diff --git a/sysdeps/powerpc/powerpc64/atomicity.h b/sysdeps/powerpc/powerpc64/atomicity.h new file mode 100644 index 0000000..b9d0de0 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/atomicity.h @@ -0,0 +1,132 @@ +/* Low-level functions for atomic operations. PowerPC64 version. + Copyright (C) 1997, 1998, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifndef _ATOMICITY_H +#define _ATOMICITY_H 1 + +#include <inttypes.h> + +#if BROKEN_PPC_ASM_CR0 +# define __ATOMICITY_INLINE /* nothing */ +#else +# define __ATOMICITY_INLINE inline +#endif + +static __ATOMICITY_INLINE int +__attribute__ ((unused)) +exchange_and_add (volatile uint32_t *mem, int val) +{ + int tmp, result; + __asm__ ("\n\ +0: lwarx %0,0,%2 \n\ + add%I3 %1,%0,%3 \n\ + stwcx. %1,0,%2 \n\ + bne- 0b \n\ +" : "=&b"(result), "=&r"(tmp) : "r" (mem), "Ir"(val) : "cr0", "memory"); + return result; +} + +static __ATOMICITY_INLINE void +__attribute__ ((unused)) +atomic_add_long (volatile long *mem, int val) +{ + int tmp; + __asm__ ("\n\ +0: ldarx %0,0,%1 \n\ + add%I2 %0,%0,%2 \n\ + stdcx. %0,0,%1 \n\ + bne- 0b \n\ +" : "=&b"(tmp) : "r" (mem), "Ir"(val) : "cr0", "memory"); +} + + +static __ATOMICITY_INLINE int +__attribute__ ((unused)) +exchange_and_add_long (volatile long *mem, int val) +{ + int tmp, result; + __asm__ ("\n\ +0: ldarx %0,0,%2 \n\ + add%I3 %1,%0,%3 \n\ + stdcx. %1,0,%2 \n\ + bne- 0b \n\ +" : "=&b"(result), "=&r"(tmp) : "r" (mem), "Ir"(val) : "cr0", "memory"); + return result; +} + +static __ATOMICITY_INLINE void +__attribute__ ((unused)) +atomic_add (volatile uint32_t *mem, int val) +{ + int tmp; + __asm__ ("\n\ +0: lwarx %0,0,%1 \n\ + add%I2 %0,%0,%2 \n\ + stwcx. %0,0,%1 \n\ + bne- 0b \n\ +" : "=&b"(tmp) : "r" (mem), "Ir"(val) : "cr0", "memory"); +} + +static __ATOMICITY_INLINE int +__attribute__ ((unused)) +compare_and_swap (volatile long int *p, long int oldval, long int newval) +{ + int result; + __asm__ ("\n\ +0: ldarx %0,0,%1 \n\ + sub%I2c. %0,%0,%2 \n\ + cntlzw %0,%0 \n\ + bne- 1f \n\ + stdcx. %3,0,%1 \n\ + bne- 0b \n\ +1: \n\ +" : "=&b"(result) : "r"(p), "Ir"(oldval), "r"(newval) : "cr0", "memory"); + return result >> 5; +} + +static __ATOMICITY_INLINE long int +__attribute__ ((unused)) +always_swap (volatile long int *p, long int newval) +{ + long int result; + __asm__ ("\n\ +0: ldarx %0,0,%1 \n\ + stdcx. %2,0,%1 \n\ + bne- 0b \n\ +" : "=&r"(result) : "r"(p), "r"(newval) : "cr0", "memory"); + return result; +} + +static __ATOMICITY_INLINE int +__attribute__ ((unused)) +test_and_set (volatile long int *p, long int newval) +{ + int result; + __asm__ ("\n\ +0: ldarx %0,0,%1 \n\ + cmpdi %0,0 \n\ + bne- 1f \n\ + stdcx. %2,0,%1 \n\ + bne- 0b \n\ +1: \n\ +" : "=&r"(result) : "r"(p), "r"(newval) : "cr0", "memory"); + return result; +} + +#endif /* atomicity.h */ diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c new file mode 100644 index 0000000..8669b6a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/backtrace.c @@ -0,0 +1,69 @@ +/* Return backtrace of current program state. + Copyright (C) 1998, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <execinfo.h> +#include <stddef.h> +#include <bp-checks.h> + +/* This is the stack layout we see with every stack frame. + Note that every routine is required by the ABI to lay out the stack + like this. + + +----------------+ +-----------------+ + %r1 -> | %r1 last frame--------> | %r1 last frame--->... --> NULL + | | | | + | cr save | | cr save | + | | | | + | (unused) | | return address | + +----------------+ +-----------------+ +*/ +struct layout +{ + struct layout *__unbounded next; + long condition_register; + void *__unbounded return_address; +}; + +int +__backtrace (void **array, int size) +{ + struct layout *current; + int count; + + /* Force gcc to spill LR. */ + asm volatile ("" : "=l"(current)); + + /* Get the address on top-of-stack. */ + asm volatile ("ld %0,0(1)" : "=r"(current)); + current = BOUNDED_1 (current); + + for ( count = 0; + current != NULL && count < size; + current = BOUNDED_1 (current->next), count++) + array[count] = current->return_address; + + /* It's possible the second-last stack frame can't return + (that is, it's __libc_start_main), in which case + the CRT startup code will have set its LR to 'NULL'. */ + if (count > 0 && array[count-1] == NULL) + count--; + + return count; +} +weak_alias (__backtrace, backtrace) diff --git a/sysdeps/powerpc/powerpc64/bp-asm.h b/sysdeps/powerpc/powerpc64/bp-asm.h new file mode 100644 index 0000000..ee99e30 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/bp-asm.h @@ -0,0 +1,115 @@ +/* Bounded-pointer definitions for PowerPC64 assembler. + Copyright (C) 2000, 2002 Free Software Foundation, Inc. + Contributed by Greg McGary <greg@mcgary.org> + + This file is part of the GNU C Library. Its master source is NOT part of + the C library, however. The master source lives in the GNU MP Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#if __BOUNDED_POINTERS__ + +/* Byte offsets of BP components. */ +# define oVALUE 0 +# define oLOW 4 +# define oHIGH 8 + +/* Don't check bounds, just convert the BP register to its simple + pointer value. */ + +# define DISCARD_BOUNDS(rBP) \ + ld rBP, oVALUE(rBP) + +/* Check low bound, with the side effect that the BP register is converted + its simple pointer value. Move the high bound into a register for + later use. */ + +# define CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH) \ + ld rHIGH, oHIGH(rBP); \ + ld rLOW, oLOW(rBP); \ + ld rBP, oVALUE(rBP); \ + tdllt rBP, rLOW + +/* Check the high bound, which is in a register, using the given + conditional trap instruction. */ + +# define CHECK_BOUNDS_HIGH(rVALUE, rHIGH, TWLcc) \ + TWLcc rVALUE, rHIGH + +/* Check the high bound, which is stored at the return-value's high + bound slot, using the given conditional trap instruction. */ + +# define CHECK_BOUNDS_HIGH_RTN(rVALUE, rHIGH, TWLcc) \ + ld rHIGH, oHIGH(rRTN); \ + TWLcc rVALUE, rHIGH + +/* Check both bounds, with the side effect that the BP register is + converted to its simple pointer value. */ + +# define CHECK_BOUNDS_BOTH(rBP, rLOW, rHIGH) \ + CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH); \ + tdlge rBP, rHIGH + +/* Check bounds on a memory region of given length, with the side + effect that the BP register is converted to its simple pointer + value. */ + +# define CHECK_BOUNDS_BOTH_WIDE(rBP, rLOW, rHIGH, rLENGTH) \ + CHECK_BOUNDS_LOW (rBP, rLOW, rHIGH); \ + sub rHIGH, rHIGH, rLENGTH; \ + tdlgt rBP, rHIGH + +# define CHECK_BOUNDS_BOTH_WIDE_LIT(rBP, rLOW, rHIGH, LENGTH) \ + CHECK_BOUNDS_LOW (rBP, rLOW, rHIGH); \ + subi rHIGH, rHIGH, LENGTH; \ + tdlgt rBP, rHIGH + +/* Store a pointer value register into the return-value's pointer + value slot. */ + +# define STORE_RETURN_VALUE(rVALUE) \ + std rVALUE, oVALUE(rRTN) + +/* Store a low and high bounds into the return-value's pointer bounds + slots. */ + +# define STORE_RETURN_BOUNDS(rLOW, rHIGH) \ + std rLOW, oLOW(rRTN); \ + std rHIGH, oHIGH(rRTN) + +/* Stuff zero value/low/high into the BP addressed by rRTN. */ + +# define RETURN_NULL_BOUNDED_POINTER \ + li r4, 0; \ + STORE_RETURN_VALUE (r4); \ + STORE_RETURN_BOUNDS (r4, r4) + +#else + +# define DISCARD_BOUNDS(rBP) +# define CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH) +# define CHECK_BOUNDS_HIGH(rVALUE, rHIGH, TWLcc) +# define CHECK_BOUNDS_HIGH_RTN(rVALUE, rHIGH, TWLcc) +# define CHECK_BOUNDS_BOTH(rBP, rLOW, rHIGH) +# define CHECK_BOUNDS_BOTH_WIDE(rBP, rLOW, rHIGH, rLENGTH) +# define CHECK_BOUNDS_BOTH_WIDE_LIT(rBP, rLOW, rHIGH, LENGTH) +# define STORE_RETURN_VALUE(rVALUE) +# define STORE_RETURN_BOUNDS(rLOW, rHIGH) + +# define RETURN_NULL_BOUNDED_POINTER li rRTN, 0 + +#endif + diff --git a/sysdeps/powerpc/powerpc64/bsd-_setjmp.S b/sysdeps/powerpc/powerpc64/bsd-_setjmp.S new file mode 100644 index 0000000..994f82f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/bsd-_setjmp.S @@ -0,0 +1,21 @@ +/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. PowerPC64 version. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This code was moved into setjmp.S to solve a double stub call problem. + @local would have worked but it is not supported in PowerPC64 asm. */ diff --git a/sysdeps/powerpc/powerpc64/bsd-setjmp.S b/sysdeps/powerpc/powerpc64/bsd-setjmp.S new file mode 100644 index 0000000..b6bb8f6 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/bsd-setjmp.S @@ -0,0 +1,21 @@ +/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. PowerPC version. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This code was moved into setjmp.S to solve a double stub call problem. + @local would have worked but it is not supported in PowerPC64 asm. */ diff --git a/sysdeps/powerpc/powerpc64/dl-dtprocnum.h b/sysdeps/powerpc/powerpc64/dl-dtprocnum.h new file mode 100644 index 0000000..477cb44 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/dl-dtprocnum.h @@ -0,0 +1,22 @@ +/* Configuration of lookup functions. PowerPC64 version. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_PPC64_NUM diff --git a/sysdeps/powerpc/powerpc64/dl-lookupcfg.h b/sysdeps/powerpc/powerpc64/dl-lookupcfg.h new file mode 100644 index 0000000..e502941 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/dl-lookupcfg.h @@ -0,0 +1,22 @@ +/* Configuration of lookup functions. PowerPC64 version. + Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* Return the symbol map from the symbol lookup function. */ + +#define DL_LOOKUP_RETURNS_MAP 1 diff --git a/sysdeps/powerpc/powerpc64/dl-machine.c b/sysdeps/powerpc/powerpc64/dl-machine.c new file mode 100644 index 0000000..ef7b340 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/dl-machine.c @@ -0,0 +1,50 @@ +/* Machine-dependent ELF dynamic relocation functions. PowerPC64 version. + Copyright (C) 1995,96,97,98,99,2000,01, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <string.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <stdio-common/_itoa.h> +#include <dl-machine.h> + +void +_dl_reloc_overflow (struct link_map *map, + const char *name, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *sym, + const Elf64_Sym *refsym) +{ + char buffer[128]; + char *t; + const Elf64_Sym *errsym = sym ?: refsym; + t = stpcpy (buffer, name); + t = stpcpy (t, " reloc at 0x"); + _itoa_word ((unsigned long) reloc_addr, t, 16, 0); + if (errsym) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + t = stpcpy (t, " for symbol `"); + t = stpcpy (t, strtab + errsym->st_name); + t = stpcpy (t, "'"); + } + t = stpcpy (t, " out of range"); + _dl_signal_error (0, map->l_name, NULL, buffer); +} diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h new file mode 100644 index 0000000..4081616 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/dl-machine.h @@ -0,0 +1,750 @@ +/* Machine-dependent ELF dynamic relocation inline functions. + PowerPC64 version. + Copyright 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#define ELF_MACHINE_NAME "powerpc64" + +#include <assert.h> +#include <sys/param.h> + +/* Translate a processor specific dynamic tag to the index + in l_info array. */ +#define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM) + +/* A PowerPC64 function descriptor. The .plt (procedure linkage + table) and .opd (official procedure descriptor) sections are + arrays of these. */ +typedef struct +{ + Elf64_Addr fd_func; + Elf64_Addr fd_toc; + Elf64_Addr fd_aux; +} Elf64_FuncDesc; + +#define ELF_MULT_MACHINES_SUPPORTED + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_PPC64; +} + +/* Return nonzero iff ELF header is compatible with the running host, + but not this loader. */ +static inline int +elf_host_tolerates_machine (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_PPC; +} + +/* Return nonzero iff ELF header is compatible with the running host, + but not this loader. */ +static inline int +elf_host_tolerates_class (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_ident[EI_CLASS] == ELFCLASS32; +} + + +/* Return the run-time load address of the shared object, assuming it + was originally linked at zero. */ +static inline Elf64_Addr +elf_machine_load_address (void) __attribute__ ((const)); + +static inline Elf64_Addr +elf_machine_load_address (void) +{ + Elf64_Addr ret; + + /* The first entry in .got (and thus the first entry in .toc) is the + link-time TOC_base, ie. r2. So the difference between that and + the current r2 set by the kernel is how far the shared lib has + moved. */ + asm ( " ld %0,-32768(2)\n" + " subf %0,%0,2\n" + : "=r" (ret)); + return ret; +} + +/* Return the link-time address of _DYNAMIC. */ +static inline Elf64_Addr +elf_machine_dynamic (void) +{ + Elf64_Addr runtime_dynamic; + /* It's easier to get the run-time address. */ + asm ( " addis %0,2,_DYNAMIC@toc@ha\n" + " addi %0,%0,_DYNAMIC@toc@l\n" + : "=b" (runtime_dynamic)); + /* Then subtract off the load address offset. */ + return runtime_dynamic - elf_machine_load_address() ; +} + +#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ + +/* The PLT uses Elf64_Rela relocs. */ +#define elf_machine_relplt elf_machine_rela + +/* This code gets called via a .glink stub which loads PLT0. It is + used in dl-runtime.c to call the `fixup' function and then redirect + to the address `fixup' returns. + + Enter with r0 = plt reloc index, + r2 = ld.so tocbase, + r11 = ld.so link map. */ + +#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name) \ + asm (".section \".text\"\n" \ +" .align 2\n" \ +" .globl ." #tramp_name "\n" \ +" .type ." #tramp_name ",@function\n" \ +" .section \".opd\",\"aw\"\n" \ +" .align 3\n" \ +" .globl " #tramp_name "\n" \ +" .size " #tramp_name ",24\n" \ +#tramp_name ":\n" \ +" .quad ." #tramp_name ",.TOC.@tocbase,0\n" \ +" .previous\n" \ +"." #tramp_name ":\n" \ +/* We need to save the registers used to pass parameters, ie. r3 thru \ + r10; the registers are saved in a stack frame. */ \ +" stdu 1,-128(1)\n" \ +" std 3,48(1)\n" \ +" mr 3,11\n" \ +" std 4,56(1)\n" \ +" sldi 4,0,1\n" \ +" std 5,64(1)\n" \ +" add 4,4,0\n" \ +" std 6,72(1)\n" \ +" sldi 4,4,3\n" \ +" std 7,80(1)\n" \ +" mflr 0\n" \ +" std 8,88(1)\n" \ +/* Store the LR in the LR Save area of the previous frame. */ \ +" std 0,128+16(1)\n" \ +" mfcr 0\n" \ +" std 9,96(1)\n" \ +" std 10,104(1)\n" \ +/* I'm almost certain we don't have to save cr... be safe. */ \ +" std 0,8(1)\n" \ +" bl ." #fixup_name "\n" \ +/* Put the registers back. */ \ +" ld 0,128+16(1)\n" \ +" ld 10,104(1)\n" \ +" ld 9,96(1)\n" \ +" ld 8,88(1)\n" \ +" ld 7,80(1)\n" \ +" mtlr 0\n" \ +" ld 0,8(1)\n" \ +" ld 6,72(1)\n" \ +" ld 5,64(1)\n" \ +" ld 4,56(1)\n" \ +" mtcrf 0xFF,0\n" \ +/* Load the target address, toc and static chain reg from the function \ + descriptor returned by fixup. */ \ +" ld 0,0(3)\n" \ +" ld 2,8(3)\n" \ +" mtctr 0\n" \ +" ld 11,16(3)\n" \ +" ld 3,48(1)\n" \ +/* Unwind the stack frame, and jump. */ \ +" addi 1,1,128\n" \ +" bctr\n" \ +".LT_" #tramp_name ":\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT_" #tramp_name "-."#tramp_name "\n" \ +" .short .LT_" #tramp_name "_name_end-.LT_" #tramp_name "_name_start\n" \ +".LT_" #tramp_name "_name_start:\n" \ +" .ascii \"" #tramp_name "\"\n" \ +".LT_" #tramp_name "_name_end:\n" \ +" .align 2\n" \ +" .size ." #tramp_name ",. - ." #tramp_name "\n" \ +" .previous"); + +#ifndef PROF +#define ELF_MACHINE_RUNTIME_TRAMPOLINE \ + TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup); \ + TRAMPOLINE_TEMPLATE (_dl_profile_resolve, profile_fixup); +#else +#define ELF_MACHINE_RUNTIME_TRAMPOLINE \ + TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup); \ + void _dl_runtime_resolve (void); \ + strong_alias (_dl_runtime_resolve, _dl_profile_resolve); +#endif + + +/* Initial entry point code for the dynamic linker. The C function + `_dl_start' is the real entry point; its return value is the user + program's entry point. */ +#define RTLD_START \ + asm (".section \".text\"\n" \ +" .align 2\n" \ +" .globl ._start\n" \ +" .type ._start,@function\n" \ +" .section \".opd\",\"aw\"\n" \ +" .align 3\n" \ +" .globl _start\n" \ +" .size _start,24\n" \ +"_start:\n" \ +" .quad ._start,.TOC.@tocbase,0\n" \ +" .previous\n" \ +"._start:\n" \ +/* We start with the following on the stack, from top: \ + argc (4 bytes); \ + arguments for program (terminated by NULL); \ + environment variables (terminated by NULL); \ + arguments for the program loader. */ \ +" mr 3,1\n" \ +" li 4,0\n" \ +" stdu 4,-128(1)\n" \ +/* Call _dl_start with one parameter pointing at argc. */ \ +" bl ._dl_start\n" \ +" nop\n" \ +/* Transfer control to _dl_start_user! */ \ +" b ._dl_start_user\n" \ +".LT__start:\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT__start-._start\n" \ +" .short .LT__start_name_end-.LT__start_name_start\n" \ +".LT__start_name_start:\n" \ +" .ascii \"_start\"\n" \ +".LT__start_name_end:\n" \ +" .align 2\n" \ +" .size ._start,.-._start\n" \ +" .globl _dl_start_user\n" \ +" .section \".opd\",\"aw\"\n" \ +"_dl_start_user:\n" \ +" .quad ._dl_start_user, .TOC.@tocbase, 0\n" \ +" .previous\n" \ +" .globl ._dl_start_user\n" \ +" .type ._dl_start_user,@function\n" \ +/* Now, we do our main work of calling initialisation procedures. \ + The ELF ABI doesn't say anything about parameters for these, \ + so we just pass argc, argv, and the environment. \ + Changing these is strongly discouraged (not least because argc is \ + passed by value!). */ \ +"._dl_start_user:\n" \ +/* the address of _start in r30. */ \ +" mr 30,3\n" \ +/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ \ +" ld 28,_rtld_global@got(2)\n" \ +" ld 29,_dl_argc@got(2)\n" \ +" ld 27,_dl_argv@got(2)\n" \ +/* _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1). */ \ +" ld 3,0(28)\n" \ +" lwa 4,0(29)\n" \ +" ld 5,0(27)\n" \ +" sldi 6,4,3\n" \ +" add 6,5,6\n" \ +" addi 6,6,8\n" \ +" bl ._dl_init\n" \ +" nop\n" \ +/* Now, to conform to the ELF ABI, we have to: \ + Pass argc (actually _dl_argc) in r3; */ \ +" lwa 3,0(29)\n" \ +/* Pass argv (actually _dl_argv) in r4; */ \ +" ld 4,0(27)\n" \ +/* Pass argv+argc+1 in r5; */ \ +" sldi 5,3,3\n" \ +" add 6,4,5\n" \ +" addi 5,6,8\n" \ +/* Pass the auxilary vector in r6. This is passed to us just after \ + _envp. */ \ +"2: ldu 0,8(6)\n" \ +" cmpdi 0,0\n" \ +" bne 2b\n" \ +" addi 6,6,8\n" \ +/* Pass a termination function pointer (in this case _dl_fini) in \ + r7. */ \ +" ld 7,_dl_fini@got(2)\n" \ +" ld 26,_dl_starting_up@got(2)\n" \ +/* Pass the stack pointer in r1 (so far so good), pointing to a NULL \ + value. This lets our startup code distinguish between a program \ + linked statically, which linux will call with argc on top of the \ + stack which will hopefully never be zero, and a dynamically linked \ + program which will always have a NULL on the top of the stack. \ + Take the opportunity to clear LR, so anyone who accidentally \ + returns from _start gets SEGV. Also clear the next few words of \ + the stack. */ \ +" li 31,0\n" \ +" std 31,0(1)\n" \ +" mtlr 31\n" \ +" std 31,8(1)\n" \ +" std 31,16(1)\n" \ +" std 31,24(1)\n" \ +/* Clear _dl_starting_up. */ \ +" stw 31,0(26)\n" \ +/* Now, call the start function descriptor at r30... */ \ +" .globl ._dl_main_dispatch\n" \ +"._dl_main_dispatch:\n" \ +" ld 0,0(30)\n" \ +" ld 2,8(30)\n" \ +" mtctr 0\n" \ +" ld 11,16(30)\n" \ +" bctr\n" \ +".LT__dl_start_user:\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT__dl_start_user-._dl_start_user\n" \ +" .short .LT__dl_start_user_name_end-.LT__dl_start_user_name_start\n" \ +".LT__dl_start_user_name_start:\n" \ +" .ascii \"_dl_start_user\"\n" \ +".LT__dl_start_user_name_end:\n" \ +" .align 2\n" \ +" .size ._dl_start_user,.-._dl_start_user\n" \ +" .previous"); + +/* Nonzero iff TYPE should not be allowed to resolve to one of + the main executable's symbols, as for a COPY reloc. */ +#define elf_machine_lookup_noexec_p(type) ((type) == R_PPC64_COPY) + +/* Nonzero iff TYPE describes relocation of a PLT entry, so + PLT entries should not be allowed to define the value. */ +#define elf_machine_lookup_noplt_p(type) ((type) == R_PPC64_JMP_SLOT) + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so + PLT entries should not be allowed to define the value. + ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one + of the main executable's symbols, as for a COPY reloc. */ +#define elf_machine_type_class(type) \ + ((((type) == R_PPC64_ADDR24) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT + +/* The PowerPC never uses REL relocations. */ +#define ELF_MACHINE_NO_REL 1 + +/* Stuff for the PLT. */ +#define PLT_INITIAL_ENTRY_WORDS 3 +#define GLINK_INITIAL_ENTRY_WORDS 8 + +#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory") +#define PPC_SYNC asm volatile ("sync" : : : "memory") +#define PPC_ISYNC asm volatile ("sync; isync" : : : "memory") +#define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory") +#define PPC_DIE asm volatile ("tweq 0,0") +/* Use this when you've modified some code, but it won't be in the + instruction fetch queue (or when it doesn't matter if it is). */ +#define MODIFIED_CODE_NOQUEUE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0) +/* Use this when it might be in the instruction queue. */ +#define MODIFIED_CODE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0) + +/* Set up the loaded object described by MAP so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ +static inline int +elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) +{ + if (map->l_info[DT_JMPREL]) + { + Elf64_Word i; + Elf64_Word *glink = NULL; + Elf64_Xword *plt = (Elf64_Xword *) D_PTR (map, l_info[DT_PLTGOT]); + Elf64_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf64_Rela)); + Elf64_Addr l_addr = map->l_addr; + Elf64_Dyn **info = map->l_info; + char *p; + + extern void _dl_runtime_resolve (void); + extern void _dl_profile_resolve (void); + + /* Relocate the DT_PPC64_GLINK entry in the _DYNAMIC section. + elf_get_dynamic_info takes care of the standard entries but + doesn't know exactly what to do with processor specific + entires. */ + if (info[DT_PPC64(GLINK)] != NULL) + info[DT_PPC64(GLINK)]->d_un.d_ptr += l_addr; + + if (lazy) + { + /* The function descriptor of the appropriate trampline + routine is used to set the 1st and 2nd doubleword of the + plt_reserve. */ + Elf64_FuncDesc *resolve_fd; + Elf64_Word glink_offset; + /* the plt_reserve area is the 1st 3 doublewords of the PLT */ + Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt; + Elf64_Word offset; + + resolve_fd = (Elf64_FuncDesc *) (profile ? _dl_profile_resolve + : _dl_runtime_resolve); + if (profile && _dl_name_match_p (GL(dl_profile), map)) + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; + + + /* We need to stuff the address/TOC of _dl_runtime_resolve + into doublewords 0 and 1 of plt_reserve. Then we need to + stuff the map address into doubleword 2 of plt_reserve. + This allows the GLINK0 code to transfer control to the + correct trampoline which will transfer control to fixup + in dl-machine.c. */ + plt_reserve->fd_func = resolve_fd->fd_func; + plt_reserve->fd_toc = resolve_fd->fd_toc; + plt_reserve->fd_aux = (Elf64_Addr) map; +#ifdef RTLD_BOOTSTRAP + /* When we're bootstrapping, the opd entry will not have + been relocated yet. */ + plt_reserve->fd_func += l_addr; + plt_reserve->fd_toc += l_addr; +#endif + + /* Set up the lazy PLT entries. */ + glink = (Elf64_Word *) D_PTR (map, l_info[DT_PPC64(GLINK)]); + offset = PLT_INITIAL_ENTRY_WORDS; + glink_offset = GLINK_INITIAL_ENTRY_WORDS; + for (i = 0; i < num_plt_entries; i++) + { + + plt[offset] = (Elf64_Xword) &glink[glink_offset]; + offset += 3; + /* The first 32k entries of glink can set an index and + branch using two instructions; Past that point, + glink uses three instructions. */ + if (i < 0x8000) + glink_offset += 2; + else + glink_offset += 3; + } + + /* Now, we've modified data. We need to write the changes from + the data cache to a second-level unified cache, then make + sure that stale data in the instruction cache is removed. + (In a multiprocessor system, the effect is more complex.) + Most of the PLT shouldn't be in the instruction cache, but + there may be a little overlap at the start and the end. + + Assumes that dcbst and icbi apply to lines of 16 bytes or + more. Current known line sizes are 16, 32, and 128 bytes. */ + + for (p = (char *) plt; p < (char *) &plt[offset]; p += 16) + PPC_DCBST (p); + PPC_SYNC; + } + } + return lazy; +} + +static inline void +elf_machine_lazy_rel (struct link_map *map, + Elf64_Addr l_addr, const Elf64_Rela *reloc) +{ + /* elf_machine_runtime_setup handles this. */ +} + +/* Change the PLT entry whose reloc is 'reloc' to call the actual + routine. */ +static inline Elf64_Addr +elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) +{ + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + Elf64_Addr offset = 0; +#ifndef RTLD_BOOTSTRAP + weak_extern (GL(dl_rtld_map)); +#endif + + /* If sym_map is NULL, it's a weak undefined sym; Leave the plt zero. */ + if (sym_map == NULL) + return 0; + + /* If the opd entry is not yet relocated (because it's from a shared + object that hasn't been processed yet), then manually reloc it. */ + if (map != sym_map && !sym_map->l_relocated +#ifndef RTLD_BOOTSTRAP + /* Bootstrap map doesn't have l_relocated set for it. */ + && sym_map != &GL(dl_rtld_map) +#endif + ) + offset = sym_map->l_addr; + + /* For PPC64, fixup_plt copies the function descriptor from opd + over the corresponding PLT entry. + Initially, PLT Entry[i] is set up for lazy linking, or is zero. + For lazy linking, the fd_toc and fd_aux entries are irrelevant, + so for thread safety we write them before changing fd_func. */ + + plt->fd_aux = rel->fd_aux + offset; + plt->fd_toc = rel->fd_toc + offset; + PPC_DCBST (&plt->fd_aux); + PPC_DCBST (&plt->fd_toc); + PPC_SYNC; + + plt->fd_func = rel->fd_func + offset; + PPC_DCBST (&plt->fd_func); + PPC_SYNC; + + return finaladdr; +} + +/* Return the final value of a plt relocation. */ +static inline Elf64_Addr +elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + Elf64_Addr value) +{ + return value + reloc->r_addend; +} + +#endif /* dl_machine_h */ + +#ifdef RESOLVE_MAP + +#define PPC_LO(v) ((v) & 0xffff) +#define PPC_HI(v) (((v) >> 16) & 0xffff) +#define PPC_HA(v) PPC_HI ((v) + 0x8000) +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define PPC_HIGHERA(v) PPC_HIGHER ((v) + 0x8000) +#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff) +#define PPC_HIGHESTA(v) PPC_HIGHEST ((v) + 0x8000) +#define BIT_INSERT(old, val, mask) ((old & ~(Elf64_Addr) mask) | (val & mask)) + +#define dont_expect(X) __builtin_expect ((X), 0) + +extern void _dl_reloc_overflow (struct link_map *map, + const char *name, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *sym, + const Elf64_Sym *refsym) + attribute_hidden; + +static inline void +elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + Elf64_Addr *const reloc_addr) +{ + *reloc_addr = l_addr + reloc->r_addend; +} + +/* Perform the relocation specified by RELOC and SYM (which is fully + resolved). MAP is the object containing the reloc. */ +static inline void +elf_machine_rela (struct link_map *map, + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, + Elf64_Addr *const reloc_addr) +{ + int r_type = ELF64_R_TYPE (reloc->r_info); + struct link_map *sym_map; + Elf64_Addr value; +#ifndef RTLD_BOOTSTRAP + const Elf64_Sym *const refsym = sym; + /* This is defined in rtld.c, but nowhere in the static libc.a; make the + reference weak so static programs can still link. This declaration + cannot be done when compiling rtld.c (i.e. #ifdef RTLD_BOOTSTRAP) + because rtld.c contains the common defn for _dl_rtld_map, which is + incompatible with a weak decl in the same file. */ + weak_extern (GL(dl_rtld_map)); +#endif + + if (r_type == R_PPC64_RELATIVE) + { +#ifndef RTLD_BOOTSTRAP + /* Already done in dynamic linker. */ + if (map != &GL(dl_rtld_map)) +#endif + *reloc_addr = map->l_addr + reloc->r_addend; + return; + } + + if (r_type == R_PPC64_NONE) + return; + + sym_map = RESOLVE_MAP (&sym, version, r_type); + value = 0; + if (sym_map) + { + if (sym) + value = sym_map->l_addr + sym->st_value; + value += reloc->r_addend; + } + + switch (r_type) + { + case R_PPC64_ADDR64: + case R_PPC64_GLOB_DAT: + *reloc_addr = value; + return; + + case R_PPC64_JMP_SLOT: + + elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value); + return; + +#ifndef RTLD_BOOTSTRAP /* None of the following appear in ld.so */ + case R_PPC64_ADDR16_LO_DS: + if (dont_expect ((value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_LO_DS", + reloc_addr, sym, refsym); + *(Elf64_Half *) reloc_addr = BIT_INSERT (*(Elf64_Half *) reloc_addr, + value, 0xfffc); + break; + + case R_PPC64_ADDR16_LO: + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_ADDR16_HI: + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_ADDR16_HA: + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_REL24: + { + Elf64_Addr delta = value - (Elf64_Xword) reloc_addr; + if (dont_expect ((delta + 0x2000000) >= 0x4000000 || (delta & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_REL24", reloc_addr, sym, refsym); + *(Elf64_Word *) reloc_addr = BIT_INSERT (*(Elf64_Word *) reloc_addr, + delta, 0x3fffffc); + } + break; + + case R_PPC64_COPY: + if (dont_expect (sym == NULL)) + /* This can happen in trace mode when an object could not be found. */ + return; + if (dont_expect (sym->st_size > refsym->st_size + || (GL(dl_verbose) && sym->st_size < refsym->st_size))) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + _dl_error_printf ("%s: Symbol `%s' has different size" \ + " in shared object," \ + " consider re-linking\n", + _dl_argv[0] ?: "<program name unknown>", + strtab + refsym->st_name); + } + memcpy (reloc_addr, (char *) value, MIN (sym->st_size, refsym->st_size)); + return; + + case R_PPC64_UADDR64: + /* We are big-endian. */ + ((char *) reloc_addr)[0] = (value >> 56) & 0xff; + ((char *) reloc_addr)[1] = (value >> 48) & 0xff; + ((char *) reloc_addr)[2] = (value >> 40) & 0xff; + ((char *) reloc_addr)[3] = (value >> 32) & 0xff; + ((char *) reloc_addr)[4] = (value >> 24) & 0xff; + ((char *) reloc_addr)[5] = (value >> 16) & 0xff; + ((char *) reloc_addr)[6] = (value >> 8) & 0xff; + ((char *) reloc_addr)[7] = (value >> 0) & 0xff; + return; + + case R_PPC64_UADDR32: + /* We are big-endian. */ + ((char *) reloc_addr)[0] = (value >> 24) & 0xff; + ((char *) reloc_addr)[1] = (value >> 16) & 0xff; + ((char *) reloc_addr)[2] = (value >> 8) & 0xff; + ((char *) reloc_addr)[3] = (value >> 0) & 0xff; + return; + + case R_PPC64_ADDR24: + if (dont_expect ((value + 0x2000000) >= 0x4000000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR24", reloc_addr, sym, refsym); + *(Elf64_Word *) reloc_addr = BIT_INSERT (*(Elf64_Word *) reloc_addr, + value, 0x3fffffc); + break; + + case R_PPC64_ADDR16: + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16", reloc_addr, sym, refsym); + *(Elf64_Half *) reloc_addr = value; + break; + + case R_PPC64_UADDR16: + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_UADDR16", reloc_addr, sym, refsym); + /* We are big-endian. */ + ((char *) reloc_addr)[0] = (value >> 8) & 0xff; + ((char *) reloc_addr)[1] = (value >> 0) & 0xff; + break; + + case R_PPC64_ADDR16_DS: + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_DS", reloc_addr, sym, refsym); + *(Elf64_Half *) reloc_addr = BIT_INSERT (*(Elf64_Half *) reloc_addr, + value, 0xfffc); + break; + + case R_PPC64_ADDR16_HIGHER: + *(Elf64_Half *) reloc_addr = PPC_HIGHER (value); + break; + + case R_PPC64_ADDR16_HIGHEST: + *(Elf64_Half *) reloc_addr = PPC_HIGHEST (value); + break; + + case R_PPC64_ADDR16_HIGHERA: + *(Elf64_Half *) reloc_addr = PPC_HIGHERA (value); + break; + + case R_PPC64_ADDR16_HIGHESTA: + *(Elf64_Half *) reloc_addr = PPC_HIGHESTA (value); + break; + + case R_PPC64_ADDR14: + case R_PPC64_ADDR14_BRTAKEN: + case R_PPC64_ADDR14_BRNTAKEN: + { + Elf64_Word insn; + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR14", reloc_addr, sym, refsym); + insn = BIT_INSERT (*(Elf64_Word *) reloc_addr, value, 0xfffc); + if (r_type != R_PPC64_ADDR14) + { + insn &= ~(1 << 21); + if (r_type == R_PPC64_ADDR14_BRTAKEN) + insn |= 1 << 21; + if ((insn & (0x14 << 21)) == (0x04 << 21)) + insn |= 0x02 << 21; + else if ((insn & (0x14 << 21)) == (0x10 << 21)) + insn |= 0x08 << 21; + } + *(Elf64_Word *) reloc_addr = insn; + } + break; + + case R_PPC64_REL32: + *(Elf64_Word *) reloc_addr = value - (Elf64_Xword) reloc_addr; + return; +#endif /* !RTLD_BOOTSTRAP */ + + default: + _dl_reloc_bad_type (map, r_type, 0); + return; + } + MODIFIED_CODE_NOQUEUE (reloc_addr); +} + +#endif /* RESOLVE */ diff --git a/sysdeps/powerpc/powerpc64/elf/bzero.S b/sysdeps/powerpc/powerpc64/elf/bzero.S new file mode 100644 index 0000000..f899bd2 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/elf/bzero.S @@ -0,0 +1,21 @@ +/* Optimized bzero `implementation' for PowerPC64. + Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This code was moved into memset.S to solve a double stub call problem. + @local would have worked but it is not supported in PowerPC64 asm. */ diff --git a/sysdeps/powerpc/powerpc64/elf/start.S b/sysdeps/powerpc/powerpc64/elf/start.S new file mode 100644 index 0000000..129f126 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/elf/start.S @@ -0,0 +1,66 @@ +/* Startup code for programs linked with GNU libc. PowerPC64 version. + Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include "bp-sym.h" + + /* These are the various addresses we require. */ + .section ".rodata" + .align 3 + weak_extern(_init) + weak_extern(_fini) + weak_extern(._init) + weak_extern(._fini) +L(start_addresses): + .quad 0 /* was _SDA_BASE_ but not in 64-bit ABI*/ +/* function descriptors so don't need JUMPTARGET */ + .quad BP_SYM(main) + .quad _init + .quad _fini + + ASM_SIZE_DIRECTIVE(L(start_addresses)) + + .section ".toc","aw" +.L01: + .tc L(start_addresses)[TC],L(start_addresses) + .section ".text" +ENTRY(_start) + /* Save the stack pointer, in case we're statically linked under Linux. */ + mr r9,r1 + /* Set up an initial stack frame, and clear the LR. */ + clrrdi r1,r1,4 + li r0,0 + stdu r1,-128(r1) + mtlr r0 + std r0,0(r1) + + /* put the address of start_addresses in r8... ** +** PPC64 ABI uses R13 for thread local, so we leave it alone */ + ld r8,.L01(r2) + + /* and continue in libc-start, in glibc. */ + b JUMPTARGET(BP_SYM(__libc_start_main)) + +END(_start) + +/* Define a symbol for the first piece of initialized data. */ + .section ".data" + .globl __data_start +__data_start: +weak_alias (__data_start, data_start) diff --git a/sysdeps/powerpc/powerpc64/fpu/s_copysign.S b/sysdeps/powerpc/powerpc64/fpu/s_copysign.S new file mode 100644 index 0000000..40fd83d --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_copysign.S @@ -0,0 +1,50 @@ +/* Copy a sign bit between floating-point values. PowerPC64 version. + Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> + +ENTRY(__copysign) +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stdu r1,-48(r1) + stfd fp2,24(r1) + ld r3,24(r1) + cmpdi r3,0 + addi r1,r1,48 + blt L(0) + fabs fp1,fp1 + blr +L(0): fnabs fp1,fp1 + blr + END (__copysign) + +weak_alias(__copysign,copysign) + +/* It turns out that it's safe to use this code even for single-precision. */ +weak_alias(__copysign,copysignf) +strong_alias(__copysign,__copysignf) + +#ifdef NO_LONG_DOUBLE +weak_alias(__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif diff --git a/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S b/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S new file mode 100644 index 0000000..e05438a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* __copysignf is in s_copysign.S */ diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S new file mode 100644 index 0000000..4bfe20d --- /dev/null +++ b/sysdeps/powerpc/powerpc64/memset.S @@ -0,0 +1,296 @@ +/* Optimized memset implementation for PowerPC64. + Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* Define a global static that can hold the cache line size. The + assumption is that startup code will access the "aux vector" to + to obtain the value set by the kernel and store it into this + variable. */ + .globl __cache_line_size + .section ".data" + .align 2 + .type __cache_line_size,@object + .size __cache_line_size,4 +__cache_line_size: + .long 0 + .section ".toc","aw" +.LC0: + .tc __cache_line_size[TC],__cache_line_size + .section ".text" + .align 2 + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + +EALIGN (BP_SYM (memset), 5, 0) + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#if __BOUNDED_POINTERS__ +# define rMEMP0 r4 /* Original value of 1st arg. */ +# define rCHR r5 /* Char to set in each byte. */ +# define rLEN r6 /* Length of region to set. */ +# define rMEMP r10 /* Address at which we are storing. */ +#else +# define rMEMP0 r3 /* Original value of 1st arg. */ +# define rCHR r4 /* Char to set in each byte. */ +# define rLEN r5 /* Length of region to set. */ +# define rMEMP r6 /* Address at which we are storing. */ +#endif +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rCLS r8 /* Cache line size obtained from static. */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ + +___memset: +#if __BOUNDED_POINTERS__ + cmpldi cr1, rRTN, 0 + CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN) + beq cr1, L(b0) + STORE_RETURN_VALUE (rMEMP0) + STORE_RETURN_BOUNDS (rTMP, rTMP2) +L(b0): +#endif +/* Take care of case for size <= 4. */ + cmpldi cr1, rLEN, 8 + andi. rALIGN, rMEMP0, 7 + mr rMEMP, rMEMP0 + ble- cr1, L(small) + +/* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 + rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ + bt 29, L(g4) +/* Process the even word of doubleword. */ + bf+ 31, L(g2) + stb rCHR, 0(rMEMP0) + bt 30, L(g4x) +L(g2): + sth rCHR, -6(rMEMP) +L(g4x): + stw rCHR, -4(rMEMP) + b L(aligned) +/* Process the odd word of doubleword. */ +L(g4): + bf 28, L(g4x) /* If false, word aligned on odd word. */ + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned2): + rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */ +L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 + insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stdu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + std rCHR, -8(rMEMP2) + stdu rCHR, -16(rMEMP2) +L(a2): + +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmpldi cr1, rCHR, 0 + clrrdi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + srdi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* We may not actually get to do a full line. */ + clrldi. rLEN, rLEN, 59 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) + +L(c3): dcbtst rNEG64, rMEMP + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + cmpldi cr1, rLEN, 16 + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) + + .align 5 +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): +/* If the remaining length is less the 32 bytes, don't bother getting + the cache line size. */ + beq L(medium) + ld rCLS,.LC0@toc(r2) + lwz rCLS,0(rCLS) +/* If the cache line size was not set just goto to L(nondcbz) which is + safe for any cache line size. */ + cmpldi cr1,rCLS,0 + beq cr1,L(nondcbz) + + +/* Now we know the cache line size, and it is not 32-bytes, but + we may not yet be aligned to the cache line. May have a partial + line to fill, so touch it 1st. */ + dcbt 0,rMEMP + addi rCLM,rCLS,-1 +L(getCacheAligned): + cmpldi cr1,rLEN,32 + and. rTMP,rCLM,rMEMP + blt cr1,L(handletail32) + beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,-32(rMEMP) + std rCHR,-24(rMEMP) + std rCHR,-16(rMEMP) + std rCHR,-8(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. */ +L(cacheAligned): + cmpld cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because the cache line size was set and was not 32-bytes + and the remainder (rLEN) is less than the actual cache line size. + So set up the preconditions for L(nondcbz) and go there. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + + .align 5 +L(small): +/* Memset of 8 bytes or less. */ + cmpldi cr6, rLEN, 4 + cmpldi cr5, rLEN, 1 + ble cr6,L(le4) + subi rLEN, rLEN, 4 + stb rCHR,0(rMEMP) + stb rCHR,1(rMEMP) + stb rCHR,2(rMEMP) + stb rCHR,3(rMEMP) + addi rMEMP,rMEMP, 4 + cmpldi cr5, rLEN, 1 +L(le4): + cmpldi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + std rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) +L(medium_27t): + std rCHR, -8(rMEMP) + stdu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + std rCHR, -8(rMEMP) + blr +END_GEN_TB (BP_SYM (memset),TB_TOCLESS) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (BP_SYM (__bzero)) +#if __BOUNDED_POINTERS__ + mr r6,r4 + li r5,0 + mr r4,r3 + /* Tell memset that we don't want a return value. */ + li r3,0 + b ___memset +#else + mr r5,r4 + li r4,0 + b ___memset +#endif +END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS) + +weak_alias (BP_SYM (__bzero), BP_SYM (bzero)) + diff --git a/sysdeps/powerpc/powerpc64/ppc-mcount.S b/sysdeps/powerpc/powerpc64/ppc-mcount.S new file mode 100644 index 0000000..eaa586a --- /dev/null +++ b/sysdeps/powerpc/powerpc64/ppc-mcount.S @@ -0,0 +1,43 @@ +/* PowerPC64-specific implementation of profiling support. + Copyright (C) 1997, 1999, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* This would be bad. */ +#ifdef PROF +#undef PROF +#endif + +#include <sysdep.h> +/* We don't need to save the parameter-passing registers as gcc takes + care of that for us. Thus this function looks fairly normal. + In fact, the generic code would work for us. */ + +ENTRY(_mcount) + mflr r4 + ld r11, 0(r1) + stdu r1,-112(r1) + std r4, 128(r1) + ld r3, 16(r11) + bl JUMPTARGET(__mcount_internal) + nop + ld r0, 128(r1) + mtlr r0 + addi r1,r1,112 + blr +END(_mcount) + diff --git a/sysdeps/powerpc/powerpc64/register-dump.h b/sysdeps/powerpc/powerpc64/register-dump.h new file mode 100644 index 0000000..dd69af3 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/register-dump.h @@ -0,0 +1,125 @@ +/* Dump registers. + Copyright (C) 1998, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sys/uio.h> +#include <stdio-common/_itoa.h> + +/* This prints out the information in the following form: */ +static const char dumpform[] = "\ +Register dump:\n\ +sr0=000000000000020% sr1=000000000000021% dar=000000000000029% dsi=000002a%\n\ +lr=000000000000024% ctr=000000000000023% gr3*=000000000000022% trap=0000028%\n\ +ccr=0000026% xer=0000025%\n\ +gr0-3: 000000000000000% 000000000000001% 000000000000002% 000000000000003%\n\ +gr4-7: 000000000000004% 000000000000005% 000000000000006% 000000000000007%\n\ +gr8-11: 000000000000008% 000000000000009% 00000000000000a% 00000000000000b%\n\ +gr12-15: 00000000000000c% 00000000000000d% 00000000000000e% 00000000000000f%\n\ +gr16-19: 000000000000010% 000000000000011% 000000000000012% 000000000000013%\n\ +gr20-23: 000000000000014% 000000000000015% 000000000000016% 000000000000017%\n\ +gr24-27: 000000000000018% 000000000000019% 00000000000001a% 00000000000001b%\n\ +gr28-31: 00000000000001c% 00000000000001d% 00000000000001e% 00000000000001f%\n\ +fscr=0000071%\n\ +fp0-3: 000000000000030% 000000000000031% 000000000000032% 000000000000033%\n\ +fp4-7: 000000000000034% 000000000000035% 000000000000036% 000000000000037%\n\ +fp8-11: 000000000000038% 000000000000038% 00000000000003a% 00000000000003b%\n\ +fp12-15: 00000000000003c% 00000000000003d% 00000000000003e% 00000000000003f%\n\ +fp16-19: 000000000000040% 000000000000041% 000000000000042% 000000000000043%\n\ +fp20-23: 000000000000044% 000000000000045% 000000000000046% 000000000000047%\n\ +fp24-27: 000000000000048% 000000000000049% 00000000000004a% 00000000000004b%\n\ +fp28-31: 00000000000004c% 00000000000004d% 00000000000004e% 00000000000004f%\n\ +"; + +/* Most of the fields are self-explanatory. 'sr0' is the next + instruction to execute, from SRR0, which may have some relationship + with the instruction that caused the exception. 'r3*' is the value + that will be returned in register 3 when the current system call + returns. 'sr1' is SRR1, bits 16-31 of which are copied from the MSR: + + 16 - External interrupt enable + 17 - Privilege level (1=user, 0=supervisor) + 18 - FP available + 19 - Machine check enable (if clear, processor locks up on machine check) + 20 - FP exception mode bit 0 (FP exceptions recoverable) + 21 - Single-step trace enable + 22 - Branch trace enable + 23 - FP exception mode bit 1 + 25 - exception prefix (if set, exceptions are taken from 0xFFFnnnnn, + otherwise from 0x000nnnnn). + 26 - Instruction address translation enabled. + 27 - Data address translation enabled. + 30 - Exception is recoverable (otherwise, don't try to return). + 31 - Little-endian mode enable. + + 'Trap' is the address of the exception: + + 00200 - Machine check exception (memory parity error, for instance) + 00300 - Data access exception (memory not mapped, see dsisr for why) + 00400 - Instruction access exception (memory not mapped) + 00500 - External interrupt + 00600 - Alignment exception (see dsisr for more information) + 00700 - Program exception (illegal/trap instruction, FP exception) + 00800 - FP unavailable (should not be seen by user code) + 00900 - Decrementer exception (for instance, SIGALRM) + 00A00 - I/O controller interface exception + 00C00 - System call exception (for instance, kill(3)). + 00E00 - FP assist exception (optional FP instructions, etc.) + + 'dar' is the memory location, for traps 00300, 00400, 00600, 00A00. + 'dsisr' has the following bits under trap 00300: + 0 - direct-store error exception + 1 - no page table entry for page + 4 - memory access not permitted + 5 - trying to access I/O controller space or using lwarx/stwcx on + non-write-cached memory + 6 - access was store + 9 - data access breakpoint hit + 10 - segment table search failed to find translation (64-bit ppcs only) + 11 - I/O controller instruction not permitted + For trap 00400, the same bits are set in SRR1 instead. + For trap 00600, bits 12-31 of the DSISR set to allow emulation of + the instruction without actually having to read it from memory. +*/ + +#define xtoi(x) (x >= 'a' ? x + 10 - 'a' : x - '0') + +static void +register_dump (int fd, struct sigcontext *ctx) +{ + char buffer[sizeof(dumpform)]; + char *bufferpos; + unsigned regno; + unsigned *regs = (unsigned *)(ctx->regs); + + memcpy(buffer, dumpform, sizeof(dumpform)); + + /* Generate the output. */ + while ((bufferpos = memchr (buffer, '%', sizeof(dumpform)))) + { + regno = xtoi (bufferpos[-1]) | xtoi (bufferpos[-2]) << 4; + memset (bufferpos-2, '0', 3); + _itoa_word (regs[regno], bufferpos+1, 16, 0); + } + + /* Write the output. */ + write (fd, buffer, sizeof(buffer)); +} + + +#define REGISTER_DUMP \ + register_dump (fd, ctx) diff --git a/sysdeps/powerpc/powerpc64/setjmp.S b/sysdeps/powerpc/powerpc64/setjmp.S new file mode 100644 index 0000000..35d2a27 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/setjmp.S @@ -0,0 +1,87 @@ +/* setjmp for PowerPC64. + Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2002 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#define _ASM +#define _SETJMP_H +#include <bits/setjmp.h> +#include <bp-sym.h> +#include <bp-asm.h> + +ENTRY (BP_SYM (__sigsetjmp)) + CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE) +___sigsetjmp: + std r1,(JB_GPR1*8)(3) + mflr r0 + std r2,(JB_GPR2*8)(3) + std r14,((JB_GPRS+0)*8)(3) + stfd fp14,((JB_FPRS+0)*8)(3) + std r0,(JB_LR*8)(3) + std r15,((JB_GPRS+1)*8)(3) + stfd fp15,((JB_FPRS+1)*8)(3) + mfcr r0 + std r16,((JB_GPRS+2)*8)(3) + stfd fp16,((JB_FPRS+2)*8)(3) + std r0,(JB_CR*8)(3) + std r17,((JB_GPRS+3)*8)(3) + stfd fp17,((JB_FPRS+3)*8)(3) + std r18,((JB_GPRS+8)*8)(3) + stfd fp18,((JB_FPRS+4)*8)(3) + std r19,((JB_GPRS+5)*8)(3) + stfd fp19,((JB_FPRS+5)*8)(3) + std r20,((JB_GPRS+6)*8)(3) + stfd fp20,((JB_FPRS+6)*8)(3) + std r21,((JB_GPRS+7)*8)(3) + stfd fp21,((JB_FPRS+7)*8)(3) + std r22,((JB_GPRS+8)*8)(3) + stfd fp22,((JB_FPRS+8)*8)(3) + std r23,((JB_GPRS+9)*8)(3) + stfd fp23,((JB_FPRS+9)*8)(3) + std r24,((JB_GPRS+10)*8)(3) + stfd fp24,((JB_FPRS+10)*8)(3) + std r25,((JB_GPRS+11)*8)(3) + stfd fp25,((JB_FPRS+11)*8)(3) + std r26,((JB_GPRS+12)*8)(3) + stfd fp26,((JB_FPRS+12)*8)(3) + std r27,((JB_GPRS+13)*8)(3) + stfd fp27,((JB_FPRS+13)*8)(3) + std r28,((JB_GPRS+14)*8)(3) + stfd fp28,((JB_FPRS+14)*8)(3) + std r29,((JB_GPRS+15)*8)(3) + stfd fp29,((JB_FPRS+15)*8)(3) + std r30,((JB_GPRS+16)*8)(3) + stfd fp30,((JB_FPRS+16)*8)(3) + std r31,((JB_GPRS+17)*8)(3) + stfd fp31,((JB_FPRS+17)*8)(3) + b JUMPTARGET (BP_SYM (__sigjmp_save)) +END (BP_SYM (__sigsetjmp)) + +ENTRY (BP_SYM (_setjmp)) + li r4,0 /* Set second argument to 0. */ + b ___sigsetjmp +END (BP_SYM (_setjmp)) + +ENTRY (BP_SYM (__setjmp)) + li r4,1 /* Set second argument to 1. */ + b ___sigsetjmp +END (BP_SYM (__setjmp)) + +strong_alias (__setjmp, setjmp) + diff --git a/sysdeps/powerpc/powerpc64/stpcpy.S b/sysdeps/powerpc/powerpc64/stpcpy.S new file mode 100644 index 0000000..c842111 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/stpcpy.S @@ -0,0 +1,121 @@ +/* Optimized stpcpy implementation for PowerPC64. + Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */ + +EALIGN (BP_SYM (__stpcpy), 4, 0) + +#define rTMP r0 +#define rRTN r3 +#if __BOUNDED_POINTERS__ +# define rDEST r4 /* pointer to previous word in dest */ +# define rSRC r5 /* pointer to previous word in src */ +# define rLOW r11 +# define rHIGH r12 +#else +# define rDEST r3 /* pointer to previous word in dest */ +# define rSRC r4 /* pointer to previous word in src */ +#endif +#define rWORD r6 /* current word from src */ +#define rFEFE r7 /* 0xfefefeff */ +#define r7F7F r8 /* 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */ +#define rALT r10 /* alternate word from src */ + + CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH) + CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH) + STORE_RETURN_BOUNDS (rLOW, rHIGH) + + or rTMP, rSRC, rDEST + clrldi. rTMP, rTMP, 62 + addi rDEST, rDEST, -4 + bne L(unaligned) + + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + lwz rWORD, 0(rSRC) + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g2) + +L(g0): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rDEST) + add rTMP, rFEFE, rALT + nor rNEG, r7F7F, rALT + and. rTMP, rTMP, rNEG + bne- L(g1) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rDEST) +L(g2): add rTMP, rFEFE, rWORD + nor rNEG, r7F7F, rWORD + and. rTMP, rTMP, rNEG + beq+ L(g0) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g1): rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + stbu rALT, 1(rDEST) + CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) + STORE_RETURN_VALUE (rDEST) + blr + +/* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 + nop +L(unaligned): + lbz rWORD, 0(rSRC) + addi rDEST, rDEST, 3 + cmpwi rWORD, 0 + beq- L(u2) + +L(u0): lbzu rALT, 1(rSRC) + stbu rWORD, 1(rDEST) + cmpwi rALT, 0 + beq- L(u1) + nop /* Let 601 load start of loop. */ + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rDEST) + cmpwi rWORD, 0 + bne+ L(u0) +L(u2): stbu rWORD, 1(rDEST) + CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) + STORE_RETURN_VALUE (rDEST) + blr +L(u1): stbu rALT, 1(rDEST) + CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) + STORE_RETURN_VALUE (rDEST) + blr +END (BP_SYM (__stpcpy)) + +weak_alias (BP_SYM (__stpcpy), BP_SYM (stpcpy)) +libc_hidden_def (__stpcpy) diff --git a/sysdeps/powerpc/powerpc64/strchr.S b/sysdeps/powerpc/powerpc64/strchr.S new file mode 100644 index 0000000..f6d418b --- /dev/null +++ b/sysdeps/powerpc/powerpc64/strchr.S @@ -0,0 +1,130 @@ +/* Optimized strchr implementation for PowerPC64. + Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* See strlen.s for comments on how this works. */ + +/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */ + +ENTRY (BP_SYM (strchr)) + +#define rTMP1 r0 +#define rRTN r3 /* outgoing result */ +#if __BOUNDED_POINTERS__ +# define rSTR r4 +# define rCHR r5 /* byte we're looking for, spread over the whole word */ +# define rWORD r8 /* the current word */ +#else +# define rSTR r8 /* current word pointer */ +# define rCHR r4 /* byte we're looking for, spread over the whole word */ +# define rWORD r5 /* the current word */ +#endif +#define rCLZB rCHR /* leading zero byte count */ +#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f */ +#define rTMP2 r9 +#define rIGN r10 /* number of bits we should ignore in the first word */ +#define rMASK r11 /* mask with the bits to ignore set to 0 */ +#define rTMP3 r12 + + CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2) + STORE_RETURN_BOUNDS (rTMP1, rTMP2) + + rlwimi rCHR, rCHR, 8, 16, 23 + li rMASK, -1 + rlwimi rCHR, rCHR, 16, 0, 15 + rlwinm rIGN, rRTN, 3, 27, 28 + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + clrrdi rSTR, rRTN, 2 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f +/* Test the first (partial?) word. */ + lwz rWORD, 0(rSTR) + srw rMASK, rMASK, rIGN + orc rWORD, rWORD, rMASK + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD + and. rTMP1, rTMP1, rTMP2 + xor rTMP3, rCHR, rWORD + orc rTMP3, rTMP3, rMASK + b L(loopentry) + +/* The loop. */ + +L(loop):lwzu rWORD, 4(rSTR) + and. rTMP1, rTMP1, rTMP2 +/* Test for 0. */ + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD + bne L(foundit) + and. rTMP1, rTMP1, rTMP2 +/* Start test for the bytes we're looking for. */ + xor rTMP3, rCHR, rWORD +L(loopentry): + add rTMP1, rFEFE, rTMP3 + nor rTMP2, r7F7F, rTMP3 + beq L(loop) +/* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a + zero byte, in which case we return a match. We guess that this hasn't + happened, though. */ +L(missed): + and. rTMP1, rTMP1, rTMP2 + li rRTN, 0 + STORE_RETURN_VALUE (rSTR) + beqlr +/* It did happen. Decide which one was first... + I'm not sure if this is actually faster than a sequence of + rotates, compares, and branches (we use it anyway because it's shorter). */ + and rFEFE, r7F7F, rWORD + or rMASK, r7F7F, rWORD + and rTMP1, r7F7F, rTMP3 + or rIGN, r7F7F, rTMP3 + add rFEFE, rFEFE, r7F7F + add rTMP1, rTMP1, r7F7F + nor rWORD, rMASK, rFEFE + nor rTMP2, rIGN, rTMP1 + cmplw rWORD, rTMP2 + bgtlr + cntlzw rCLZB, rTMP2 + srwi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge) + STORE_RETURN_VALUE (rSTR) + blr + +L(foundit): + and rTMP1, r7F7F, rTMP3 + or rIGN, r7F7F, rTMP3 + add rTMP1, rTMP1, r7F7F + nor rTMP2, rIGN, rTMP1 + cntlzw rCLZB, rTMP2 + subi rSTR, rSTR, 4 + srwi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge) + STORE_RETURN_VALUE (rSTR) + blr +END (BP_SYM (strchr)) + +weak_alias (BP_SYM (strchr), BP_SYM (index)) diff --git a/sysdeps/powerpc/powerpc64/strcmp.S b/sysdeps/powerpc/powerpc64/strcmp.S new file mode 100644 index 0000000..71c6d86 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/strcmp.S @@ -0,0 +1,133 @@ +/* Optimized strcmp implementation for PowerPC64. + Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + +EALIGN (BP_SYM(strcmp), 4, 0) + +#define rTMP r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#if __BOUNDED_POINTERS__ +# define rHIGH1 r11 +# define rHIGH2 r12 +#endif +#define rWORD1 r5 /* current word in s1 */ +#define rWORD2 r6 /* current word in s2 */ +#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r10 /* bits that differ in s1 & s2 words */ + + CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1) + CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2) + + or rTMP, rSTR2, rSTR1 + clrldi. rTMP, rTMP, 62 + lis rFEFE, -0x101 + bne L(unaligned) + + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) + lis r7F7F, 0x7f7f + addi rFEFE, rFEFE, -0x101 + clrldi rFEFE,rFEFE,32 /* clear upper 32 */ + addi r7F7F, r7F7F, 0x7f7f + b L(g1) + +L(g0): lwzu rWORD1, 4(rSTR1) + bne cr1, L(different) + lwzu rWORD2, 4(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + + clrldi rNEG,rNEG,32 /* clear upper 32 */ + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + beq+ L(g0) +L(endstring): +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + + extsw. rBITDIF,rBITDIF /* propagate sign for blt */ + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF + cntlzw rNEG, rNEG + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + bgelr+ cr1 +L(equal): + li rRTN, 0 + /* GKM FIXME: check high bounds. */ + blr + +L(different): + lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + + extsw. rBITDIF,rBITDIF /* propagate sign for bgelr */ + sub rRTN, rWORD1, rWORD2 + bgelr+ +L(highbit): + ori rRTN, rWORD2, 1 + /* GKM FIXME: check high bounds. */ + blr + + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(unaligned): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + b L(u1) + +L(u0): lbzu rWORD1, 1(rSTR1) + bne- L(u4) + lbzu rWORD2, 1(rSTR2) +L(u1): cmpwi cr1, rWORD1, 0 + beq- cr1, L(u3) + cmpw rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpwi cr1, rWORD1, 0 + cmpw rWORD1, rWORD2 + bne+ cr1, L(u0) +L(u3): sub rRTN, rWORD1, rWORD2 + /* GKM FIXME: check high bounds. */ + blr +L(u4): lbz rWORD1, -1(rSTR1) + sub rRTN, rWORD1, rWORD2 + /* GKM FIXME: check high bounds. */ + blr +END (BP_SYM (strcmp)) diff --git a/sysdeps/powerpc/powerpc64/strcpy.S b/sysdeps/powerpc/powerpc64/strcpy.S new file mode 100644 index 0000000..2015eff --- /dev/null +++ b/sysdeps/powerpc/powerpc64/strcpy.S @@ -0,0 +1,120 @@ +/* Optimized strcpy implementation for PowerPC64. + Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */ + +EALIGN (BP_SYM (strcpy), 4, 0) + +#define rTMP r0 +#define rRTN r3 /* incoming DEST arg preserved as result */ +#if __BOUNDED_POINTERS__ +# define rDEST r4 /* pointer to previous word in dest */ +# define rSRC r5 /* pointer to previous word in src */ +# define rLOW r11 +# define rHIGH r12 +#else +# define rSRC r4 /* pointer to previous word in src */ +# define rDEST r5 /* pointer to previous word in dest */ +#endif +#define rWORD r6 /* current word from src */ +#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rALT r10 /* alternate word from src */ + + CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH) + CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH) + STORE_RETURN_BOUNDS (rLOW, rHIGH) + + or rTMP, rSRC, rRTN + clrldi. rTMP, rTMP, 62 +#if __BOUNDED_POINTERS__ + addi rDEST, rDEST, -4 +#else + addi rDEST, rRTN, -4 +#endif + bne L(unaligned) + + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + lwz rWORD, 0(rSRC) + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g2) + +L(g0): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rDEST) + add rTMP, rFEFE, rALT + nor rNEG, r7F7F, rALT + and. rTMP, rTMP, rNEG + bne- L(g1) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rDEST) +L(g2): add rTMP, rFEFE, rWORD + nor rNEG, r7F7F, rWORD + and. rTMP, rTMP, rNEG + beq+ L(g0) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g1): rlwinm. rTMP, rALT, 8, 24, 31 + stb rTMP, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stb rTMP, 5(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stb rTMP, 6(rDEST) + beqlr- + stb rALT, 7(rDEST) + /* GKM FIXME: check high bound. */ + blr + +/* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 + nop +L(unaligned): + lbz rWORD, 0(rSRC) + addi rDEST, rRTN, -1 + cmpwi rWORD, 0 + beq- L(u2) + +L(u0): lbzu rALT, 1(rSRC) + stbu rWORD, 1(rDEST) + cmpwi rALT, 0 + beq- L(u1) + nop /* Let 601 load start of loop. */ + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rDEST) + cmpwi rWORD, 0 + bne+ L(u0) +L(u2): stb rWORD, 1(rDEST) + /* GKM FIXME: check high bound. */ + blr +L(u1): stb rALT, 1(rDEST) + /* GKM FIXME: check high bound. */ + blr + +END (BP_SYM (strcpy)) diff --git a/sysdeps/powerpc/powerpc64/strlen.S b/sysdeps/powerpc/powerpc64/strlen.S new file mode 100644 index 0000000..7907382 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/strlen.S @@ -0,0 +1,163 @@ +/* Optimized strlen implementation for PowerPC64. + Copyright (C) 1997, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <bp-sym.h> +#include <bp-asm.h> + +/* The algorithm here uses the following techniques: + + 1) Given a word 'x', we can test to see if it contains any 0 bytes + by subtracting 0x01010101, and seeing if any of the high bits of each + byte changed from 0 to 1. This works because the least significant + 0 byte must have had no incoming carry (otherwise it's not the least + significant), so it is 0x00 - 0x01 == 0xff. For all other + byte values, either they have the high bit set initially, or when + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when + there were no 0x00 bytes in the word. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). + This produces 0x80 in each byte that was zero, and 0x00 in all + the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each + byte, and the '| x' part ensures that bytes with the high bit set + produce 0x00. The addition will carry into the high bit of each byte + iff that byte had one of its low 7 bits set. We can then just see + which was the most significant bit set and divide by 8 to find how + many to add to the index. + This is from the book 'The PowerPC Compiler Writer's Guide', + by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. + + We deal with strings not aligned to a word boundary by taking the + first word and ensuring that bytes not part of the string + are treated as nonzero. To allow for memory latency, we unroll the + loop a few times, being careful to ensure that we do not read ahead + across cache line boundaries. + + Questions to answer: + 1) How long are strings passed to strlen? If they're often really long, + we should probably use cache management instructions and/or unroll the + loop more. If they're often quite short, it might be better to use + fact (2) in the inner loop than have to recalculate it. + 2) How popular are bytes with the high bit set? If they are very rare, + on some processors it might be useful to use the simpler expression + ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one + ALU), but this fails when any character has its high bit set. */ + +/* Some notes on register usage: Under the SVR4 ABI, we can use registers + 0 and 3 through 12 (so long as we don't call any procedures) without + saving them. We can also use registers 14 through 31 if we save them. + We can't use r1 (it's the stack pointer), r2 nor r13 because the user + program may expect them to hold their usual value if we get sent + a signal. Integer parameters are passed in r3 through r10. + We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving + them, the others we must save. */ + +/* int [r3] strlen (char *s [r3]) */ + +ENTRY (BP_SYM (strlen)) + +#define rTMP1 r0 +#define rRTN r3 /* incoming STR arg, outgoing result */ +#define rSTR r4 /* current string position */ +#define rPADN r5 /* number of padding bits we prepend to the + string to make it start at a word boundary */ +#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f */ +#define rWORD1 r8 /* current string word */ +#define rWORD2 r9 /* next string word */ +#define rMASK r9 /* mask for first string word */ +#define rTMP2 r10 +#define rTMP3 r11 +#define rTMP4 r12 + + CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2) + + clrrdi rSTR, rRTN, 2 + lis r7F7F, 0x7f7f + rlwinm rPADN, rRTN, 3, 27, 28 + lwz rWORD1, 0(rSTR) + li rMASK, -1 + addi r7F7F, r7F7F, 0x7f7f +/* That's the setup done, now do the first pair of words. + We make an exception and use method (2) on the first two words, to reduce + overhead. */ + srw rMASK, rMASK, rPADN + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F + nor rTMP1, rTMP2, rTMP1 + and. rWORD1, rTMP1, rMASK + mtcrf 0x01, rRTN + bne L(done0) + lis rFEFE, -0x101 + addi rFEFE, rFEFE, -0x101 + clrldi rFEFE,rFEFE,32 /* clear upper 32 */ +/* Are we now aligned to a doubleword boundary? */ + bt 29, L(loop) + +/* Handle second word of pair. */ + lwzu rWORD1, 4(rSTR) + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F + nor. rWORD1, rTMP2, rTMP1 + clrldi. rWORD1,rWORD1,32 /* clear upper 32 */ + bne L(done0) + +/* The loop. */ + +L(loop): + lwz rWORD1, 4(rSTR) + lwzu rWORD2, 8(rSTR) + add rTMP1, rFEFE, rWORD1 + nor rTMP2, r7F7F, rWORD1 + and. rTMP1, rTMP1, rTMP2 + clrldi. rTMP1,rTMP1,32 /* clear upper 32 */ + add rTMP3, rFEFE, rWORD2 + nor rTMP4, r7F7F, rWORD2 + bne L(done1) + and. rTMP1, rTMP3, rTMP4 + clrldi. rTMP1,rTMP1,32 /* clear upper 32 */ + beq L(loop) + + and rTMP1, r7F7F, rWORD2 + add rTMP1, rTMP1, r7F7F + andc rWORD1, rTMP4, rTMP1 + b L(done0) + +L(done1): + and rTMP1, r7F7F, rWORD1 + subi rSTR, rSTR, 4 + add rTMP1, rTMP1, r7F7F + andc rWORD1, rTMP2, rTMP1 + +/* When we get to here, rSTR points to the first word in the string that + contains a zero byte, and the most significant set bit in rWORD1 is in that + byte. */ +L(done0): + cntlzw rTMP3, rWORD1 + subf rTMP1, rRTN, rSTR + srwi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + /* GKM FIXME: check high bound. */ + blr +END (BP_SYM (strlen)) |