aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog7
-rw-r--r--sysdeps/powerpc/bits/link.h7
-rw-r--r--sysdeps/powerpc/powerpc64/dl-trampoline.S441
3 files changed, 351 insertions, 104 deletions
diff --git a/ChangeLog b/ChangeLog
index 683c403..c2f12b6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2005-02-07 Steven Munroe <sjmunroe@us.ibm.com>
+
+ * sysdeps/powerpc/bits/link.h (La_ppc64_regs): Add lr_vrsave.
+ (La_ppc64_retval): Correct size of lrc_fp.
+ * sysdeps/powerpc/powerpc64/dl-trampoline.S (_dl_profile_resolve):
+ Fix up ABI problems and complete function.
+
2005-03-10 Jakub Jelinek <jakub@redhat.com>
* math/test-misc.c (main): Add some more tests.
diff --git a/sysdeps/powerpc/bits/link.h b/sysdeps/powerpc/bits/link.h
index f8e6734..6c6f604 100644
--- a/sysdeps/powerpc/bits/link.h
+++ b/sysdeps/powerpc/bits/link.h
@@ -71,7 +71,8 @@ typedef struct La_ppc64_regs
{
uint64_t lr_reg[8];
double lr_fp[13];
- uint64_t __padding;
+ uint32_t __padding;
+ uint32_t lr_vrsave;
uint32_t lr_vreg[12][4];
uint64_t lr_r1;
uint64_t lr_lr;
@@ -82,8 +83,8 @@ typedef struct La_ppc64_retval
{
uint64_t lrv_r3;
uint64_t lrv_r4;
- double lrv_fp[8];
- uint32_t lrv_v2[4];
+ double lrv_fp[4]; /* f1-f4, float - complex long double. */
+ uint32_t lrv_v2[4]; /* v2. */
} La_ppc64_retval;
diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S
index 316f17a..0c61706 100644
--- a/sysdeps/powerpc/powerpc64/dl-trampoline.S
+++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S
@@ -18,9 +18,15 @@
02111-1307 USA. */
#include <sysdep.h>
+#include <rtld-global-offsets.h>
- .section ".text"
+ .section ".text"
+/* On entry r0 contains the index of the PLT entry we need to fixup
+ and r11 contains the link_map (from PLT0+16). The link_map becomes
+ parm1 (r3) and the index (r0) need to be converted to an offset
+ (index * 24) in parm2 (r4). */
+
EALIGN(_dl_runtime_resolve, 4, 0)
/* We need to save the registers used to pass parameters, ie. r3 thru
r10; the registers are saved in a stack frame. */
@@ -68,129 +74,362 @@ EALIGN(_dl_runtime_resolve, 4, 0)
bctr
END(_dl_runtime_resolve)
+ /* Stack layout:
+ +592 previous backchain
+ +584 spill_r31
+ +576 spill_r30
+ +560 v1
+ +552 fp4
+ +544 fp3
+ +536 fp2
+ +528 fp1
+ +520 r4
+ +512 r3
+ return values
+ +504 free
+ +496 stackframe
+ +488 lr
+ +480 r1
+ +464 v13
+ +448 v12
+ +432 v11
+ +416 v10
+ +400 v9
+ +384 v8
+ +368 v7
+ +352 v6
+ +336 v5
+ +320 v4
+ +304 v3
+ +288 v2
+ * VMX Parms in V2-V13, V0-V1 are scratch
+ +284 vrsave
+ +280 free
+ +272 fp13
+ +264 fp12
+ +256 fp11
+ +248 fp10
+ +240 fp9
+ +232 fp8
+ +224 fp7
+ +216 fp6
+ +208 fp5
+ +200 fp4
+ +192 fp3
+ +184 fp2
+ +176 fp1
+ * FP Parms in FP1-FP13, FP0 is a scratch register
+ +168 r10
+ +160 r9
+ +152 r8
+ +144 r7
+ +136 r6
+ +128 r5
+ +120 r4
+ +112 r3
+ * Integer parms in R3-R10, R0 is scratch, R1 SP, R2 is TOC
+ +104 parm8
+ +96 parm7
+ +88 parm6
+ +80 parm5
+ +72 parm4
+ +64 parm3
+ +56 parm2
+ +48 parm1
+ * Parameter save area, Allocated by the call, at least 8 double words
+ +40 TOC save area
+ +32 Reserved for linker
+ +24 Reserved for compiler
+ +16 LR save area
+ +8 CR save area
+ r1+0 stack back chain
+ */
+#define FRAME_SIZE 592
+#define INT_RTN 512
+#define FPR_RTN 528
+#define VR_RTN 560
+#define STACK_FRAME 496
+#define CALLING_LR 488
+#define CALLING_SP 480
+#define INT_PARMS 112
+#define FPR_PARMS 176
+#define VR_PARMS 288
+#define VR_VRSAVE 284
+ .section ".toc","aw"
+.LC__dl_hwcap:
+# ifdef SHARED
+ .tc _rtld_global_ro[TC],_rtld_global_ro
+# else
+ .tc _dl_hwcap[TC],_dl_hwcap
+# endif
+ .section ".text"
-
+ .machine "altivec"
+/* On entry r0 contains the index of the PLT entry we need to fixup
+ and r11 contains the link_map (from PLT0+16). The link_map becomes
+ parm1 (r3) and the index (r0) needs to be converted to an offset
+ (index * 24) in parm2 (r4). */
+
EALIGN(_dl_profile_resolve, 4, 0)
+/* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we
+ need to call _dl_call_pltexit. */
+ std r31,-8(r1)
+ std r30,-16(r1)
/* We need to save the registers used to pass parameters, ie. r3 thru
r10; the registers are saved in a stack frame. */
- stdu r1,-448(r1)
- /* Stack layout:
-
- +432 stackframe
- +424 lr
- +416 r1
- +400 v12
- +384 v11
- +368 v10
- +362 v9
- +336 v8
- +320 v7
- +304 v6
- +288 v5
- +272 v4
- +256 v3
- +240 v2
- +224 v1
- +216 free
- +208 fp13
- +200 fp12
- +192 fp11
- +184 fp10
- +176 fp9
- +168 fp8
- +160 fp7
- +152 fp6
- +144 fp5
- +136 fp4
- +128 fp3
- +120 fp2
- +112 fp1
- +104 r10
- +96 r9
- +88 r8
- +80 r7
- +72 r6
- +64 r5
- +56 r4
- +48 r3
- +8 cr
- r1 link
- */
- std r3,48(r1)
+ stdu r1,-FRAME_SIZE(r1)
+ std r3,INT_PARMS+0(r1)
mr r3,r11
- std r4,56(r1)
- sldi r4,r0,1
- std r5,64(r1)
- add r4,r4,0
- std r6,72(r1)
- sldi r4,r4,3
- std r7,80(r1)
+ std r4,INT_PARMS+8(r1)
+ sldi r4,r0,1 /* index * 2 */
+ std r5,INT_PARMS+16(r1)
+ add r4,r4,r0 /* index * 3 */
+ std r6,INT_PARMS+24(r1)
+ sldi r4,r4,3 /* index * 24 == PLT offset */
mflr r5
- std r8,88(r1)
+ std r7,INT_PARMS+32(r1)
+ std r8,INT_PARMS+40(r1)
/* Store the LR in the LR Save area of the previous frame. */
/* XXX Do we have to do this? */
- std r5,448+16(r1)
- std r5,424(r1)
+ la r8,FRAME_SIZE(r1)
+ std r5,FRAME_SIZE+16(r1)
+ std r5,CALLING_LR(r1)
mfcr r0
- std r9,96(r1)
- std r10,104(r1)
+ std r9,INT_PARMS+48(r1)
+ std r10,INT_PARMS+56(r1)
+ std r8,CALLING_SP(r1)
/* I'm almost certain we don't have to save cr... be safe. */
std r0,8(r1)
+ ld r12,.LC__dl_hwcap@toc(r2)
+#ifdef SHARED
+ /* Load _rtld-global._dl_hwcap. */
+ ld r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12)
+#else
+ ld r12,0(r12) /* Load extern _dl_hwcap. */
+#endif
+ andis. r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16)
+ beq L(saveFP)
+ la r10,(VR_PARMS+0)(r1)
+ la r9,(VR_PARMS+16)(r1)
+ li r11,32
+ li r12,64
+ stvx v2,0,r10
+ stvx v3,0,r9
+
+ stvx v4,r11,r10
+ stvx v5,r11,r9
+ addi r11,r11,64
+
+ stvx v6,r12,r10
+ stvx v7,r12,r9
+ addi r12,r12,64
+
+ stvx v8,r11,r10
+ stvx v9,r11,r9
+ addi r11,r11,64
+
+ stvx v10,r12,r10
+ stvx v11,r12,r9
+ mfspr r0,VRSAVE
+
+ stvx v12,r11,r10
+ stvx v13,r11,r9
+L(saveFP):
+ stw r0,VR_VRSAVE(r1)
/* Save floating registers. */
- stfd fp1,112(r1)
- stfd fp2,120(r1)
- stfd fp3,128(r1)
- stfd fp4,136(r1)
- stfd fp5,144(r1)
- stfd fp6,152(r1)
- stfd fp7,160(r1)
- stfd fp8,168(r1)
- stfd fp9,176(r1)
- stfd fp10,184(r1)
- stfd fp11,192(r1)
- stfd fp12,200(r1)
- stfd fp13,208(r1)
-/* XXX TODO: store vmx registers. */
-/* Load the extra parameters. */
- addi r6,r1,48
- addi r7,r1,432
+ stfd fp1,FPR_PARMS+0(r1)
+ stfd fp2,FPR_PARMS+8(r1)
+ stfd fp3,FPR_PARMS+16(r1)
+ stfd fp4,FPR_PARMS+24(r1)
+ stfd fp5,FPR_PARMS+32(r1)
+ stfd fp6,FPR_PARMS+40(r1)
+ stfd fp7,FPR_PARMS+48(r1)
+ stfd fp8,FPR_PARMS+56(r1)
+ stfd fp9,FPR_PARMS+64(r1)
+ stfd fp10,FPR_PARMS+72(r1)
+ stfd fp11,FPR_PARMS+80(r1)
li r0,-1
- stdu r0,0(r7)
+ stfd fp12,FPR_PARMS+88(r1)
+ stfd fp13,FPR_PARMS+96(r1)
+/* Load the extra parameters. */
+ addi r6,r1,INT_PARMS
+ addi r7,r1,STACK_FRAME
+/* Save link_map* and reloc_addr parms for later. */
+ mr r31,r3
+ mr r30,r4
+ std r0,0(r7)
bl JUMPTARGET(_dl_profile_fixup)
-/* Put the registers back. */
- ld r0,448+16(r1)
- ld r10,104(r1)
- ld r9,96(r1)
- ld r8,88(r1)
- ld r7,80(r1)
+ nop
+/* Test *framesizep > 0 to see if need to do pltexit processing. */
+ ld r0,STACK_FRAME(r1)
+/* Put the registers back. */
+ lwz r12,VR_VRSAVE(r1)
+ cmpdi cr1,r0,0
+ cmpdi cr0,r12,0
+ bgt cr1,L(do_pltexit)
+ la r10,(VR_PARMS+0)(r1)
+ la r9,(VR_PARMS+16)(r1)
+/* VRSAVE must be non-zero if VMX is present and VRs are in use. */
+ beq L(restoreFXR)
+ li r11,32
+ li r12,64
+ lvx v2,0,r10
+ lvx v3,0,r9
+
+ lvx v4,r11,r10
+ lvx v5,r11,r9
+ addi r11,r11,64
+
+ lvx v6,r12,r10
+ lvx v7,r12,r9
+ addi r12,r12,64
+
+ lvx v8,r11,r10
+ lvx v9,r11,r9
+ addi r11,r11,64
+
+ lvx v10,r12,r10
+ lvx v11,r12,r9
+
+ lvx v12,r11,r10
+ lvx v13,r11,r9
+L(restoreFXR):
+ ld r0,FRAME_SIZE+16(r1)
+ ld r10,INT_PARMS+56(r1)
+ ld r9,INT_PARMS+48(r1)
+ ld r8,INT_PARMS+40(r1)
+ ld r7,INT_PARMS+32(r1)
mtlr r0
ld r0,8(r1)
- ld r6,72(r1)
- ld r5,64(r1)
- ld r4,56(r1)
+ ld r6,INT_PARMS+24(r1)
+ ld r5,INT_PARMS+16(r1)
+ ld r4,INT_PARMS+8(r1)
mtcrf 0xFF,r0
/* Load the target address, toc and static chain reg from the function
descriptor returned by fixup. */
ld r0,0(r3)
ld r2,8(r3)
- mtctr r0
ld r11,16(r3)
- ld r3,48(r1)
+ ld r3,INT_PARMS+0(r1)
+ mtctr r0
/* Load the floating point registers. */
- lfd fp1,112(r1)
- lfd fp2,120(r1)
- lfd fp3,128(r1)
- lfd fp4,136(r1)
- lfd fp5,144(r1)
- lfd fp6,152(r1)
- lfd fp7,160(r1)
- lfd fp8,168(r1)
- lfd fp9,176(r1)
- lfd fp10,184(r1)
- lfd fp11,192(r1)
- lfd fp12,200(r1)
- lfd fp13,208(r1)
+ lfd fp1,FPR_PARMS+0(r1)
+ lfd fp2,FPR_PARMS+8(r1)
+ lfd fp3,FPR_PARMS+16(r1)
+ lfd fp4,FPR_PARMS+24(r1)
+ lfd fp5,FPR_PARMS+32(r1)
+ lfd fp6,FPR_PARMS+40(r1)
+ lfd fp7,FPR_PARMS+48(r1)
+ lfd fp8,FPR_PARMS+56(r1)
+ lfd fp9,FPR_PARMS+64(r1)
+ lfd fp10,FPR_PARMS+72(r1)
+ lfd fp11,FPR_PARMS+80(r1)
+ lfd fp12,FPR_PARMS+88(r1)
+ lfd fp13,FPR_PARMS+96(r1)
/* Unwind the stack frame, and jump. */
- addi r1,r1,448
+ ld r31,584(r1)
+ ld r30,576(r1)
+ addi r1,r1,FRAME_SIZE
bctr
+L(do_pltexit):
+ la r10,(VR_PARMS+0)(r1)
+ la r9,(VR_PARMS+16)(r1)
+ beq L(restoreFXR2)
+ li r11,32
+ li r12,64
+ lvx v2,0,r10
+ lvx v3,0,r9
+
+ lvx v4,r11,r10
+ lvx v5,r11,r9
+ addi r11,r11,64
+
+ lvx v6,r12,r10
+ lvx v7,r12,r9
+ addi r12,r12,64
+
+ lvx v8,r11,r10
+ lvx v9,r11,r9
+ addi r11,r11,64
+
+ lvx v10,r12,r10
+ lvx v11,r12,r9
+
+ lvx v12,r11,r10
+ lvx v13,r11,r9
+L(restoreFXR2):
+ ld r0,FRAME_SIZE+16(r1)
+ ld r10,INT_PARMS+56(r1)
+ ld r9,INT_PARMS+48(r1)
+ ld r8,INT_PARMS+40(r1)
+ ld r7,INT_PARMS+32(r1)
+ mtlr r0
+ ld r0,8(r1)
+ ld r6,INT_PARMS+24(r1)
+ ld r5,INT_PARMS+16(r1)
+ ld r4,INT_PARMS+8(r1)
+ mtcrf 0xFF,r0
+/* Load the target address, toc and static chain reg from the function
+ descriptor returned by fixup. */
+ ld r0,0(r3)
+ std r2,40(r1)
+ ld r2,8(r3)
+ ld r11,16(r3)
+ ld r3,INT_PARMS+0(r1)
+ mtctr r0
+/* Load the floating point registers. */
+ lfd fp1,FPR_PARMS+0(r1)
+ lfd fp2,FPR_PARMS+8(r1)
+ lfd fp3,FPR_PARMS+16(r1)
+ lfd fp4,FPR_PARMS+24(r1)
+ lfd fp5,FPR_PARMS+32(r1)
+ lfd fp6,FPR_PARMS+40(r1)
+ lfd fp7,FPR_PARMS+48(r1)
+ lfd fp8,FPR_PARMS+56(r1)
+ lfd fp9,FPR_PARMS+64(r1)
+ lfd fp10,FPR_PARMS+72(r1)
+ lfd fp11,FPR_PARMS+80(r1)
+ lfd fp12,FPR_PARMS+88(r1)
+ lfd fp13,FPR_PARMS+96(r1)
+/* Call the target function. */
+ bctrl
+ ld r2,40(r1)
+ lwz r12,VR_VRSAVE(r1)
+/* But return here and store the return values. */
+ std r3,INT_RTN(r1)
+ std r4,INT_RTN+8(r1)
+ stfd fp1,FPR_PARMS+0(r1)
+ stfd fp2,FPR_PARMS+8(r1)
+ cmpdi cr0,r12,0
+ la r10,VR_RTN(r1)
+ stfd fp3,FPR_PARMS+16(r1)
+ stfd fp4,FPR_PARMS+24(r1)
+ mr r3,r31
+ mr r4,r30
+ beq L(callpltexit)
+ stvx v2,0,r10
+L(callpltexit):
+ addi r5,r1,INT_PARMS
+ addi r6,r1,INT_RTN
+ bl JUMPTARGET(_dl_call_pltexit)
+ nop
+/* Restore the return values from target function. */
+ lwz r12,VR_VRSAVE(r1)
+ ld r3,INT_RTN(r1)
+ ld r4,INT_RTN+8(r1)
+ lfd fp1,FPR_PARMS+0(r1)
+ lfd fp2,FPR_PARMS+8(r1)
+ cmpdi cr0,r12,0
+ la r10,VR_RTN(r1)
+ lfd fp3,FPR_PARMS+16(r1)
+ lfd fp4,FPR_PARMS+24(r1)
+ beq L(pltexitreturn)
+ lvx v2,0,r10
+L(pltexitreturn):
+ ld r0,FRAME_SIZE+16(r1)
+ ld r31,584(r1)
+ ld r30,576(r1)
+ mtlr r0
+ ld r1,0(r1)
+ blr
END(_dl_profile_resolve)