aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sysdeps/hppa/dl-fptr.c26
-rw-r--r--sysdeps/hppa/dl-machine.h36
-rw-r--r--sysdeps/hppa/dl-trampoline.S74
-rw-r--r--sysdeps/unix/sysv/linux/hppa/atomic-machine.h28
4 files changed, 142 insertions, 22 deletions
diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c
index 0a37397..25ca8f8 100644
--- a/sysdeps/hppa/dl-fptr.c
+++ b/sysdeps/hppa/dl-fptr.c
@@ -172,8 +172,8 @@ make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp)
}
install:
- fdesc->ip = ip;
fdesc->gp = gp;
+ fdesc->ip = ip;
return (ElfW(Addr)) fdesc;
}
@@ -350,7 +350,9 @@ ElfW(Addr)
_dl_lookup_address (const void *address)
{
ElfW(Addr) addr = (ElfW(Addr)) address;
- unsigned int *desc, *gptr;
+ ElfW(Word) reloc_arg;
+ volatile unsigned int *desc;
+ unsigned int *gptr;
/* Return ADDR if the least-significant two bits of ADDR are not consistent
with ADDR being a linker defined function pointer. The normal value for
@@ -367,7 +369,11 @@ _dl_lookup_address (const void *address)
if (!_dl_read_access_allowed (desc))
return addr;
- /* Load first word of candidate descriptor. It should be a pointer
+ /* First load the relocation offset. */
+ reloc_arg = (ElfW(Word)) desc[1];
+ atomic_full_barrier();
+
+ /* Then load first word of candidate descriptor. It should be a pointer
with word alignment and point to memory that can be read. */
gptr = (unsigned int *) desc[0];
if (((unsigned int) gptr & 3) != 0
@@ -377,8 +383,8 @@ _dl_lookup_address (const void *address)
/* See if descriptor requires resolution. The following trampoline is
used in each global offset table for function resolution:
- ldw 0(r20),r22
- bv r0(r22)
+ ldw 0(r20),r21
+ bv r0(r21)
ldw 4(r20),r21
tramp: b,l .-12,r20
depwi 0,31,2,r20
@@ -389,7 +395,15 @@ _dl_lookup_address (const void *address)
if (gptr[0] == 0xea9f1fdd /* b,l .-12,r20 */
&& gptr[1] == 0xd6801c1e /* depwi 0,31,2,r20 */
&& (ElfW(Addr)) gptr[2] == elf_machine_resolve ())
- _dl_fixup ((struct link_map *) gptr[5], (ElfW(Word)) desc[1]);
+ {
+ struct link_map *l = (struct link_map *) gptr[5];
+
+ /* If gp has been resolved, we need to hunt for the relocation offset. */
+ if (!(reloc_arg & PA_GP_RELOC))
+ reloc_arg = _dl_fix_reloc_arg (addr, l);
+
+ _dl_fixup (l, reloc_arg);
+ }
return (ElfW(Addr)) desc[0];
}
diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h
index 9e98366..8ecff97 100644
--- a/sysdeps/hppa/dl-machine.h
+++ b/sysdeps/hppa/dl-machine.h
@@ -48,6 +48,14 @@
#define GOT_FROM_PLT_STUB (4*4)
#define PLT_ENTRY_SIZE (2*4)
+/* The gp slot in the function descriptor contains the relocation offset
+ before resolution. To distinguish between a resolved gp value and an
+ unresolved relocation offset we set an unused bit in the relocation
+ offset. This would allow us to do a synchronized two word update
+ using this bit (interlocked update), but instead of waiting for the
+ update we simply recompute the gp value given that we know the ip. */
+#define PA_GP_RELOC 1
+
/* Initialize the function descriptor table before relocations */
static inline void
__hppa_init_bootstrap_fdesc_table (struct link_map *map)
@@ -117,10 +125,28 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t,
volatile Elf32_Addr *rfdesc = reloc_addr;
/* map is the link_map for the caller, t is the link_map for the object
being called */
- rfdesc[1] = value.gp;
- /* Need to ensure that the gp is visible before the code
- entry point is updated */
- rfdesc[0] = value.ip;
+
+ /* We would like the function descriptor to be double word aligned. This
+ helps performance (ip and gp then reside on the same cache line) and
+ we can update the pair atomically with a single store. The linker
+ now ensures this alignment but we still have to handle old code. */
+ if ((unsigned int)reloc_addr & 7)
+ {
+ /* Need to ensure that the gp is visible before the code
+ entry point is updated */
+ rfdesc[1] = value.gp;
+ atomic_full_barrier();
+ rfdesc[0] = value.ip;
+ }
+ else
+ {
+ /* Update pair atomically with floating point store. */
+ union { ElfW(Word) v[2]; double d; } u;
+
+ u.v[0] = value.ip;
+ u.v[1] = value.gp;
+ *(volatile double *)rfdesc = u.d;
+ }
return value;
}
@@ -265,7 +291,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
here. The trampoline code will load the proper
LTP and pass the reloc offset to the fixup
function. */
- fptr->gp = iplt - jmprel;
+ fptr->gp = (iplt - jmprel) | PA_GP_RELOC;
} /* r_sym != 0 */
else
{
diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S
index 0114ca8..d0804b3 100644
--- a/sysdeps/hppa/dl-trampoline.S
+++ b/sysdeps/hppa/dl-trampoline.S
@@ -31,7 +31,7 @@
slow down __cffc when it attempts to call fixup to resolve function
descriptor references. Please refer to gcc/gcc/config/pa/fptr.c
- Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp. */
+ Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp, r22 = fp. */
/* RELOCATION MARKER: bl to provide gcc's __cffc with fixup loc. */
.text
@@ -61,17 +61,20 @@ _dl_runtime_resolve:
copy %sp, %r1 /* Copy previous sp */
/* Save function result address (on entry) */
stwm %r28,128(%sp)
- /* Fillin some frame info to follow ABI */
+ /* Fill in some frame info to follow ABI */
stw %r1,-4(%sp) /* Previous sp */
stw %r21,-32(%sp) /* PIC register value */
/* Save input floating point registers. This must be done
in the new frame since the previous frame doesn't have
enough space */
- ldo -56(%sp),%r1
+ ldo -64(%sp),%r1
fstd,ma %fr4,-8(%r1)
fstd,ma %fr5,-8(%r1)
fstd,ma %fr6,-8(%r1)
+
+ /* Test PA_GP_RELOC bit. */
+ bb,>= %r19,31,2f /* branch if not reloc offset */
fstd,ma %fr7,-8(%r1)
/* Set up args to fixup func, needs only two arguments */
@@ -79,7 +82,7 @@ _dl_runtime_resolve:
copy %r19,%r25 /* (2) reloc offset */
/* Call the real address resolver. */
- bl _dl_fixup,%rp
+3: bl _dl_fixup,%rp
copy %r21,%r19 /* set fixup func ltp */
/* While the linker will set a function pointer to NULL when it
@@ -102,7 +105,7 @@ _dl_runtime_resolve:
copy %r29, %r19
/* Reload arguments fp args */
- ldo -56(%sp),%r1
+ ldo -64(%sp),%r1
fldd,ma -8(%r1),%fr4
fldd,ma -8(%r1),%fr5
fldd,ma -8(%r1),%fr6
@@ -129,6 +132,25 @@ _dl_runtime_resolve:
bv %r0(%rp)
ldo -128(%sp),%sp
+2:
+ /* Set up args for _dl_fix_reloc_arg. */
+ copy %r22,%r26 /* (1) function pointer */
+ depi 0,31,2,%r26 /* clear least significant bits */
+ ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */
+
+ /* Save ltp and link map arg for _dl_fixup. */
+ stw %r21,-56(%sp) /* ltp */
+ stw %r25,-60(%sp) /* struct link map */
+
+ /* Find reloc offset. */
+ bl _dl_fix_reloc_arg,%rp
+ copy %r21,%r19 /* set func ltp */
+
+ /* Set up args for _dl_fixup. */
+ ldw -56(%sp),%r21 /* ltp */
+ ldw -60(%sp),%r26 /* (1) struct link map */
+ b 3b
+ copy %ret0,%r25 /* (2) reloc offset */
.EXIT
.PROCEND
cfi_endproc
@@ -153,7 +175,7 @@ _dl_runtime_profile:
copy %sp, %r1 /* Copy previous sp */
/* Save function result address (on entry) */
stwm %r28,192(%sp)
- /* Fillin some frame info to follow ABI */
+ /* Fill in some frame info to follow ABI */
stw %r1,-4(%sp) /* Previous sp */
stw %r21,-32(%sp) /* PIC register value */
@@ -181,10 +203,11 @@ _dl_runtime_profile:
fstd,ma %fr5,8(%r1)
fstd,ma %fr6,8(%r1)
fstd,ma %fr7,8(%r1)
- /* 32-bit stack pointer and return register */
- stw %sp,-56(%sp)
- stw %r2,-52(%sp)
+ /* Test PA_GP_RELOC bit. */
+ bb,>= %r19,31,2f /* branch if not reloc offset */
+ /* 32-bit stack pointer */
+ stw %sp,-56(%sp)
/* Set up args to fixup func, needs five arguments */
ldw 8+4(%r20),%r26 /* (1) got[1] == struct link_map */
@@ -197,7 +220,7 @@ _dl_runtime_profile:
stw %r1, -52(%sp) /* (5) long int *framesizep */
/* Call the real address resolver. */
- bl _dl_profile_fixup,%rp
+3: bl _dl_profile_fixup,%rp
copy %r21,%r19 /* set fixup func ltp */
/* Load up the returned function descriptor */
@@ -215,7 +238,9 @@ _dl_runtime_profile:
fldd,ma 8(%r1),%fr5
fldd,ma 8(%r1),%fr6
fldd,ma 8(%r1),%fr7
- ldw -52(%sp),%rp
+
+ /* Reload rp register -(192+20) without adjusting stack */
+ ldw -212(%sp),%rp
/* Reload static link register -(192+16) without adjusting stack */
ldw -208(%sp),%r29
@@ -303,6 +328,33 @@ L(cont):
ldw -20(%sp),%rp
/* Return */
bv,n 0(%r2)
+
+2:
+ /* Set up args for _dl_fix_reloc_arg. */
+ copy %r22,%r26 /* (1) function pointer */
+ depi 0,31,2,%r26 /* clear least significant bits */
+ ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */
+
+ /* Save ltp and link map arg for _dl_fixup. */
+ stw %r21,-92(%sp) /* ltp */
+ stw %r25,-116(%sp) /* struct link map */
+
+ /* Find reloc offset. */
+ bl _dl_fix_reloc_arg,%rp
+ copy %r21,%r19 /* set func ltp */
+
+ /* Restore fixup ltp. */
+ ldw -92(%sp),%r21 /* ltp */
+
+ /* Set up args to fixup func, needs five arguments */
+ ldw -116(%sp),%r26 /* (1) struct link map */
+ copy %ret0,%r25 /* (2) reloc offset */
+ stw %r25,-120(%sp) /* Save reloc offset */
+ ldw -212(%sp),%r24 /* (3) profile_fixup needs rp */
+ ldo -56(%sp),%r23 /* (4) La_hppa_regs */
+ ldo -112(%sp), %r1
+ b 3b
+ stw %r1, -52(%sp) /* (5) long int *framesizep */
.EXIT
.PROCEND
cfi_endproc
diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
index 9d8ffbe..bf61b66 100644
--- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
+++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
@@ -36,9 +36,37 @@ typedef uintptr_t uatomicptr_t;
typedef intmax_t atomic_max_t;
typedef uintmax_t uatomic_max_t;
+#define atomic_full_barrier() __sync_synchronize ()
+
#define __HAVE_64B_ATOMICS 0
#define USE_ATOMIC_COMPILER_BUILTINS 0
+/* We use the compiler atomic load and store builtins as the generic
+ defines are not atomic. In particular, we need to use compare and
+ exchange for stores as the implementation is synthesized. */
+void __atomic_link_error (void);
+#define __atomic_check_size_ls(mem) \
+ if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \
+ __atomic_link_error ();
+
+#define atomic_load_relaxed(mem) \
+ ({ __atomic_check_size_ls((mem)); \
+ __atomic_load_n ((mem), __ATOMIC_RELAXED); })
+#define atomic_load_acquire(mem) \
+ ({ __atomic_check_size_ls((mem)); \
+ __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
+
+#define atomic_store_relaxed(mem, val) \
+ do { \
+ __atomic_check_size_ls((mem)); \
+ __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \
+ } while (0)
+#define atomic_store_release(mem, val) \
+ do { \
+ __atomic_check_size_ls((mem)); \
+ __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \
+ } while (0)
+
/* XXX Is this actually correct? */
#define ATOMIC_EXCHANGE_USES_CAS 1