diff options
author | Ulrich Weigand <Ulrich.Weigand@de.ibm.com> | 2013-12-04 06:52:40 -0600 |
---|---|---|
committer | Adhemerval Zanella <azanella@linux.vnet.ibm.com> | 2013-12-04 07:41:38 -0600 |
commit | 122b66defdb9e4ded3ccc5c2b290f0520c6fa3cd (patch) | |
tree | ab5ac210d7a416d9b0bb5bef9338d89793f9bb5c /sysdeps/powerpc | |
parent | 696caf1d002ff059ddd20fd5eaccd76229c14850 (diff) | |
download | glibc-122b66defdb9e4ded3ccc5c2b290f0520c6fa3cd.zip glibc-122b66defdb9e4ded3ccc5c2b290f0520c6fa3cd.tar.gz glibc-122b66defdb9e4ded3ccc5c2b290f0520c6fa3cd.tar.bz2 |
PowerPC64 ELFv2 ABI 3/6: PLT local entry point optimization
This is a follow-on to the previous patch to support the ELFv2 ABI in the
dynamic loader, split off into its own patch since it is just an optional
optimization.
In the ELFv2 ABI, most functions define both a global and a local entry
point. The local entry point requires r2 to be already set up by the
caller to point to the callee's TOC. The global entry point does not
require the caller to know about the callee's TOC, but it does require
the caller to set up r12 to the callee's entry point address.
Now, when setting up a PLT slot, the dynamic linker will usually need
to enter the address of the target function's global entry point into
the slot. However, if the dynamic linker can determine that the target
function is in the same DSO as the PLT slot itself, and the whole DSO
only uses a single TOC (which the static linker will let ld.so know via
a DT_PPC64_OPT entry), then it is possible to actually enter the local
entry point address into the PLT slot, for a slight improvement in
performance.
Note that this uncovered a problem on the first call via _dl_runtime_resolve,
because that routine neglected to restore the caller's TOC before calling
the target function for the first time, since it assumed that function
would always reload its own TOC anyway ...
Diffstat (limited to 'sysdeps/powerpc')
-rw-r--r-- | sysdeps/powerpc/powerpc64/dl-machine.h | 44 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/dl-trampoline.S | 8 |
2 files changed, 50 insertions, 2 deletions
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index f222bb0..eccfbb3 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -424,6 +424,42 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
   return lazy;
 }
 
+#if _CALL_ELF == 2
+/* If the PLT entry whose reloc is 'reloc' resolves to a function in
+   the same object, return the target function's local entry point
+   offset if usable. */
+static inline Elf64_Addr __attribute__ ((always_inline))
+ppc64_local_entry_offset (struct link_map *map, lookup_t sym_map,
+                          const Elf64_Rela *reloc)
+{
+  const Elf64_Sym *symtab;
+  const Elf64_Sym *sym;
+
+  /* If the target function is in a different object, we cannot
+     use the local entry point. */
+  if (sym_map != map)
+    return 0;
+
+  /* If the linker inserted multiple TOCs, we cannot use the
+     local entry point. */
+  if (map->l_info[DT_PPC64(OPT)]
+      && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_MULTI_TOC))
+    return 0;
+
+  /* Otherwise, we can use the local entry point. Retrieve its offset
+     from the symbol's ELF st_other field. */
+  symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+  sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
+
+  /* If the target function is an ifunc then the local entry offset is
+     for the resolver, not the final destination. */
+  if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0))
+    return 0;
+
+  return PPC64_LOCAL_ENTRY_OFFSET (sym->st_other);
+}
+#endif
+
 /* Change the PLT entry whose reloc is 'reloc' to call the actual
    routine. */
 static inline Elf64_Addr __attribute__ ((always_inline))
@@ -470,6 +506,7 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map,
   PPC_DCBST (&plt->fd_func);
   PPC_ISYNC;
 #else
+  finaladdr += ppc64_local_entry_offset (map, sym_map, reloc);
   *reloc_addr = finaladdr;
 #endif
 
@@ -477,7 +514,9 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map,
 }
 
 static inline void __attribute__ ((always_inline))
-elf_machine_plt_conflict (Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
+elf_machine_plt_conflict (struct link_map *map, lookup_t sym_map,
+                          const Elf64_Rela *reloc,
+                          Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
 {
 #if _CALL_ELF != 2
   Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr;
@@ -491,6 +530,7 @@ elf_machine_plt_conflict (Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
   PPC_DCBST (&plt->fd_toc);
   PPC_SYNC;
 #else
+  finaladdr += ppc64_local_entry_offset (map, sym_map, reloc);
   *reloc_addr = finaladdr;
 #endif
 }
@@ -646,7 +686,7 @@ elf_machine_rela (struct link_map *map,
       /* Fall thru */
     case R_PPC64_JMP_SLOT:
 #ifdef RESOLVE_CONFLICT_FIND_MAP
-      elf_machine_plt_conflict (reloc_addr, value);
+      elf_machine_plt_conflict (map, sym_map, reloc, reloc_addr, value);
 #else
       elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value);
 #endif
diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S
index bffc4cb..e31311c 100644
--- a/sysdeps/powerpc/powerpc64/dl-trampoline.S
+++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S
@@ -74,6 +74,10 @@ EALIGN(_dl_runtime_resolve, 4, 0)
 /* Prepare for calling the function returned by fixup. */
 	PPC64_LOAD_FUNCPTR r3
 	ld r3,INT_PARMS+0(r1)
+#if _CALL_ELF == 2
+/* Restore the caller's TOC in case we jump to a local entry point. */
+	ld r2,FRAME_SIZE+40(r1)
+#endif
 /* Unwind the stack frame, and jump. */
 	addi r1,r1,FRAME_SIZE
 	bctr
@@ -321,6 +325,10 @@ L(restoreFXR):
 /* Prepare for calling the function returned by fixup. */
 	PPC64_LOAD_FUNCPTR r3
 	ld r3,INT_PARMS+0(r1)
+#if _CALL_ELF == 2
+/* Restore the caller's TOC in case we jump to a local entry point. */
+	ld r2,FRAME_SIZE+40(r1)
+#endif
 /* Load the floating point registers. */
 	lfd fp1,FPR_PARMS+0(r1)
 	lfd fp2,FPR_PARMS+8(r1)