aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Szabolcs Nagy <szabolcs.nagy@arm.com> 2017-09-27 18:14:21 +0100
committer Szabolcs Nagy <szabolcs.nagy@arm.com> 2017-11-03 14:43:32 +0000
commit 91c5a366d8d398d2fc4542f961c93058a92ade6f (patch)
tree 0284a85ce0e29bc2eb8d6292a414c666d8bba35f
parent b7cf203b5c17dd6d9878537d41e0c7cc3d270a67 (diff)
download glibc-91c5a366d8d398d2fc4542f961c93058a92ade6f.zip
glibc-91c5a366d8d398d2fc4542f961c93058a92ade6f.tar.gz
glibc-91c5a366d8d398d2fc4542f961c93058a92ade6f.tar.bz2
aarch64: Remove barriers from TLS descriptor functions
Remove ldar synchronization and most lazy TLSDESC initialization
related code.

* sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
DT_TLSDESC_GOT initialization.
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
(_dl_tlsdesc_undefweak): Remove ldar.
(_dl_tlsdesc_dynamic): Likewise.
* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
(_dl_tlsdesc_resolve_hold_fixup): Likewise.
(_dl_tlsdesc_resolve_rela): Likewise.
(_dl_tlsdesc_resolve_hold): Likewise.
-rw-r--r-- ChangeLog 17
-rw-r--r-- sysdeps/aarch64/dl-machine.h 4
-rw-r--r-- sysdeps/aarch64/dl-tlsdesc.S 203
-rw-r--r-- sysdeps/aarch64/dl-tlsdesc.h 9
-rw-r--r-- sysdeps/aarch64/tlsdesc.c 127
5 files changed, 18 insertions, 342 deletions
diff --git a/ChangeLog b/ChangeLog
index 0126d77..8499ada 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+ * sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove
+ DT_TLSDESC_GOT initialization.
+ * sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Remove.
+ (_dl_tlsdesc_resolve_rela): Likewise.
+ (_dl_tlsdesc_resolve_hold): Likewise.
+ (_dl_tlsdesc_undefweak): Remove ldar.
+ (_dl_tlsdesc_dynamic): Likewise.
+ * sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Remove.
+ (_dl_tlsdesc_resolve_rela): Likewise.
+ (_dl_tlsdesc_resolve_hold): Likewise.
+ * sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Remove.
+ (_dl_tlsdesc_resolve_hold_fixup): Likewise.
+ (_dl_tlsdesc_resolve_rela): Likewise.
+ (_dl_tlsdesc_resolve_hold): Likewise.
+
+2017-11-03 Szabolcs Nagy <szabolcs.nagy@arm.com>
+
* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
binding and initialization non-lazily for R_AARCH64_TLSDESC.
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index 837e281..acbd3e3 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -102,10 +102,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
}
}
- if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
- *(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
- = (ElfW(Addr)) &_dl_tlsdesc_resolve_rela;
-
return lazy;
}
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 3b2da62..70550c7 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -80,30 +80,6 @@ _dl_tlsdesc_return:
cfi_endproc
.size _dl_tlsdesc_return, .-_dl_tlsdesc_return
- /* Same as _dl_tlsdesc_return but with synchronization for
- lazy relocation.
- Prototype:
- _dl_tlsdesc_return_lazy (tlsdesc *) ;
- */
- .hidden _dl_tlsdesc_return_lazy
- .global _dl_tlsdesc_return_lazy
- .type _dl_tlsdesc_return_lazy,%function
- cfi_startproc
- .align 2
-_dl_tlsdesc_return_lazy:
- /* The ldar here happens after the load from [x0] at the call site
- (that is generated by the compiler as part of the TLS access ABI),
- so it reads the same value (this function is the final value of
- td->entry) and thus it synchronizes with the release store to
- td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
- from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
- DELOUSE (0)
- ldar PTR_REG (zr), [x0]
- ldr PTR_REG (0), [x0, #PTR_SIZE]
- RET
- cfi_endproc
- .size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
-
/* Handler for undefined weak TLS symbols.
Prototype:
_dl_tlsdesc_undefweak (tlsdesc *);
@@ -121,14 +97,7 @@ _dl_tlsdesc_return_lazy:
_dl_tlsdesc_undefweak:
str x1, [sp, #-16]!
cfi_adjust_cfa_offset (16)
- /* The ldar here happens after the load from [x0] at the call site
- (that is generated by the compiler as part of the TLS access ABI),
- so it reads the same value (this function is the final value of
- td->entry) and thus it synchronizes with the release store to
- td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
- from [x0,#8] here happens after the initialization of td->arg. */
DELOUSE (0)
- ldar PTR_REG (zr), [x0]
ldr PTR_REG (0), [x0, #PTR_SIZE]
mrs x1, tpidr_el0
sub PTR_REG (0), PTR_REG (0), PTR_REG (1)
@@ -192,13 +161,6 @@ _dl_tlsdesc_dynamic:
cfi_rel_offset (x4, 32+24)
mrs x4, tpidr_el0
- /* The ldar here happens after the load from [x0] at the call site
- (that is generated by the compiler as part of the TLS access ABI),
- so it reads the same value (this function is the final value of
- td->entry) and thus it synchronizes with the release store to
- td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
- from [x0,#PTR_SIZE] here happens after the initialization of td->arg. */
- ldar PTR_REG (zr), [x0]
ldr PTR_REG (1), [x0,#TLSDESC_ARG]
ldr PTR_REG (0), [x4,#TCBHEAD_DTV]
ldr PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
@@ -276,168 +238,3 @@ _dl_tlsdesc_dynamic:
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
#endif
-
- /* This function is a wrapper for a lazy resolver for TLS_DESC
- RELA relocations.
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_rela
- .global _dl_tlsdesc_resolve_rela
- .type _dl_tlsdesc_resolve_rela,%function
- cfi_startproc
- .align 2
-_dl_tlsdesc_resolve_rela:
-#define NSAVEXREGPAIRS 9
- /* The tlsdesc PLT entry pushes x2 and x3 to the stack. */
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x2, 0)
- cfi_rel_offset (x3, 8)
- stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
- cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
- mov x29, sp
- stp x1, x4, [sp, #32+16*0]
- stp x5, x6, [sp, #32+16*1]
- stp x7, x8, [sp, #32+16*2]
- stp x9, x10, [sp, #32+16*3]
- stp x11, x12, [sp, #32+16*4]
- stp x13, x14, [sp, #32+16*5]
- stp x15, x16, [sp, #32+16*6]
- stp x17, x18, [sp, #32+16*7]
- str x0, [sp, #32+16*8]
- cfi_rel_offset (x1, 32)
- cfi_rel_offset (x4, 32+8)
- cfi_rel_offset (x5, 32+16)
- cfi_rel_offset (x6, 32+16+8)
- cfi_rel_offset (x7, 32+16*2)
- cfi_rel_offset (x8, 32+16*2+8)
- cfi_rel_offset (x9, 32+16*3)
- cfi_rel_offset (x10, 32+16*3+8)
- cfi_rel_offset (x11, 32+16*4)
- cfi_rel_offset (x12, 32+16*4+8)
- cfi_rel_offset (x13, 32+16*5)
- cfi_rel_offset (x14, 32+16*5+8)
- cfi_rel_offset (x15, 32+16*6)
- cfi_rel_offset (x16, 32+16*6+8)
- cfi_rel_offset (x17, 32+16*7)
- cfi_rel_offset (x18, 32+16*7+8)
- cfi_rel_offset (x0, 32+16*8)
-
- SAVE_Q_REGISTERS
-
- DELOUSE (3)
- ldr PTR_REG (1), [x3, #PTR_SIZE]
- bl _dl_tlsdesc_resolve_rela_fixup
-
- RESTORE_Q_REGISTERS
-
- ldr x0, [sp, #32+16*8]
- DELOUSE (0)
- ldr PTR_REG (1), [x0]
- blr x1
-
- ldp x1, x4, [sp, #32+16*0]
- ldp x5, x6, [sp, #32+16*1]
- ldp x7, x8, [sp, #32+16*2]
- ldp x9, x10, [sp, #32+16*3]
- ldp x11, x12, [sp, #32+16*4]
- ldp x13, x14, [sp, #32+16*5]
- ldp x15, x16, [sp, #32+16*6]
- ldp x17, x18, [sp, #32+16*7]
- ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
- cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
- cfi_restore (x29)
- cfi_restore (x30)
- ldp x2, x3, [sp], #16
- cfi_adjust_cfa_offset (-16)
- RET
-#undef NSAVEXREGPAIRS
- cfi_endproc
- .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
-
- /* This function is a placeholder for lazy resolving of TLS
- relocations. Once some thread starts resolving a TLS
- relocation, it sets up the TLS descriptor to use this
- resolver, such that other threads that would attempt to
- resolve it concurrently may skip the call to the original lazy
- resolver and go straight to a condition wait.
-
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_hold
- .global _dl_tlsdesc_resolve_hold
- .type _dl_tlsdesc_resolve_hold,%function
- cfi_startproc
- .align 2
-_dl_tlsdesc_resolve_hold:
-#define NSAVEXREGPAIRS 10
-1:
- stp x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
- cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
- mov x29, sp
- stp x1, x2, [sp, #32+16*0]
- stp x3, x4, [sp, #32+16*1]
- stp x5, x6, [sp, #32+16*2]
- stp x7, x8, [sp, #32+16*3]
- stp x9, x10, [sp, #32+16*4]
- stp x11, x12, [sp, #32+16*5]
- stp x13, x14, [sp, #32+16*6]
- stp x15, x16, [sp, #32+16*7]
- stp x17, x18, [sp, #32+16*8]
- str x0, [sp, #32+16*9]
- cfi_rel_offset (x1, 32)
- cfi_rel_offset (x2, 32+8)
- cfi_rel_offset (x3, 32+16)
- cfi_rel_offset (x4, 32+16+8)
- cfi_rel_offset (x5, 32+16*2)
- cfi_rel_offset (x6, 32+16*2+8)
- cfi_rel_offset (x7, 32+16*3)
- cfi_rel_offset (x8, 32+16*3+8)
- cfi_rel_offset (x9, 32+16*4)
- cfi_rel_offset (x10, 32+16*4+8)
- cfi_rel_offset (x11, 32+16*5)
- cfi_rel_offset (x12, 32+16*5+8)
- cfi_rel_offset (x13, 32+16*6)
- cfi_rel_offset (x14, 32+16*6+8)
- cfi_rel_offset (x15, 32+16*7)
- cfi_rel_offset (x16, 32+16*7+8)
- cfi_rel_offset (x17, 32+16*8)
- cfi_rel_offset (x18, 32+16*8+8)
- cfi_rel_offset (x0, 32+16*9)
-
- SAVE_Q_REGISTERS
-
- adr x1, 1b
- bl _dl_tlsdesc_resolve_hold_fixup
-
- RESTORE_Q_REGISTERS
-
- ldr x0, [sp, #32+16*9]
- DELOUSE (0)
- ldr PTR_REG (1), [x0]
- blr x1
-
- ldp x1, x2, [sp, #32+16*0]
- ldp x3, x4, [sp, #32+16*1]
- ldp x5, x6, [sp, #32+16*2]
- ldp x7, x8, [sp, #32+16*3]
- ldp x9, x10, [sp, #32+16*4]
- ldp x11, x12, [sp, #32+16*5]
- ldp x13, x14, [sp, #32+16*6]
- ldp x15, x16, [sp, #32+16*7]
- ldp x17, x18, [sp, #32+16*8]
- ldp x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
- cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
- cfi_restore (x29)
- cfi_restore (x30)
- RET
- cfi_endproc
- .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
-#undef NSAVEXREGPAIRS
diff --git a/sysdeps/aarch64/dl-tlsdesc.h b/sysdeps/aarch64/dl-tlsdesc.h
index 66ec0de..25b5b64 100644
--- a/sysdeps/aarch64/dl-tlsdesc.h
+++ b/sysdeps/aarch64/dl-tlsdesc.h
@@ -46,17 +46,8 @@ extern ptrdiff_t attribute_hidden
_dl_tlsdesc_return (struct tlsdesc *);
extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_return_lazy (struct tlsdesc *);
-
-extern ptrdiff_t attribute_hidden
_dl_tlsdesc_undefweak (struct tlsdesc *);
-extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_resolve_rela (struct tlsdesc *);
-
-extern ptrdiff_t attribute_hidden
-_dl_tlsdesc_resolve_hold (struct tlsdesc *);
-
# ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
index 4c8a5a8..bd1356f 100644
--- a/sysdeps/aarch64/tlsdesc.c
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -18,137 +18,12 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <link.h>
#include <ldsodefs.h>
-#include <elf/dynamic-link.h>
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
+#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
-#include <atomic.h>
-
-/* The following functions take an entry_check_offset argument. It's
- computed by the caller as an offset between its entry point and the
- call site, such that by adding the built-in return address that is
- implicitly passed to the function with this offset, we can easily
- obtain the caller's entry point to compare with the entry point
- given in the TLS descriptor. If it's changed, we want to return
- immediately. */
-
-/* This function is used to lazily resolve TLS_DESC RELA relocations.
- The argument location is used to hold a pointer to the relocation. */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
-{
- const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
-
- /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
- initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent
- callers will return and retry calling td->entry. The updated td->entry
- synchronizes with the single writer so all read accesses here can use
- relaxed order. */
- if (_dl_tlsdesc_resolve_early_return_p
- (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
- return;
-
- /* The code below was borrowed from _dl_fixup(),
- except for checking for STB_LOCAL. */
- const ElfW(Sym) *const symtab
- = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
- const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
- lookup_t result;
-
- /* Look up the target symbol. If the normal lookup rules are not
- used don't look in the global scope. */
- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
- {
- const struct r_found_version *version = NULL;
-
- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
- {
- const ElfW(Half) *vernum =
- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
- ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
- version = &l->l_versions[ndx];
- if (version->hash == 0)
- version = NULL;
- }
-
- result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
- l->l_scope, version, ELF_RTYPE_CLASS_PLT,
- DL_LOOKUP_ADD_DEPENDENCY, NULL);
- }
- else
- {
- /* We already found the symbol. The module (and therefore its load
- address) is also known. */
- result = l;
- }
-
- if (!sym)
- {
- atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
- /* This release store synchronizes with the ldar acquire load
- instruction in _dl_tlsdesc_undefweak. */
- atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
- }
- else
- {
-# ifndef SHARED
- CHECK_STATIC_TLS (l, result);
-# else
- if (!TRY_STATIC_TLS (l, result))
- {
- void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
- + reloc->r_addend);
- atomic_store_relaxed (&td->arg, p);
- /* This release store synchronizes with the ldar acquire load
- instruction in _dl_tlsdesc_dynamic. */
- atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
- }
- else
-# endif
- {
- void *p = (void*) (sym->st_value + result->l_tls_offset
- + reloc->r_addend);
- atomic_store_relaxed (&td->arg, p);
- /* This release store synchronizes with the ldar acquire load
- instruction in _dl_tlsdesc_return_lazy. */
- atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
- }
- }
-
- _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to avoid busy waiting for other threads to
- complete the lazy relocation. Once another thread wins the race to
- relocate a TLS descriptor, it sets the descriptor up such that this
- function is called to wait until the resolver releases the
- lock. */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
-{
- /* Maybe we're lucky and can return early. */
- if (caller != atomic_load_relaxed (&td->entry))
- return;
-
- /* Locking here will stop execution until the running resolver runs
- _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
-
- FIXME: We'd be better off waiting on a condition variable, such
- that we didn't have to hold the lock throughout the relocation
- processing. */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
-}
-
/* Unmap the dynamic object, but also release its TLS descriptor table
if there is one. */