diff options
author | Alan Modra <amodra@gmail.com> | 2017-08-29 15:55:33 +0930 |
---|---|---|
committer | Alan Modra <amodra@gmail.com> | 2017-08-29 21:29:37 +0930 |
commit | 34e0882b83aedc6fb9c7532c89afaf8e0043b7ce (patch) | |
tree | 639ddded69114a9e5e971bdc0e983698035d7cf6 /gold/powerpc.cc | |
parent | 34ca2bd7ace5e208d46fea6e7a56a5376de0abfe (diff) | |
download | gdb-34e0882b83aedc6fb9c7532c89afaf8e0043b7ce.zip gdb-34e0882b83aedc6fb9c7532c89afaf8e0043b7ce.tar.gz gdb-34e0882b83aedc6fb9c7532c89afaf8e0043b7ce.tar.bz2 |
[GOLD] PowerPC tls_get_addr_optimize
This implements the special __tls_get_addr_opt call stub for powerpc
gold that returns __thread variable addresses without actually making
a call to __tls_get_addr in most cases. Shared libraries that are
loaded at program load time (i.e. dlopen is not used) have a known
layout for their __thread variables, and thus DTPMOD64/DTPREL64 pairs
describing those variables can be set up by ld.so for the
__tls_get_addr_opt call stub fast exit.
Ref https://sourceware.org/ml/libc-alpha/2015-03/msg00626.html
I really, really wish I'd used a differently versioned __tls_get_addr
symbol than the base symbol to indicate glibc support for the
optimized call, rather than having glibc export __tls_get_addr_opt. A
lot of the messing around here, flipping symbols from __tls_get_addr
to __tls_get_addr_opt, is caused by that decision. About the only
benefit is that a user can see at a glance that their disassembled
code is calling __tls_get_addr via the fancy call stub. Anyway, we
need references to __tls_get_addr to seem like they were to
__tls_get_addr_opt, and in cases like the tsan interceptor, a
definition of __tls_get_addr to seem like one of __tls_get_addr_opt
as well. That's the reason for Symbol::clear_in_reg and
Symbol_table::clone, and why symbols are substituted in Scan::global
and other places dealing with dynamic linking.
elfcpp/
* elfcpp.h (DT_PPC_OPT): Define.
* powerpc.h (PPC_OPT_TLS): Define.
gold/
* options.h (tls_get_addr_optimize): New option.
* symtab.h (Symbol::clear_in_reg, clone): New functions.
(Sized_symbol::clone): New function.
(Symbol_table::clone): New function.
* resolve.cc (Symbol::clone, Sized_symbol::clone): New functions.
* powerpc.cc (Target_powerpc::has_tls_get_addr_opt_,
tls_get_addr_, tls_get_addr_opt_): New vars.
(Target_powerpc::tls_get_addr_opt, tls_get_addr,
is_tls_get_addr_opt, replace_tls_get_addr,
set_has_tls_get_addr_opt, stk_linker): New functions.
(Target_powerpc::Track_tls::maybe_skip_tls_get_addr_call): Add
target param. Update callers. Compare symbols rather than names.
(Target_powerpc::do_define_standard_symbols): Init tls_get_addr_
and tls_get_addr_opt_.
(Target_powerpc::Branch_info::mark_pltcall): Translate tls_get_addr
sym to tls_get_addr_opt.
(Target_powerpc::Branch_info::make_stub): Likewise.
(Stub_table::define_stub_syms): Likewise.
(Target_powerpc::Scan::global): Likewise.
(Target_powerpc::Relocate::relocate): Likewise.
(add_3_12_2, add_3_12_13, bctrl, beqlr, cmpdi_11_0, cmpwi_11_0,
ld_11_1, ld_11_3, ld_12_3, lwz_11_3, lwz_12_3, mr_0_3, mr_3_0,
mtlr_11, std_11_1): New constants.
(Stub_table::eh_frame_added_): Delete.
(Stub_table::tls_get_addr_opt_bctrl_, plt_fde_len_, plt_fde_): New vars.
(Stub_table::init_plt_fde): New function.
(Stub_table::add_eh_frame, replace_eh_frame): Move definition out
of line. Init and use plt_fde_.
(Stub_table::plt_call_size): Return size for tls_get_addr stub.
Extract alignment code to..
(Stub_table::plt_call_align): ..this new function. Adjust all callers.
(Stub_table::add_plt_call_entry): Set has_tls_get_addr_opt and
tls_get_addr_opt_bctrl, and align after that.
(Stub_table::do_write): Write out tls_get_addr stub.
(Target_powerpc::do_finalize_sections): Emit DT_PPC_OPT
PPC_OPT_TLS/PPC64_OPT_TLS bit.
(Target_powerpc::Relocate::relocate): Don't check for or modify
nop following bl for tls_get_addr stub.
Diffstat (limited to 'gold/powerpc.cc')
-rw-r--r-- | gold/powerpc.cc | 363 |
1 files changed, 306 insertions, 57 deletions
diff --git a/gold/powerpc.cc b/gold/powerpc.cc index c29850b..7f3f025 100644 --- a/gold/powerpc.cc +++ b/gold/powerpc.cc @@ -613,8 +613,10 @@ class Target_powerpc : public Sized_target<size, big_endian> stub_tables_(), branch_lookup_table_(), branch_info_(), tocsave_loc_(), plt_thread_safe_(false), plt_localentry0_(false), plt_localentry0_init_(false), has_localentry0_(false), + has_tls_get_addr_opt_(false), relax_failed_(false), relax_fail_count_(0), - stub_group_size_(0), savres_section_(0) + stub_group_size_(0), savres_section_(0), + tls_get_addr_(NULL), tls_get_addr_opt_(NULL) { } @@ -1081,11 +1083,43 @@ class Target_powerpc : public Sized_target<size, big_endian> this->set_processor_specific_flags(flags); } + Symbol* + tls_get_addr_opt() const + { return this->tls_get_addr_opt_; } + + Symbol* + tls_get_addr() const + { return this->tls_get_addr_; } + + // If optimizing __tls_get_addr calls, whether this is the + // "__tls_get_addr" symbol. + bool + is_tls_get_addr_opt(const Symbol* gsym) const + { + return this->tls_get_addr_opt_ && (gsym == this->tls_get_addr_ + || gsym == this->tls_get_addr_opt_); + } + + bool + replace_tls_get_addr(const Symbol* gsym) const + { return this->tls_get_addr_opt_ && gsym == this->tls_get_addr_; } + + void + set_has_tls_get_addr_opt() + { this->has_tls_get_addr_opt_ = true; } + // Offset to toc save stack slot int stk_toc() const { return this->abiversion() < 2 ? 40 : 24; } + // Offset to linker save stack slot. ELFv2 doesn't have a linker word, + // so use the CR save slot. Used only by __tls_get_addr call stub, + // relying on __tls_get_addr not saving CR itself. + int + stk_linker() const + { return this->abiversion() < 2 ? 
32 : 8; } + private: class Track_tls @@ -1139,12 +1173,14 @@ class Target_powerpc : public Sized_target<size, big_endian> {this->tls_get_addr_state_ = SKIP; } Tls_get_addr - maybe_skip_tls_get_addr_call(unsigned int r_type, const Symbol* gsym) + maybe_skip_tls_get_addr_call(Target_powerpc<size, big_endian>* target, + unsigned int r_type, const Symbol* gsym) { bool is_tls_call = ((r_type == elfcpp::R_POWERPC_REL24 || r_type == elfcpp::R_PPC_PLTREL24) && gsym != NULL - && strcmp(gsym->name(), "__tls_get_addr") == 0); + && (gsym == target->tls_get_addr() + || gsym == target->tls_get_addr_opt())); Tls_get_addr last_tls = this->tls_get_addr_state_; this->tls_get_addr_state_ = NOT_EXPECTED; if (is_tls_call && last_tls != EXPECTED) @@ -1549,12 +1585,18 @@ class Target_powerpc : public Sized_target<size, big_endian> bool plt_localentry0_; bool plt_localentry0_init_; bool has_localentry0_; + bool has_tls_get_addr_opt_; bool relax_failed_; int relax_fail_count_; int32_t stub_group_size_; Output_data_save_res<size, big_endian> *savres_section_; + + // The "__tls_get_addr" symbol, if present + Symbol* tls_get_addr_; + // If optimizing __tls_get_addr calls, the "__tls_get_addr_opt" symbol. + Symbol* tls_get_addr_opt_; }; template<> @@ -2459,6 +2501,36 @@ Target_powerpc<size, big_endian>::do_define_standard_symbols( false, false); } } + + this->tls_get_addr_ = symtab->lookup("__tls_get_addr"); + if (parameters->options().tls_get_addr_optimize() + && this->tls_get_addr_ != NULL + && this->tls_get_addr_->in_reg()) + this->tls_get_addr_opt_ = symtab->lookup("__tls_get_addr_opt"); + if (this->tls_get_addr_opt_ != NULL) + { + if (this->tls_get_addr_->is_undefined() + || this->tls_get_addr_->is_from_dynobj()) + { + // Make it seem as if references to __tls_get_addr are + // really to __tls_get_addr_opt, so the latter symbol is + // made dynamic, not the former. 
+ this->tls_get_addr_->clear_in_reg(); + this->tls_get_addr_opt_->set_in_reg(); + } + // We have a non-dynamic definition for __tls_get_addr. + // Make __tls_get_addr_opt the same, if it does not already have + // a non-dynamic definition. + else if (this->tls_get_addr_opt_->is_undefined() + || this->tls_get_addr_opt_->is_from_dynobj()) + { + Sized_symbol<size>* from + = static_cast<Sized_symbol<size>*>(this->tls_get_addr_); + Sized_symbol<size>* to + = static_cast<Sized_symbol<size>*>(this->tls_get_addr_opt_); + symtab->clone<size>(to, from); + } + } } // Set up PowerPC target specific relobj. @@ -3030,6 +3102,8 @@ Target_powerpc<size, big_endian>::Branch_info::mark_pltcall( return false; Symbol* sym = this->object_->global_symbol(this->r_sym_); + if (target->replace_tls_get_addr(sym)) + sym = target->tls_get_addr_opt(); if (sym != NULL && sym->is_forwarder()) sym = symtab->resolve_forwards(sym); const Sized_symbol<size>* gsym = static_cast<const Sized_symbol<size>*>(sym); @@ -3055,12 +3129,14 @@ Target_powerpc<size, big_endian>::Branch_info::make_stub( Symbol_table* symtab) const { Symbol* sym = this->object_->global_symbol(this->r_sym_); - if (sym != NULL && sym->is_forwarder()) - sym = symtab->resolve_forwards(sym); - const Sized_symbol<size>* gsym = static_cast<const Sized_symbol<size>*>(sym); Target_powerpc<size, big_endian>* target = static_cast<Target_powerpc<size, big_endian>*>( parameters->sized_target<size, big_endian>()); + if (target->replace_tls_get_addr(sym)) + sym = target->tls_get_addr_opt(); + if (sym != NULL && sym->is_forwarder()) + sym = symtab->resolve_forwards(sym); + const Sized_symbol<size>* gsym = static_cast<const Sized_symbol<size>*>(sym); bool ok = true; if (gsym != NULL @@ -3675,6 +3751,8 @@ static const uint32_t add_2_2_11 = 0x7c425a14; static const uint32_t add_2_2_12 = 0x7c426214; static const uint32_t add_3_3_2 = 0x7c631214; static const uint32_t add_3_3_13 = 0x7c636a14; +static const uint32_t add_3_12_2 = 0x7c6c1214; +static const 
uint32_t add_3_12_13 = 0x7c6c6a14; static const uint32_t add_11_0_11 = 0x7d605a14; static const uint32_t add_11_2_11 = 0x7d625a14; static const uint32_t add_11_11_2 = 0x7d6b1214; @@ -3696,10 +3774,14 @@ static const uint32_t addis_12_12 = 0x3d8c0000; static const uint32_t b = 0x48000000; static const uint32_t bcl_20_31 = 0x429f0005; static const uint32_t bctr = 0x4e800420; +static const uint32_t bctrl = 0x4e800421; +static const uint32_t beqlr = 0x4d820020; static const uint32_t blr = 0x4e800020; static const uint32_t bnectr_p4 = 0x4ce20420; static const uint32_t cmpld_7_12_0 = 0x7fac0040; static const uint32_t cmpldi_2_0 = 0x28220000; +static const uint32_t cmpdi_11_0 = 0x2c2b0000; +static const uint32_t cmpwi_11_0 = 0x2c0b0000; static const uint32_t cror_15_15_15 = 0x4def7b82; static const uint32_t cror_31_31_31 = 0x4ffffb82; static const uint32_t ld_0_1 = 0xe8010000; @@ -3708,9 +3790,12 @@ static const uint32_t ld_2_1 = 0xe8410000; static const uint32_t ld_2_2 = 0xe8420000; static const uint32_t ld_2_11 = 0xe84b0000; static const uint32_t ld_2_12 = 0xe84c0000; +static const uint32_t ld_11_1 = 0xe9610000; static const uint32_t ld_11_2 = 0xe9620000; +static const uint32_t ld_11_3 = 0xe9630000; static const uint32_t ld_11_11 = 0xe96b0000; static const uint32_t ld_12_2 = 0xe9820000; +static const uint32_t ld_12_3 = 0xe9830000; static const uint32_t ld_12_11 = 0xe98b0000; static const uint32_t ld_12_12 = 0xe98c0000; static const uint32_t lfd_0_1 = 0xc8010000; @@ -3722,17 +3807,22 @@ static const uint32_t lis_11 = 0x3d600000; static const uint32_t lis_12 = 0x3d800000; static const uint32_t lvx_0_12_0 = 0x7c0c00ce; static const uint32_t lwz_0_12 = 0x800c0000; +static const uint32_t lwz_11_3 = 0x81630000; static const uint32_t lwz_11_11 = 0x816b0000; static const uint32_t lwz_11_30 = 0x817e0000; +static const uint32_t lwz_12_3 = 0x81830000; static const uint32_t lwz_12_12 = 0x818c0000; static const uint32_t lwzu_0_12 = 0x840c0000; static const uint32_t mflr_0 = 
0x7c0802a6; static const uint32_t mflr_11 = 0x7d6802a6; static const uint32_t mflr_12 = 0x7d8802a6; +static const uint32_t mr_0_3 = 0x7c601b78; +static const uint32_t mr_3_0 = 0x7c030378; static const uint32_t mtctr_0 = 0x7c0903a6; static const uint32_t mtctr_11 = 0x7d6903a6; static const uint32_t mtctr_12 = 0x7d8903a6; static const uint32_t mtlr_0 = 0x7c0803a6; +static const uint32_t mtlr_11 = 0x7d6803a6; static const uint32_t mtlr_12 = 0x7d8803a6; static const uint32_t nop = 0x60000000; static const uint32_t ori_0_0_0 = 0x60000000; @@ -3740,6 +3830,7 @@ static const uint32_t srdi_0_0_2 = 0x7800f082; static const uint32_t std_0_1 = 0xf8010000; static const uint32_t std_0_12 = 0xf80c0000; static const uint32_t std_2_1 = 0xf8410000; +static const uint32_t std_11_1 = 0xf9610000; static const uint32_t stfd_0_1 = 0xd8010000; static const uint32_t stvx_0_12_0 = 0x7c0c01ce; static const uint32_t sub_11_11_12 = 0x7d6c5850; @@ -4102,7 +4193,8 @@ class Stub_table : public Output_relaxed_input_section orig_data_size_(owner->current_data_size()), plt_size_(0), last_plt_size_(0), branch_size_(0), last_branch_size_(0), min_size_threshold_(0), - eh_frame_added_(false), need_save_res_(false), uniq_(id) + need_save_res_(false), uniq_(id), tls_get_addr_opt_bctrl_(-1u), + plt_fde_len_(0) { this->set_output_section(output_section); @@ -4263,48 +4355,17 @@ class Stub_table : public Output_relaxed_input_section return false; } - // Add .eh_frame info for this stub section. Unlike other linker - // generated .eh_frame this is added late in the link, because we - // only want the .eh_frame info if this particular stub section is - // non-empty. + // Generate a suitable FDE to describe code in this stub group. 
void - add_eh_frame(Layout* layout) - { - if (!parameters->options().ld_generated_unwind_info()) - return; + init_plt_fde(); - // Since we add stub .eh_frame info late, it must be placed - // after all other linker generated .eh_frame info so that - // merge mapping need not be updated for input sections. - // There is no provision to use a different CIE to that used - // by .glink. - if (!this->targ_->has_glink()) - return; - - if (this->plt_size_ + this->branch_size_ + this->need_save_res_ == 0) - return; - - layout->add_eh_frame_for_plt(this, - Eh_cie<size>::eh_frame_cie, - sizeof (Eh_cie<size>::eh_frame_cie), - default_fde, - sizeof (default_fde)); - this->eh_frame_added_ = true; - } + // Add .eh_frame info for this stub section. + void + add_eh_frame(Layout* layout); + // Remove .eh_frame info for this stub section. void - remove_eh_frame(Layout* layout) - { - if (this->eh_frame_added_) - { - layout->remove_eh_frame_for_plt(this, - Eh_cie<size>::eh_frame_cie, - sizeof (Eh_cie<size>::eh_frame_cie), - default_fde, - sizeof (default_fde)); - this->eh_frame_added_ = false; - } - } + remove_eh_frame(Layout* layout); Target_powerpc<size, big_endian>* targ() const @@ -4356,7 +4417,12 @@ class Stub_table : public Output_relaxed_input_section plt_call_size(typename Plt_stub_entries::const_iterator p) const { if (size == 32) - return 16; + { + const Symbol* gsym = p->first.sym_; + if (this->targ_->is_tls_get_addr_opt(gsym)) + return 12 * 4; + return 4 * 4; + } bool is_iplt; Address plt_addr = this->plt_off(p, &is_iplt); @@ -4370,6 +4436,9 @@ class Stub_table : public Output_relaxed_input_section got_addr += ppcobj->toc_base_offset(); Address off = plt_addr - got_addr; unsigned int bytes = 4 * 4 + 4 * (ha(off) != 0); + const Symbol* gsym = p->first.sym_; + if (this->targ_->is_tls_get_addr_opt(gsym)) + bytes += 13 * 4; if (this->targ_->abiversion() < 2) { bool static_chain = parameters->options().plt_static_chain(); @@ -4379,6 +4448,12 @@ class Stub_table : public 
Output_relaxed_input_section + 8 * thread_safe + 4 * (ha(off + 8 + 8 * static_chain) != ha(off))); } + return bytes; + } + + unsigned int + plt_call_align(unsigned int bytes) const + { unsigned int align = 1 << parameters->options().plt_align(); if (align > 1) bytes = (bytes + align - 1) & -align; @@ -4518,13 +4593,16 @@ class Stub_table : public Output_relaxed_input_section // a stub table, it is zero for the first few iterations, then // increases monotonically. Address min_size_threshold_; - // Whether .eh_frame info has been created for this stub section. - bool eh_frame_added_; // Set if this stub group needs a copy of out-of-line register // save/restore functions. bool need_save_res_; // Per stub table unique identifier. uint32_t uniq_; + // The bctrl in the __tls_get_addr_opt stub, if present. + unsigned int tls_get_addr_opt_bctrl_; + // FDE unwind info for this stub group. + unsigned int plt_fde_len_; + unsigned char plt_fde_[20]; }; // Add a plt call stub, if we do not already have one for this @@ -4553,6 +4631,12 @@ Stub_table<size, big_endian>::add_plt_call_entry( p.first->second.localentry0_ = 1; this->targ_->set_has_localentry0(); } + if (this->targ_->is_tls_get_addr_opt(gsym)) + { + this->targ_->set_has_tls_get_addr_opt(); + this->tls_get_addr_opt_bctrl_ = this->plt_size_ - 5 * 4; + } + this->plt_size_ = this->plt_call_align(this->plt_size_); } if (size == 64 && !tocsave @@ -4578,6 +4662,7 @@ Stub_table<size, big_endian>::add_plt_call_entry( if (p.second) { this->plt_size_ = ent.off_ + this->plt_call_size(p.first); + this->plt_size_ = this->plt_call_align(this->plt_size_); if (size == 64 && this->targ_->is_elfv2_localentry0(object, locsym_index)) { @@ -4697,6 +4782,94 @@ Stub_table<size, big_endian>::find_long_branch_entry( return p->second; } +// Generate a suitable FDE to describe code in this stub group. +// The __tls_get_addr_opt call stub needs to describe where it saves +// LR, to support exceptions that might be thrown from __tls_get_addr. 
+ +template<int size, bool big_endian> +void +Stub_table<size, big_endian>::init_plt_fde() +{ + unsigned char* p = this->plt_fde_; + // offset pcrel sdata4, size udata4, and augmentation size byte. + memset (p, 0, 9); + p += 9; + if (this->tls_get_addr_opt_bctrl_ != -1u) + { + unsigned int to_bctrl = this->tls_get_addr_opt_bctrl_ / 4; + if (to_bctrl < 64) + *p++ = elfcpp::DW_CFA_advance_loc + to_bctrl; + else if (to_bctrl < 256) + { + *p++ = elfcpp::DW_CFA_advance_loc1; + *p++ = to_bctrl; + } + else if (to_bctrl < 65536) + { + *p++ = elfcpp::DW_CFA_advance_loc2; + elfcpp::Swap<16, big_endian>::writeval(p, to_bctrl); + p += 2; + } + else + { + *p++ = elfcpp::DW_CFA_advance_loc4; + elfcpp::Swap<32, big_endian>::writeval(p, to_bctrl); + p += 4; + } + *p++ = elfcpp::DW_CFA_offset_extended_sf; + *p++ = 65; + *p++ = -(this->targ_->stk_linker() / 8) & 0x7f; + *p++ = elfcpp::DW_CFA_advance_loc + 4; + *p++ = elfcpp::DW_CFA_restore_extended; + *p++ = 65; + } + this->plt_fde_len_ = p - this->plt_fde_; +} + +// Add .eh_frame info for this stub section. Unlike other linker +// generated .eh_frame this is added late in the link, because we +// only want the .eh_frame info if this particular stub section is +// non-empty. + +template<int size, bool big_endian> +void +Stub_table<size, big_endian>::add_eh_frame(Layout* layout) +{ + if (!parameters->options().ld_generated_unwind_info()) + return; + + // Since we add stub .eh_frame info late, it must be placed + // after all other linker generated .eh_frame info so that + // merge mapping need not be updated for input sections. + // There is no provision to use a different CIE to that used + // by .glink. 
+ if (!this->targ_->has_glink()) + return; + + if (this->plt_size_ + this->branch_size_ + this->need_save_res_ == 0) + return; + + this->init_plt_fde(); + layout->add_eh_frame_for_plt(this, + Eh_cie<size>::eh_frame_cie, + sizeof (Eh_cie<size>::eh_frame_cie), + this->plt_fde_, this->plt_fde_len_); +} + +template<int size, bool big_endian> +void +Stub_table<size, big_endian>::remove_eh_frame(Layout* layout) +{ + if (this->plt_fde_len_ != 0) + { + layout->remove_eh_frame_for_plt(this, + Eh_cie<size>::eh_frame_cie, + sizeof (Eh_cie<size>::eh_frame_cie), + this->plt_fde_, this->plt_fde_len_); + this->plt_fde_len_ = 0; + } +} + // A class to handle .glink. template<int size, bool big_endian> @@ -4896,13 +5069,15 @@ Stub_table<size, big_endian>::define_stub_syms(Symbol_table* symtab) sprintf(localname, "%x", cs->first.locsym_); symname = localname; } + else if (this->targ_->is_tls_get_addr_opt(cs->first.sym_)) + symname = this->targ_->tls_get_addr_opt()->name(); else symname = cs->first.sym_->name(); char* name = new char[8 + 10 + strlen(obj) + strlen(symname) + strlen(add) + 1]; sprintf(name, "%08x.plt_call.%s%s%s", this->uniq_, obj, symname, add); Address value = this->stub_address() - this->address() + cs->second.off_; - unsigned int stub_size = this->plt_call_size(cs); + unsigned int stub_size = this->plt_call_align(this->plt_call_size(cs)); this->targ_->define_local(symtab, name, this, value, stub_size); } } @@ -5011,6 +5186,33 @@ Stub_table<size, big_endian>::do_write(Output_file* of) } p = oview + cs->second.off_; + const Symbol* gsym = cs->first.sym_; + if (this->targ_->is_tls_get_addr_opt(gsym)) + { + write_insn<big_endian>(p, ld_11_3 + 0); + p += 4; + write_insn<big_endian>(p, ld_12_3 + 8); + p += 4; + write_insn<big_endian>(p, mr_0_3); + p += 4; + write_insn<big_endian>(p, cmpdi_11_0); + p += 4; + write_insn<big_endian>(p, add_3_12_13); + p += 4; + write_insn<big_endian>(p, beqlr); + p += 4; + write_insn<big_endian>(p, mr_3_0); + p += 4; + if 
(!cs->second.localentry0_) + { + write_insn<big_endian>(p, mflr_11); + p += 4; + write_insn<big_endian>(p, (std_11_1 + + this->targ_->stk_linker())); + p += 4; + } + use_fake_dep = thread_safe; + } if (ha(off) != 0) { if (cs->second.r2save_) @@ -5097,7 +5299,20 @@ Stub_table<size, big_endian>::do_write(Output_file* of) p += 4; } } - if (thread_safe && !use_fake_dep) + if (!cs->second.localentry0_ + && this->targ_->is_tls_get_addr_opt(gsym)) + { + write_insn<big_endian>(p, bctrl); + p += 4; + write_insn<big_endian>(p, ld_2_1 + this->targ_->stk_toc()); + p += 4; + write_insn<big_endian>(p, ld_11_1 + this->targ_->stk_linker()); + p += 4; + write_insn<big_endian>(p, mtlr_11); + p += 4; + write_insn<big_endian>(p, blr); + } + else if (thread_safe && !use_fake_dep) { write_insn<big_endian>(p, cmpldi_2_0); p += 4; @@ -5173,6 +5388,26 @@ Stub_table<size, big_endian>::do_write(Output_file* of) plt_addr += plt_base; p = oview + cs->second.off_; + const Symbol* gsym = cs->first.sym_; + if (this->targ_->is_tls_get_addr_opt(gsym)) + { + write_insn<big_endian>(p, lwz_11_3 + 0); + p += 4; + write_insn<big_endian>(p, lwz_12_3 + 4); + p += 4; + write_insn<big_endian>(p, mr_0_3); + p += 4; + write_insn<big_endian>(p, cmpwi_11_0); + p += 4; + write_insn<big_endian>(p, add_3_12_2); + p += 4; + write_insn<big_endian>(p, beqlr); + p += 4; + write_insn<big_endian>(p, mr_3_0); + p += 4; + write_insn<big_endian>(p, nop); + p += 4; + } if (parameters->options().output_is_position_independent()) { Address got_addr; @@ -6225,7 +6460,7 @@ Target_powerpc<size, big_endian>::Scan::local( const elfcpp::Sym<size, big_endian>& lsym, bool is_discarded) { - this->maybe_skip_tls_get_addr_call(r_type, NULL); + this->maybe_skip_tls_get_addr_call(target, r_type, NULL); if ((size == 64 && r_type == elfcpp::R_PPC64_TLSGD) || (size == 32 && r_type == elfcpp::R_PPC_TLSGD)) @@ -6772,9 +7007,15 @@ Target_powerpc<size, big_endian>::Scan::global( unsigned int r_type, Symbol* gsym) { - if 
(this->maybe_skip_tls_get_addr_call(r_type, gsym) == Track_tls::SKIP) + if (this->maybe_skip_tls_get_addr_call(target, r_type, gsym) + == Track_tls::SKIP) return; + if (target->replace_tls_get_addr(gsym)) + // Change a __tls_get_addr reference to __tls_get_addr_opt + // so dynamic relocs are emitted against the latter symbol. + gsym = target->tls_get_addr_opt(); + if ((size == 64 && r_type == elfcpp::R_PPC64_TLSGD) || (size == 32 && r_type == elfcpp::R_PPC_TLSGD)) { @@ -7904,6 +8145,8 @@ Target_powerpc<size, big_endian>::do_finalize_sections( odyn->add_section_plus_offset(elfcpp::DT_PPC_GOT, this->got_, this->got_->g_o_t()); } + if (this->has_tls_get_addr_opt_) + odyn->add_constant(elfcpp::DT_PPC_OPT, elfcpp::PPC_OPT_TLS); } else { @@ -7915,9 +8158,12 @@ Target_powerpc<size, big_endian>::do_finalize_sections( (this->glink_->pltresolve_size - 32)); } - if (this->has_localentry0_) + if (this->has_localentry0_ || this->has_tls_get_addr_opt_) odyn->add_constant(elfcpp::DT_PPC64_OPT, - elfcpp::PPC64_OPT_LOCALENTRY); + ((this->has_localentry0_ + ? elfcpp::PPC64_OPT_LOCALENTRY : 0) + | (this->has_tls_get_addr_opt_ + ? 
elfcpp::PPC64_OPT_TLS : 0))); } } @@ -8035,9 +8281,12 @@ Target_powerpc<size, big_endian>::Relocate::relocate( if (view == NULL) return true; + if (target->replace_tls_get_addr(gsym)) + gsym = static_cast<const Sized_symbol<size>*>(target->tls_get_addr_opt()); + const elfcpp::Rela<size, big_endian> rela(preloc); unsigned int r_type = elfcpp::elf_r_type<size>(rela.get_r_info()); - switch (this->maybe_skip_tls_get_addr_call(r_type, gsym)) + switch (this->maybe_skip_tls_get_addr_call(target, r_type, gsym)) { case Track_tls::NOT_EXPECTED: gold_error_at_location(relinfo, relnum, rela.get_r_offset(), @@ -8162,8 +8411,8 @@ Target_powerpc<size, big_endian>::Relocate::relocate( { typedef typename elfcpp::Swap<32, big_endian>::Valtype Valtype; Valtype* wv = reinterpret_cast<Valtype*>(view); - bool can_plt_call = localentry0; - if (!localentry0 && rela.get_r_offset() + 8 <= view_size) + bool can_plt_call = localentry0 || target->is_tls_get_addr_opt(gsym); + if (!can_plt_call && rela.get_r_offset() + 8 <= view_size) { Valtype insn = elfcpp::Swap<32, big_endian>::readval(wv); Valtype insn2 = elfcpp::Swap<32, big_endian>::readval(wv + 1); |