diff options
author | Alan Modra <amodra@gmail.com> | 2020-08-24 16:32:57 +0930 |
---|---|---|
committer | Alan Modra <amodra@gmail.com> | 2020-08-24 21:15:06 +0930 |
commit | 252dcdf432c67f6baafb766ed068c64db1eb2bad (patch) | |
tree | f065483aba2070e20ef18b2c5cb711b82bb6041c /gold | |
parent | f16c3d4f137ae02fab0982782b2e1c2b2afc8583 (diff) | |
download | gdb-252dcdf432c67f6baafb766ed068c64db1eb2bad.zip gdb-252dcdf432c67f6baafb766ed068c64db1eb2bad.tar.gz gdb-252dcdf432c67f6baafb766ed068c64db1eb2bad.tar.bz2 |
PowerPC TPREL_HA/LO optimisation
ppc64 ld optimises sequences like the following
addis 3,13,wot@tprel@ha
lwz 3,wot@tprel@l(3)
to
nop
lwz 3,wot@tprel(13)
when "wot" is located near enough to the thread pointer.
However, the ABI doesn't require that R_PPC64_TPREL16_HA always be on
an addis rt,13,imm instruction, and while ld checked for that on the
high-part instruction it didn't disable the optimisation on the
low-part instruction. This patch fixes that problem, disabling the
tprel optimisation globally if high-part instructions don't pass
sanity checks. The optimisation is also enabled for ppc32, where
before ld.bfd had the code in the wrong place and ld.gold had it in a
block only enabled for ppc64.
bfd/
* elf32-ppc.c (ppc_elf_check_relocs): Set has_tls_reloc for
high part tprel16 relocs.
(ppc_elf_tls_optimize): Sanity check high part tprel16 relocs.
Clear do_tls_opt on odd instructions.
(ppc_elf_relocate_section): Move TPREL16_HA/LO optimisation later.
Don't sanity check them here.
* elf64-ppc.c (ppc64_elf_check_relocs): Set has_tls_reloc for
high part tprel16 relocs.
(ppc64_elf_tls_optimize): Sanity check high part tprel16 relocs.
Clear do_tls_opt on odd instructions.
(ppc64_elf_relocate_section): Don't sanity check TPREL16_HA.
ld/
* testsuite/ld-powerpc/tls32.d: Update for TPREL_HA/LO optimisation.
* testsuite/ld-powerpc/tlsexe32.d: Likewise.
* testsuite/ld-powerpc/tlsldopt32.d: Likewise.
* testsuite/ld-powerpc/tlsmark32.d: Likewise.
* testsuite/ld-powerpc/tlsopt4_32.d: Likewise.
* testsuite/ld-powerpc/tprel.s,
* testsuite/ld-powerpc/tprel.d,
* testsuite/ld-powerpc/tprel32.d: New tests.
* testsuite/ld-powerpc/tprelbad.s,
* testsuite/ld-powerpc/tprelbad.d: New test.
* testsuite/ld-powerpc/powerpc.exp: Run them.
gold/
* powerpc.cc (Target_powerpc): Add tprel_opt_ and accessors.
(Target_powerpc::Scan::local): Sanity check tprel high relocs.
(Target_powerpc::Scan::global): Likewise.
(Target_powerpc::Relocate::relocate): Control tprel optimisation
with tprel_opt_ and enable for 32-bit.
Diffstat (limited to 'gold')
-rw-r--r-- | gold/ChangeLog | 8 | ||||
-rw-r--r-- | gold/powerpc.cc | 261 |
2 files changed, 181 insertions, 88 deletions
diff --git a/gold/ChangeLog b/gold/ChangeLog index 51b1ddb..176b728 100644 --- a/gold/ChangeLog +++ b/gold/ChangeLog @@ -1,3 +1,11 @@ +2020-08-24 Alan Modra <amodra@gmail.com> + + * powerpc.cc (Target_powerpc): Add tprel_opt_ and accessors. + (Target_powerpc::Scan::local): Sanity check tprel high relocs. + (Target_powerpc::Scan::global): Likewise. + (Target_powerpc::Relocate::relocate): Control tprel optimisation + with tprel_opt_ and enable for 32-bit. + 2020-08-12 Nick Clifton <nickc@redhat.com> * po/sr.po: Updated Serbian translation. diff --git a/gold/powerpc.cc b/gold/powerpc.cc index d573666..e35cbcf 100644 --- a/gold/powerpc.cc +++ b/gold/powerpc.cc @@ -650,6 +650,7 @@ class Target_powerpc : public Sized_target<size, big_endian> power10_stubs_(false), plt_thread_safe_(false), plt_localentry0_(false), plt_localentry0_init_(false), has_localentry0_(false), has_tls_get_addr_opt_(false), + tprel_opt_(parameters->options().tls_optimize()), relax_failed_(false), relax_fail_count_(0), stub_group_size_(0), savres_section_(0), tls_get_addr_(NULL), tls_get_addr_opt_(NULL), @@ -1145,6 +1146,14 @@ class Target_powerpc : public Sized_target<size, big_endian> return false; } + bool + tprel_opt() const + { return this->tprel_opt_; } + + void + set_tprel_opt(bool val) + { this->tprel_opt_ = val; } + // Remember any symbols seen with non-zero localentry, even those // not providing a definition bool @@ -1702,6 +1711,7 @@ class Target_powerpc : public Sized_target<size, big_endian> bool plt_localentry0_init_; bool has_localentry0_; bool has_tls_get_addr_opt_; + bool tprel_opt_; bool relax_failed_; int relax_fail_count_; @@ -8365,10 +8375,6 @@ Target_powerpc<size, big_endian>::Scan::local( switch (r_type) { - case elfcpp::R_POWERPC_TPREL16: - case elfcpp::R_POWERPC_TPREL16_LO: - case elfcpp::R_POWERPC_TPREL16_HI: - case elfcpp::R_POWERPC_TPREL16_HA: case elfcpp::R_PPC64_TPREL16_DS: case elfcpp::R_PPC64_TPREL16_LO_DS: case elfcpp::R_PPC64_TPREL16_HIGH: @@ -8378,6 +8384,13 @@ Target_powerpc<size, big_endian>::Scan::local( case elfcpp::R_PPC64_TPREL16_HIGHEST: case elfcpp::R_PPC64_TPREL16_HIGHESTA: case elfcpp::R_PPC64_TPREL34: + if (size != 64) + break; + // Fall through. + case elfcpp::R_POWERPC_TPREL16: + case elfcpp::R_POWERPC_TPREL16_LO: + case elfcpp::R_POWERPC_TPREL16_HI: + case elfcpp::R_POWERPC_TPREL16_HA: layout->set_has_static_tls(); break; default: @@ -8386,6 +8399,42 @@ Target_powerpc<size, big_endian>::Scan::local( switch (r_type) { + case elfcpp::R_POWERPC_TPREL16_HA: + if (target->tprel_opt()) + { + section_size_type slen; + const unsigned char* view = NULL; + view = ppc_object->section_contents(data_shndx, &slen, false); + section_size_type off + = convert_to_section_size_type(reloc.get_r_offset()) & -4; + if (off < slen) + { + uint32_t insn = elfcpp::Swap<32, big_endian>::readval(view + off); + if ((insn & ((0x3fu << 26) | 0x1f << 16)) + != ((15u << 26) | ((size == 32 ? 2 : 13) << 16))) + target->set_tprel_opt(false); + } + } + break; + + case elfcpp::R_PPC64_TPREL16_HIGH: + case elfcpp::R_PPC64_TPREL16_HIGHA: + case elfcpp::R_PPC64_TPREL16_HIGHER: + case elfcpp::R_PPC64_TPREL16_HIGHERA: + case elfcpp::R_PPC64_TPREL16_HIGHEST: + case elfcpp::R_PPC64_TPREL16_HIGHESTA: + if (size != 64) + break; + // Fall through. + case elfcpp::R_POWERPC_TPREL16_HI: + target->set_tprel_opt(false); + break; + default: + break; + } + + switch (r_type) + { case elfcpp::R_PPC64_D34: case elfcpp::R_PPC64_D34_LO: case elfcpp::R_PPC64_D34_HI30: @@ -9123,10 +9172,6 @@ Target_powerpc<size, big_endian>::Scan::global( switch (r_type) { - case elfcpp::R_POWERPC_TPREL16: - case elfcpp::R_POWERPC_TPREL16_LO: - case elfcpp::R_POWERPC_TPREL16_HI: - case elfcpp::R_POWERPC_TPREL16_HA: case elfcpp::R_PPC64_TPREL16_DS: case elfcpp::R_PPC64_TPREL16_LO_DS: case elfcpp::R_PPC64_TPREL16_HIGH: @@ -9136,6 +9181,13 @@ Target_powerpc<size, big_endian>::Scan::global( case elfcpp::R_PPC64_TPREL16_HIGHEST: case elfcpp::R_PPC64_TPREL16_HIGHESTA: case elfcpp::R_PPC64_TPREL34: + if (size != 64) + break; + // Fall through. + case elfcpp::R_POWERPC_TPREL16: + case elfcpp::R_POWERPC_TPREL16_LO: + case elfcpp::R_POWERPC_TPREL16_HI: + case elfcpp::R_POWERPC_TPREL16_HA: layout->set_has_static_tls(); break; default: @@ -9144,6 +9196,42 @@ Target_powerpc<size, big_endian>::Scan::global( switch (r_type) { + case elfcpp::R_POWERPC_TPREL16_HA: + if (target->tprel_opt()) + { + section_size_type slen; + const unsigned char* view = NULL; + view = ppc_object->section_contents(data_shndx, &slen, false); + section_size_type off + = convert_to_section_size_type(reloc.get_r_offset()) & -4; + if (off < slen) + { + uint32_t insn = elfcpp::Swap<32, big_endian>::readval(view + off); + if ((insn & ((0x3fu << 26) | 0x1f << 16)) + != ((15u << 26) | ((size == 32 ? 2 : 13) << 16))) + target->set_tprel_opt(false); + } + } + break; + + case elfcpp::R_PPC64_TPREL16_HIGH: + case elfcpp::R_PPC64_TPREL16_HIGHA: + case elfcpp::R_PPC64_TPREL16_HIGHER: + case elfcpp::R_PPC64_TPREL16_HIGHERA: + case elfcpp::R_PPC64_TPREL16_HIGHEST: + case elfcpp::R_PPC64_TPREL16_HIGHESTA: + if (size != 64) + break; + // Fall through. + case elfcpp::R_POWERPC_TPREL16_HI: + target->set_tprel_opt(false); + break; + default: + break; + } + + switch (r_type) + { case elfcpp::R_PPC64_D34: case elfcpp::R_PPC64_D34_LO: case elfcpp::R_PPC64_D34_HI30: @@ -11124,10 +11212,9 @@ Target_powerpc<size, big_endian>::Relocate::relocate( break; } - if (size == 64 - && (gsym - ? relative_value_is_known(gsym) - : relative_value_is_known(psymval))) + if (gsym + ? relative_value_is_known(gsym) + : relative_value_is_known(psymval)) { Insn* iview; Insn* iview2; @@ -11152,7 +11239,7 @@ Target_powerpc<size, big_endian>::Relocate::relocate( case elfcpp::R_POWERPC_GOT_DTPREL16_HA: case elfcpp::R_POWERPC_GOT16_HA: case elfcpp::R_PPC64_TOC16_HA: - if (parameters->options().toc_optimize()) + if (size == 64 && parameters->options().toc_optimize()) { iview = reinterpret_cast<Insn*>(view - d_offset); insn = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11184,7 +11271,7 @@ Target_powerpc<size, big_endian>::Relocate::relocate( case elfcpp::R_PPC64_GOT16_LO_DS: case elfcpp::R_PPC64_TOC16_LO: case elfcpp::R_PPC64_TOC16_LO_DS: - if (parameters->options().toc_optimize()) + if (size == 64 && parameters->options().toc_optimize()) { iview = reinterpret_cast<Insn*>(view - d_offset); insn = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11223,7 +11310,7 @@ Target_powerpc<size, big_endian>::Relocate::relocate( break; case elfcpp::R_PPC64_GOT_PCREL34: - if (parameters->options().toc_optimize()) + if (size == 64 && parameters->options().toc_optimize()) { iview = reinterpret_cast<Insn*>(view); pinsn = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11249,63 +11336,57 @@ Target_powerpc<size, big_endian>::Relocate::relocate( break; case elfcpp::R_PPC64_PCREL34: - { - iview = reinterpret_cast<Insn*>(view); - pinsn = elfcpp::Swap<32, big_endian>::readval(iview); - pinsn <<= 32; - pinsn |= elfcpp::Swap<32, big_endian>::readval(iview + 1); - if ((pinsn & ((-1ULL << 50) | (63ULL << 26))) - != ((1ULL << 58) | (2ULL << 56) | (1ULL << 52) - | (14ULL << 26) /* paddi */)) - break; + if (size == 64) + { + iview = reinterpret_cast<Insn*>(view); + pinsn = elfcpp::Swap<32, big_endian>::readval(iview); + pinsn <<= 32; + pinsn |= elfcpp::Swap<32, big_endian>::readval(iview + 1); + if ((pinsn & ((-1ULL << 50) | (63ULL << 26))) + != ((1ULL << 58) | (2ULL << 56) | (1ULL << 52) + | (14ULL << 26) /* paddi */)) + break; - pcrelopt: - const int reloc_size = elfcpp::Elf_sizes<size>::rela_size; - elfcpp::Shdr<size, big_endian> shdr(relinfo->reloc_shdr); - size_t reloc_count = shdr.get_sh_size() / reloc_size; - if (relnum >= reloc_count - 1) - break; + pcrelopt: + const int reloc_size = elfcpp::Elf_sizes<size>::rela_size; + elfcpp::Shdr<size, big_endian> shdr(relinfo->reloc_shdr); + size_t reloc_count = shdr.get_sh_size() / reloc_size; + if (relnum >= reloc_count - 1) + break; - Reltype next_rela(preloc + reloc_size); - if ((elfcpp::elf_r_type<size>(next_rela.get_r_info()) - != elfcpp::R_PPC64_PCREL_OPT) - || next_rela.get_r_offset() != rela.get_r_offset()) - break; + Reltype next_rela(preloc + reloc_size); + if ((elfcpp::elf_r_type<size>(next_rela.get_r_info()) + != elfcpp::R_PPC64_PCREL_OPT) + || next_rela.get_r_offset() != rela.get_r_offset()) + break; - Address off = next_rela.get_r_addend(); - if (off == 0) - off = 8; // zero means next insn. - if (off + rela.get_r_offset() + 4 > view_size) - break; + Address off = next_rela.get_r_addend(); + if (off == 0) + off = 8; // zero means next insn. + if (off + rela.get_r_offset() + 4 > view_size) + break; - iview2 = reinterpret_cast<Insn*>(view + off); - pinsn2 = elfcpp::Swap<32, big_endian>::readval(iview2); - pinsn2 <<= 32; - if ((pinsn2 & (63ULL << 58)) == 1ULL << 58) - break; - if (xlate_pcrel_opt(&pinsn, &pinsn2)) - { - elfcpp::Swap<32, big_endian>::writeval(iview, pinsn >> 32); - elfcpp::Swap<32, big_endian>::writeval(iview + 1, - pinsn & 0xffffffff); - elfcpp::Swap<32, big_endian>::writeval(iview2, pinsn2 >> 32); - } - } + iview2 = reinterpret_cast<Insn*>(view + off); + pinsn2 = elfcpp::Swap<32, big_endian>::readval(iview2); + pinsn2 <<= 32; + if ((pinsn2 & (63ULL << 58)) == 1ULL << 58) + break; + if (xlate_pcrel_opt(&pinsn, &pinsn2)) + { + elfcpp::Swap<32, big_endian>::writeval(iview, pinsn >> 32); + elfcpp::Swap<32, big_endian>::writeval(iview + 1, + pinsn & 0xffffffff); + elfcpp::Swap<32, big_endian>::writeval(iview2, pinsn2 >> 32); + } + } break; case elfcpp::R_POWERPC_TPREL16_HA: - if (parameters->options().tls_optimize() && value + 0x8000 < 0x10000) + if (target->tprel_opt() && value + 0x8000 < 0x10000) { Insn* iview = reinterpret_cast<Insn*>(view - d_offset); - Insn insn = elfcpp::Swap<32, big_endian>::readval(iview); - if ((insn & ((0x3f << 26) | 0x1f << 16)) - != ((15u << 26) | ((size == 32 ? 2 : 13) << 16))) - ; - else - { - elfcpp::Swap<32, big_endian>::writeval(iview, nop); - return true; - } + elfcpp::Swap<32, big_endian>::writeval(iview, nop); + return true; } break; @@ -11315,7 +11396,7 @@ Target_powerpc<size, big_endian>::Relocate::relocate( break; // Fall through. case elfcpp::R_POWERPC_TPREL16_LO: - if (parameters->options().tls_optimize() && value + 0x8000 < 0x10000) + if (target->tprel_opt() && value + 0x8000 < 0x10000) { Insn* iview = reinterpret_cast<Insn*>(view - d_offset); Insn insn = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11326,29 +11407,12 @@ Target_powerpc<size, big_endian>::Relocate::relocate( break; case elfcpp::R_PPC64_ENTRY: - value = (target->got_section()->output_section()->address() - + object->toc_base_offset()); - if (value + 0x80008000 <= 0xffffffff - && !parameters->options().output_is_position_independent()) - { - Insn* iview = reinterpret_cast<Insn*>(view); - Insn insn1 = elfcpp::Swap<32, big_endian>::readval(iview); - Insn insn2 = elfcpp::Swap<32, big_endian>::readval(iview + 1); - - if ((insn1 & ~0xfffc) == ld_2_12 - && insn2 == add_2_2_12) - { - insn1 = lis_2 + ha(value); - elfcpp::Swap<32, big_endian>::writeval(iview, insn1); - insn2 = addi_2_2 + l(value); - elfcpp::Swap<32, big_endian>::writeval(iview + 1, insn2); - return true; - } - } - else + if (size == 64) { - value -= address; - if (value + 0x80008000 <= 0xffffffff) + value = (target->got_section()->output_section()->address() + + object->toc_base_offset()); + if (value + 0x80008000 <= 0xffffffff + && !parameters->options().output_is_position_independent()) { Insn* iview = reinterpret_cast<Insn*>(view); Insn insn1 = elfcpp::Swap<32, big_endian>::readval(iview); @@ -11357,13 +11421,33 @@ Target_powerpc<size, big_endian>::Relocate::relocate( if ((insn1 & ~0xfffc) == ld_2_12 && insn2 == add_2_2_12) { - insn1 = addis_2_12 + ha(value); + insn1 = lis_2 + ha(value); elfcpp::Swap<32, big_endian>::writeval(iview, insn1); insn2 = addi_2_2 + l(value); elfcpp::Swap<32, big_endian>::writeval(iview + 1, insn2); return true; } } + else + { + value -= address; + if (value + 0x80008000 <= 0xffffffff) + { + Insn* iview = reinterpret_cast<Insn*>(view); + Insn insn1 = elfcpp::Swap<32, big_endian>::readval(iview); + Insn insn2 = elfcpp::Swap<32, big_endian>::readval(iview + 1); + + if ((insn1 & ~0xfffc) == ld_2_12 + && insn2 == add_2_2_12) + { + insn1 = addis_2_12 + ha(value); + elfcpp::Swap<32, big_endian>::writeval(iview, insn1); + insn2 = addi_2_2 + l(value); + elfcpp::Swap<32, big_endian>::writeval(iview + 1, insn2); + return true; + } + } + } } break; @@ -11375,7 +11459,8 @@ Target_powerpc<size, big_endian>::Relocate::relocate( // lis 2,.TOC.@ha // addi 2,2,.TOC.@l // if .TOC. is in range. */ - if (value + address - 4 + 0x80008000 <= 0xffffffff + if (size == 64 + && value + address - 4 + 0x80008000 <= 0xffffffff && relnum + 1 > 1 && preloc != NULL && target->abiversion() >= 2 |