From a5a5f7a3361f35dc24a91a33a061ff8516c2595b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Apr 2012 22:40:23 +0000 Subject: gold: Add linker relaxation of tail calls on sparc. gold/ * sparc.cc (Target_sparc::Relocate::relax_call): New function. (Target_sparc::Relocate::relocate): Call it for R_SPARC_WDISP30 and R_SPARC_WPLT30. --- gold/ChangeLog | 6 +++ gold/sparc.cc | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) (limited to 'gold') diff --git a/gold/ChangeLog b/gold/ChangeLog index ea98788..9916e58 100644 --- a/gold/ChangeLog +++ b/gold/ChangeLog @@ -1,3 +1,9 @@ +2012-04-24 David S. Miller + + * sparc.cc (Target_sparc::Relocate::relax_call): New function. + (Target_sparc::Relocate::relocate): Call it for R_SPARC_WDISP30 + and R_SPARC_WPLT30. + 2012-04-24 Cary Coutant * incremental-dump.cc (find_input_containing_global): Replace diff --git a/gold/sparc.cc b/gold/sparc.cc index eb238f9..a85bb81 100644 --- a/gold/sparc.cc +++ b/gold/sparc.cc @@ -333,6 +333,12 @@ class Target_sparc : public Sized_target typename elfcpp::Elf_types::Elf_Addr, section_size_type); + inline void + relax_call(Target_sparc* target, + unsigned char* view, + const elfcpp::Rela& rela, + section_size_type view_size); + // Ignore the next relocation which should be R_SPARC_TLS_GD_ADD bool ignore_gd_add_; @@ -3304,6 +3310,8 @@ Target_sparc::Relocate::relocate( case elfcpp::R_SPARC_WDISP30: case elfcpp::R_SPARC_WPLT30: Reloc::wdisp30(view, object, psymval, addend, address); + if (target->may_relax()) + relax_call(target, view, rela, view_size); break; case elfcpp::R_SPARC_WDISP22: @@ -3954,6 +3962,150 @@ Target_sparc::Relocate::relocate_tls( } } +// Relax a call instruction. + +template +inline void +Target_sparc::Relocate::relax_call( + Target_sparc* target, + unsigned char* view, + const elfcpp::Rela& rela, + section_size_type view_size) +{ + typedef typename elfcpp::Swap<32, true>::Valtype Insntype; + Insntype *wv = reinterpret_cast(view); + Insntype call_insn, delay_insn, set_insn; + uint32_t op3, reg, off; + + // This code tries to relax call instructions that meet + // certain criteria. + // + // The first criteria is that the call must be such that the return + // address which the call writes into %o7 is unused. Two sequences + // meet this criteria, and are used to implement tail calls. + // + // Leaf function tail call: + // + // or %o7, %g0, %ANY_REG + // call FUNC + // or %ANY_REG, %g0, %o7 + // + // Non-leaf function tail call: + // + // call FUNC + // restore + // + // The second criteria is that the call destination is close. If + // the displacement can fit in a signed 22-bit immediate field of a + // pre-V9 branch, we can do it. If we are generating a 64-bit + // object or a 32-bit object with ELF machine type EF_SPARC32PLUS, + // and the displacement fits in a signed 19-bit immediate field, + // then we can use a V9 branch. + + // Make sure the delay instruction can be safely accessed. + if (rela.get_r_offset() + 8 > view_size) + return; + + call_insn = elfcpp::Swap<32, true>::readval(wv); + delay_insn = elfcpp::Swap<32, true>::readval(wv + 1); + + // Make sure it is really a call instruction. + if (((call_insn >> 30) & 0x3) != 1) + return; + + if (((delay_insn >> 30) & 0x3) != 2) + return; + + // Accept only a restore or an integer arithmetic operation whose + // sole side effect is to write the %o7 register (and perhaps set + // the condition codes, which are considered clobbered across + // function calls). + // + // For example, we don't want to match a tagged addition or + // subtraction. We also don't want to match something like a + // divide. + // + // Specifically we accept add{,cc}, and{,cc}, or{,cc}, + // xor{,cc}, sub{,cc}, andn{,cc}, orn{,cc}, and xnor{,cc}. + + op3 = (delay_insn >> 19) & 0x3f; + reg = (delay_insn >> 25) & 0x1f; + if (op3 != 0x3d + && ((op3 & 0x28) != 0 || reg != 15)) + return; + + // For non-restore instructions, make sure %o7 isn't + // an input. + if (op3 != 0x3d) + { + // First check RS1 + reg = (delay_insn >> 14) & 0x15; + if (reg == 15) + return; + + // And if non-immediate, check RS2 + if (((delay_insn >> 13) & 1) == 0) + { + reg = (delay_insn & 0x1f); + if (reg == 15) + return; + } + } + + // Now check the branch distance. We are called after the + // call has been relocated, so we just have to peek at the + // offset contained in the instruction. + off = call_insn & 0x3fffffff; + if ((off & 0x3fe00000) != 0 + && (off & 0x3fe00000) != 0x3fe00000) + return; + + if ((size == 64 || target->elf_machine_ == elfcpp::EM_SPARC32PLUS) + && ((off & 0x3c0000) == 0 + || (off & 0x3c0000) == 0x3c0000)) + { + // ba,pt %xcc, FUNC + call_insn = 0x10680000 | (off & 0x07ffff); + } + else + { + // ba FUNC + call_insn = 0x10800000 | (off & 0x3fffff); + } + elfcpp::Swap<32, true>::writeval(wv, call_insn); + + // See if we can NOP out the delay slot instruction. We peek + // at the instruction before the call to make sure we're dealing + // with exactly the: + // + // or %o7, %g0, %ANY_REG + // call + // or %ANY_REG, %g0, %o7 + // + // case. Otherwise this might be a tricky piece of hand written + // assembler calculating %o7 in some non-trivial way, and therefore + // we can't be sure that NOP'ing out the delay slot is safe. + if (op3 == 0x02 + && rela.get_r_offset() >= 4) + { + if ((delay_insn & ~(0x1f << 14)) != 0x9e100000) + return; + + set_insn = elfcpp::Swap<32, true>::readval(wv - 1); + if ((set_insn & ~(0x1f << 25)) != 0x8013c000) + return; + + reg = (set_insn >> 25) & 0x1f; + if (reg == 0 || reg == 15) + return; + if (reg != ((delay_insn >> 14) & 0x1f)) + return; + + // All tests pass, nop it out. + elfcpp::Swap<32, true>::writeval(wv + 1, sparc_nop); + } +} + // Relocate section data. template -- cgit v1.1