aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gold/ChangeLog6
-rw-r--r--gold/sparc.cc152
2 files changed, 158 insertions, 0 deletions
diff --git a/gold/ChangeLog b/gold/ChangeLog
index ea98788..9916e58 100644
--- a/gold/ChangeLog
+++ b/gold/ChangeLog
@@ -1,3 +1,9 @@
+2012-04-24 David S. Miller <davem@davemloft.net>
+
+ * sparc.cc (Target_sparc::Relocate::relax_call): New function.
+ (Target_sparc::Relocate::relocate): Call it for R_SPARC_WDISP30
+ and R_SPARC_WPLT30.
+
2012-04-24 Cary Coutant <ccoutant@google.com>
* incremental-dump.cc (find_input_containing_global): Replace
diff --git a/gold/sparc.cc b/gold/sparc.cc
index eb238f9..a85bb81 100644
--- a/gold/sparc.cc
+++ b/gold/sparc.cc
@@ -333,6 +333,12 @@ class Target_sparc : public Sized_target<size, big_endian>
typename elfcpp::Elf_types<size>::Elf_Addr,
section_size_type);
+ inline void
+ relax_call(Target_sparc<size, big_endian>* target,
+ unsigned char* view,
+ const elfcpp::Rela<size, big_endian>& rela,
+ section_size_type view_size);
+
// Ignore the next relocation which should be R_SPARC_TLS_GD_ADD
bool ignore_gd_add_;
@@ -3304,6 +3310,8 @@ Target_sparc<size, big_endian>::Relocate::relocate(
case elfcpp::R_SPARC_WDISP30:
case elfcpp::R_SPARC_WPLT30:
Reloc::wdisp30(view, object, psymval, addend, address);
+ if (target->may_relax())
+ relax_call(target, view, rela, view_size);
break;
case elfcpp::R_SPARC_WDISP22:
@@ -3954,6 +3962,150 @@ Target_sparc<size, big_endian>::Relocate::relocate_tls(
}
}
+// Relax a call instruction.
+
+template<int size, bool big_endian>
+inline void
+Target_sparc<size, big_endian>::Relocate::relax_call(
+ Target_sparc<size, big_endian>* target,
+ unsigned char* view,
+ const elfcpp::Rela<size, big_endian>& rela,
+ section_size_type view_size)
+{
+ typedef typename elfcpp::Swap<32, true>::Valtype Insntype;
+ Insntype *wv = reinterpret_cast<Insntype*>(view);
+ Insntype call_insn, delay_insn, set_insn;
+ uint32_t op3, reg, off;
+
+ // This code tries to relax call instructions that meet
+ // certain criteria.
+ //
+ // The first criteria is that the call must be such that the return
+ // address which the call writes into %o7 is unused. Two sequences
+ // meet this criteria, and are used to implement tail calls.
+ //
+ // Leaf function tail call:
+ //
+ // or %o7, %g0, %ANY_REG
+ // call FUNC
+ // or %ANY_REG, %g0, %o7
+ //
+ // Non-leaf function tail call:
+ //
+ // call FUNC
+ // restore
+ //
+ // The second criteria is that the call destination is close. If
+ // the displacement can fit in a signed 22-bit immediate field of a
+ // pre-V9 branch, we can do it. If we are generating a 64-bit
+ // object or a 32-bit object with ELF machine type EF_SPARC32PLUS,
+ // and the displacement fits in a signed 19-bit immediate field,
+ // then we can use a V9 branch.
+
+ // Make sure the delay instruction can be safely accessed.
+ if (rela.get_r_offset() + 8 > view_size)
+ return;
+
+ call_insn = elfcpp::Swap<32, true>::readval(wv);
+ delay_insn = elfcpp::Swap<32, true>::readval(wv + 1);
+
+ // Make sure it is really a call instruction.
+ if (((call_insn >> 30) & 0x3) != 1)
+ return;
+
+ if (((delay_insn >> 30) & 0x3) != 2)
+ return;
+
+ // Accept only a restore or an integer arithmetic operation whose
+ // sole side effect is to write the %o7 register (and perhaps set
+ // the condition codes, which are considered clobbered across
+ // function calls).
+ //
+ // For example, we don't want to match a tagged addition or
+ // subtraction. We also don't want to match something like a
+ // divide.
+ //
+ // Specifically we accept add{,cc}, and{,cc}, or{,cc},
+ // xor{,cc}, sub{,cc}, andn{,cc}, orn{,cc}, and xnor{,cc}.
+
+ op3 = (delay_insn >> 19) & 0x3f;
+ reg = (delay_insn >> 25) & 0x1f;
+ if (op3 != 0x3d
+ && ((op3 & 0x28) != 0 || reg != 15))
+ return;
+
+ // For non-restore instructions, make sure %o7 isn't
+ // an input.
+ if (op3 != 0x3d)
+ {
+ // First check RS1
+ reg = (delay_insn >> 14) & 0x15;
+ if (reg == 15)
+ return;
+
+ // And if non-immediate, check RS2
+ if (((delay_insn >> 13) & 1) == 0)
+ {
+ reg = (delay_insn & 0x1f);
+ if (reg == 15)
+ return;
+ }
+ }
+
+ // Now check the branch distance. We are called after the
+ // call has been relocated, so we just have to peek at the
+ // offset contained in the instruction.
+ off = call_insn & 0x3fffffff;
+ if ((off & 0x3fe00000) != 0
+ && (off & 0x3fe00000) != 0x3fe00000)
+ return;
+
+ if ((size == 64 || target->elf_machine_ == elfcpp::EM_SPARC32PLUS)
+ && ((off & 0x3c0000) == 0
+ || (off & 0x3c0000) == 0x3c0000))
+ {
+ // ba,pt %xcc, FUNC
+ call_insn = 0x10680000 | (off & 0x07ffff);
+ }
+ else
+ {
+ // ba FUNC
+ call_insn = 0x10800000 | (off & 0x3fffff);
+ }
+ elfcpp::Swap<32, true>::writeval(wv, call_insn);
+
+ // See if we can NOP out the delay slot instruction. We peek
+ // at the instruction before the call to make sure we're dealing
+ // with exactly the:
+ //
+ // or %o7, %g0, %ANY_REG
+ // call
+ // or %ANY_REG, %g0, %o7
+ //
+ // case. Otherwise this might be a tricky piece of hand written
+ // assembler calculating %o7 in some non-trivial way, and therefore
+ // we can't be sure that NOP'ing out the delay slot is safe.
+ if (op3 == 0x02
+ && rela.get_r_offset() >= 4)
+ {
+ if ((delay_insn & ~(0x1f << 14)) != 0x9e100000)
+ return;
+
+ set_insn = elfcpp::Swap<32, true>::readval(wv - 1);
+ if ((set_insn & ~(0x1f << 25)) != 0x8013c000)
+ return;
+
+ reg = (set_insn >> 25) & 0x1f;
+ if (reg == 0 || reg == 15)
+ return;
+ if (reg != ((delay_insn >> 14) & 0x1f))
+ return;
+
+ // All tests pass, nop it out.
+ elfcpp::Swap<32, true>::writeval(wv + 1, sparc_nop);
+ }
+}
+
// Relocate section data.
template<int size, bool big_endian>