diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-06-29 08:38:43 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-06-29 08:38:58 -0700 |
commit | b287eca391bbb10e709fb8ba9a56be166ab2ff1d (patch) | |
tree | 5a26635b87621c0df49ebaac6ccdcc456f92e974 /gold/i386.cc | |
parent | ad961eab9a010e79d17a4ea7e6bb977fe6dd86c2 (diff) | |
download | binutils-b287eca391bbb10e709fb8ba9a56be166ab2ff1d.zip binutils-b287eca391bbb10e709fb8ba9a56be166ab2ff1d.tar.gz binutils-b287eca391bbb10e709fb8ba9a56be166ab2ff1d.tar.bz2 |
gold: Support 386 TLS code sequences without PLT
There are extensions to 386 psABI:
https://groups.google.com/forum/#!topic/ia32-abi/awsRSvJOJfs
to call ___tls_get_addr via GOT:
call *___tls_get_addr@GOT(%reg)
where EBX register isn't required as GOT base.
Since direct call is 4-byte long and indirect call is 5-byte long, the
extra one byte must be handled properly.
For general dynamic model, 7-byte lea instruction before call
instruction is replaced by 6-byte one to make room for indirect call.
For local dynamic model, we simply use 5-byte indirect call.
TLS linker optimization is updated to recognize new instruction
patterns. For local dynamic model to local exec model transition,
we generate a 6-byte lea instruction as nop, instead of a 1-byte nop
plus a 4-byte lea instruction.
PR gold/20308
* i386.cc (Target_i386::Relocate::relocate): Allow
R_386_GOT32X relocation against ___tls_get_addr.
(Target_i386::Relocate::tls_gd_to_ie): Support indirect
call to __tls_get_addr.
(Target_i386::Relocate::tls_gd_to_le): Likewise.
(Target_i386::Relocate::tls_ld_to_le): Likewise.
* testsuite/Makefile.am (check_PROGRAMS): Add pr20308a_test,
pr20308b_test, pr20308c_test, pr20308d_test, pr20308e_test.
(pr20308a_test_SOURCES): New.
(pr20308a_test_DEPENDENCIES): Likewise.
(pr20308a_test_CFLAGS): Likewise.
(pr20308a_test_LDFLAGS): Likewise.
(pr20308a_test_LDADD): Likewise.
(pr20308b_test_SOURCES): Likewise.
(pr20308b_test_DEPENDENCIES): Likewise.
(pr20308b_test_CFLAGS): Likewise.
(pr20308b_test_LDFLAGS): Likewise.
(pr20308b_test_LDADD): Likewise.
(pr20308c_test_SOURCES): Likewise.
(pr20308c_test_DEPENDENCIES): Likewise.
(pr20308c_test_CFLAGS): Likewise.
(pr20308c_test_LDFLAGS): Likewise.
(pr20308c_test_LDADD): Likewise.
(pr20308d_test_SOURCES): Likewise.
(pr20308d_test_DEPENDENCIES): Likewise.
(pr20308d_test_CFLAGS): Likewise.
(pr20308d_test_LDFLAGS): Likewise.
(pr20308d_test_LDADD): Likewise.
(pr20308e_test_SOURCES): Likewise.
(pr20308e_test_DEPENDENCIES): Likewise.
(pr20308e_test_CFLAGS): Likewise.
(pr20308e_test_LDFLAGS): Likewise.
(pr20308e_test_LDADD): Likewise.
(pr20308a.so): Likewise.
(pr20308b.so): Likewise.
(pr20308_gd.o): Likewise.
(pr20308_ld.o): Likewise.
(MOSTLYCLEANFILES): Add pr20308a.so pr20308b.so.
* testsuite/Makefile.in: Regenerated.
* testsuite/pr20308_def.c: New file.
* testsuite/pr20308_gd.S: Likewise.
* testsuite/pr20308_ld.S: Likewise.
* testsuite/pr20308_main.c: Likewise.
Diffstat (limited to 'gold/i386.cc')
-rw-r--r-- | gold/i386.cc | 72 |
1 files changed, 54 insertions, 18 deletions
diff --git a/gold/i386.cc b/gold/i386.cc index 87e6a6d..28864cd 100644 --- a/gold/i386.cc +++ b/gold/i386.cc @@ -2790,6 +2790,7 @@ Target_i386::Relocate::relocate(const Relocate_info<32, false>* relinfo, if (this->skip_call_tls_get_addr_) { if ((r_type != elfcpp::R_386_PLT32 + && r_type != elfcpp::R_386_GOT32X && r_type != elfcpp::R_386_PC32) || gsym == NULL || strcmp(gsym->name(), "___tls_get_addr") != 0) @@ -3318,9 +3319,11 @@ Target_i386::Relocate::tls_gd_to_le(const Relocate_info<32, false>* relinfo, unsigned char* view, section_size_type view_size) { - // leal foo(,%reg,1),%eax; call ___tls_get_addr + // leal foo(,%ebx,1),%eax; call ___tls_get_addr@PLT // ==> movl %gs:0,%eax; subl $foo@tpoff,%eax - // leal foo(%reg),%eax; call ___tls_get_addr + // leal foo(%ebx),%eax; call ___tls_get_addr@PLT + // ==> movl %gs:0,%eax; subl $foo@tpoff,%eax + // leal foo(%reg),%eax; call *___tls_get_addr@GOT(%reg) // ==> movl %gs:0,%eax; subl $foo@tpoff,%eax tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, -2); @@ -3328,10 +3331,12 @@ Target_i386::Relocate::tls_gd_to_le(const Relocate_info<32, false>* relinfo, unsigned char op1 = view[-1]; unsigned char op2 = view[-2]; + unsigned char op3 = view[4]; tls::check_tls(relinfo, relnum, rel.get_r_offset(), op2 == 0x8d || op2 == 0x04); - tls::check_tls(relinfo, relnum, rel.get_r_offset(), view[4] == 0xe8); + tls::check_tls(relinfo, relnum, rel.get_r_offset(), + op3 == 0xe8 || op3 == 0xff); int roff = 5; @@ -3345,12 +3350,18 @@ Target_i386::Relocate::tls_gd_to_le(const Relocate_info<32, false>* relinfo, } else { + unsigned char reg = op1 & 7; tls::check_tls(relinfo, relnum, rel.get_r_offset(), - (op1 & 0xf8) == 0x80 && (op1 & 7) != 4); - if (rel.get_r_offset() + 9 < view_size - && view[9] == 0x90) + ((op1 & 0xf8) == 0x80 + && reg != 4 + && reg != 0 + && (op3 == 0xe8 || (view[5] & 0x7) == reg))); + if (op3 == 0xff + || (rel.get_r_offset() + 9 < view_size + && view[9] == 0x90)) { - // There is a trailing nop. 
Use the size byte subl. + // There is an indirect call or a trailing nop. Use the size + // byte subl. memcpy(view - 2, "\x65\xa1\0\0\0\0\x81\xe8\0\0\0", 12); roff = 6; } @@ -3381,20 +3392,24 @@ Target_i386::Relocate::tls_gd_to_ie(const Relocate_info<32, false>* relinfo, unsigned char* view, section_size_type view_size) { - // leal foo(,%ebx,1),%eax; call ___tls_get_addr + // leal foo(,%ebx,1),%eax; call ___tls_get_addr@PLT // ==> movl %gs:0,%eax; addl foo@gotntpoff(%ebx),%eax - // leal foo(%ebx),%eax; call ___tls_get_addr; nop + // leal foo(%ebx),%eax; call ___tls_get_addr@PLT; nop // ==> movl %gs:0,%eax; addl foo@gotntpoff(%ebx),%eax + // leal foo(%reg),%eax; call *___tls_get_addr@GOT(%reg) + // ==> movl %gs:0,%eax; addl foo@gotntpoff(%reg),%eax tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, -2); tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, 9); unsigned char op1 = view[-1]; unsigned char op2 = view[-2]; + unsigned char op3 = view[4]; tls::check_tls(relinfo, relnum, rel.get_r_offset(), op2 == 0x8d || op2 == 0x04); - tls::check_tls(relinfo, relnum, rel.get_r_offset(), view[4] == 0xe8); + tls::check_tls(relinfo, relnum, rel.get_r_offset(), + op3 == 0xe8 || op3 == 0xff); int roff; @@ -3408,10 +3423,14 @@ Target_i386::Relocate::tls_gd_to_ie(const Relocate_info<32, false>* relinfo, } else { + unsigned char reg = op1 & 7; tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, 10); tls::check_tls(relinfo, relnum, rel.get_r_offset(), - (op1 & 0xf8) == 0x80 && (op1 & 7) != 4); - tls::check_tls(relinfo, relnum, rel.get_r_offset(), view[9] == 0x90); + ((op1 & 0xf8) == 0x80 + && reg != 4 + && reg != 0 + && ((op3 == 0xe8 && view[9] == 0x90) + || (view[5] & 0x7) == reg))); roff = 6; } @@ -3512,19 +3531,36 @@ Target_i386::Relocate::tls_ld_to_le(const Relocate_info<32, false>* relinfo, unsigned char* view, section_size_type view_size) { - // leal foo(%reg), %eax; call ___tls_get_addr + // leal foo(%ebx), %eax; call 
___tls_get_addr@PLT // ==> movl %gs:0,%eax; nop; leal 0(%esi,1),%esi + // leal foo(%reg), %eax; call call *___tls_get_addr@GOT(%reg) + // ==> movl %gs:0,%eax; leal (%esi),%esi tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, -2); - tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, 9); - // FIXME: Does this test really always pass? + unsigned char op1 = view[-1]; + unsigned char op2 = view[-2]; + unsigned char op3 = view[4]; + tls::check_tls(relinfo, relnum, rel.get_r_offset(), - view[-2] == 0x8d && view[-1] == 0x83); + op3 == 0xe8 || op3 == 0xff); + tls::check_range(relinfo, relnum, rel.get_r_offset(), view_size, + op3 == 0xe8 ? 9 : 10); + + // FIXME: Does this test really always pass? + tls::check_tls(relinfo, relnum, rel.get_r_offset(), op2 == 0x8d); - tls::check_tls(relinfo, relnum, rel.get_r_offset(), view[4] == 0xe8); + unsigned char reg = op1 & 7; + tls::check_tls(relinfo, relnum, rel.get_r_offset(), + ((op1 & 0xf8) == 0x80 + && reg != 4 + && reg != 0 + && (op3 == 0xe8 || (view[5] & 0x7) == reg))); - memcpy(view - 2, "\x65\xa1\0\0\0\0\x90\x8d\x74\x26\0", 11); + if (op3 == 0xe8) + memcpy(view - 2, "\x65\xa1\0\0\0\0\x90\x8d\x74\x26\0", 11); + else + memcpy(view - 2, "\x65\xa1\0\0\0\0\x8d\xb6\0\0\0\0", 12); // The next reloc should be a PLT32 reloc against __tls_get_addr. // We can skip it. |