aboutsummaryrefslogtreecommitdiff
path: root/gold/x86_64.cc
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2016-06-29 08:37:30 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2016-06-29 08:37:42 -0700
commit ad961eab9a010e79d17a4ea7e6bb977fe6dd86c2 (patch)
tree a4e66fa49c5f2abc3328cceae2ab73cbf470ad0b /gold/x86_64.cc
parent 9bf74fb27dc6e2a9679403d66fe919215e3c2a45 (diff)
download fsf-binutils-gdb-ad961eab9a010e79d17a4ea7e6bb977fe6dd86c2.zip
fsf-binutils-gdb-ad961eab9a010e79d17a4ea7e6bb977fe6dd86c2.tar.gz
fsf-binutils-gdb-ad961eab9a010e79d17a4ea7e6bb977fe6dd86c2.tar.bz2
gold: Support x86-64 TLS code sequences without PLT
There are extensions to the x86-64 psABI: https://groups.google.com/forum/#!topic/x86-64-abi/de5_KnLHxtI to call __tls_get_addr via the GOT: call *__tls_get_addr@GOTPCREL(%rip) Since the direct call is 5 bytes long and the indirect call is 6 bytes long, the extra byte must be handled properly. For the general dynamic model, one 0x66 prefix before the call instruction is removed to make room for the indirect call. For the local dynamic model, we simply use the 6-byte indirect call. TLS linker optimization is updated to recognize the new instruction patterns. For the local dynamic to local exec model transition, we generate four 0x66 prefixes, instead of three, before the mov instruction in 64-bit mode and a 5-byte nop, instead of a 4-byte one, before the mov instruction in 32-bit mode. PR gold/20216 * configure.ac (DEFAULT_TARGET_X86_64_OR_X32): New AM_CONDITIONAL. * configure: Regenerated. * x86_64.cc (Target_x86_64<size>::Relocate::relocate): Allow R_X86_64_GOTPCRELX relocation against __tls_get_addr. (Target_x86_64<size>::Relocate::tls_gd_to_ie): Support indirect call to __tls_get_addr. (Target_x86_64<size>::Relocate::tls_gd_to_le): Likewise. (Target_x86_64<size>::Relocate::tls_ld_to_le): Likewise. * testsuite/Makefile.am (check_PROGRAMS): Add pr20216a_test, pr20216b_test, pr20216c_test, pr20216d_test, pr20216e_test. (pr20216a_test_SOURCES): New. (pr20216a_test_DEPENDENCIES): Likewise. (pr20216a_test_CFLAGS): Likewise. (pr20216a_test_LDFLAGS): Likewise. (pr20216a_test_LDADD): Likewise. (pr20216b_test_SOURCES): Likewise. (pr20216b_test_DEPENDENCIES): Likewise. (pr20216b_test_CFLAGS): Likewise. (pr20216b_test_LDFLAGS): Likewise. (pr20216b_test_LDADD): Likewise. (pr20216c_test_SOURCES): Likewise. (pr20216c_test_DEPENDENCIES): Likewise. (pr20216c_test_CFLAGS): Likewise. (pr20216c_test_LDFLAGS): Likewise. (pr20216c_test_LDADD): Likewise. (pr20216d_test_SOURCES): Likewise. (pr20216d_test_DEPENDENCIES): Likewise. (pr20216d_test_CFLAGS): Likewise. (pr20216d_test_LDFLAGS): Likewise. (pr20216d_test_LDADD): Likewise. 
(pr20216e_test_SOURCES): Likewise. (pr20216e_test_DEPENDENCIES): Likewise. (pr20216e_test_CFLAGS): Likewise. (pr20216e_test_LDFLAGS): Likewise. (pr20216e_test_LDADD): Likewise. (pr20216a.so): Likewise. (pr20216b.so): Likewise. (pr20216_gd.o): Likewise. (pr20216_ld.o): Likewise. (MOSTLYCLEANFILES): Add pr20216a.so pr20216b.so. * testsuite/Makefile.in: Regenerated. * testsuite/pr20216_def.c: New file. * testsuite/pr20216_gd.S: Likewise. * testsuite/pr20216_ld.S: Likewise. * testsuite/pr20216_main.c: Likewise.
Diffstat (limited to 'gold/x86_64.cc')
-rw-r--r-- gold/x86_64.cc 55
1 files changed, 45 insertions, 10 deletions
diff --git a/gold/x86_64.cc b/gold/x86_64.cc
index 6c511e2..6aa489a 100644
--- a/gold/x86_64.cc
+++ b/gold/x86_64.cc
@@ -3505,6 +3505,7 @@ Target_x86_64<size>::Relocate::relocate(
if (this->skip_call_tls_get_addr_)
{
if ((r_type != elfcpp::R_X86_64_PLT32
+ && r_type != elfcpp::R_X86_64_GOTPCRELX
&& r_type != elfcpp::R_X86_64_PLT32_BND
&& r_type != elfcpp::R_X86_64_PC32_BND
&& r_type != elfcpp::R_X86_64_PC32)
@@ -4169,16 +4170,23 @@ Target_x86_64<size>::Relocate::tls_gd_to_ie(
{
// For SIZE == 64:
// .byte 0x66; leaq foo@tlsgd(%rip),%rdi;
- // .word 0x6666; rex64; call __tls_get_addr
+ // .word 0x6666; rex64; call __tls_get_addr@PLT
+ // ==> movq %fs:0,%rax; addq x@gottpoff(%rip),%rax
+ // .byte 0x66; leaq foo@tlsgd(%rip),%rdi;
+ // .byte 0x66; rex64; call *__tls_get_addr@GOTPCREL(%rip)
// ==> movq %fs:0,%rax; addq x@gottpoff(%rip),%rax
// For SIZE == 32:
// leaq foo@tlsgd(%rip),%rdi;
- // .word 0x6666; rex64; call __tls_get_addr
+ // .word 0x6666; rex64; call __tls_get_addr@PLT
+ // ==> movl %fs:0,%eax; addq x@gottpoff(%rip),%rax
+ // leaq foo@tlsgd(%rip),%rdi;
+ // .byte 0x66; rex64; call *__tls_get_addr@GOTPCREL(%rip)
// ==> movl %fs:0,%eax; addq x@gottpoff(%rip),%rax
tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size, 12);
tls::check_tls(relinfo, relnum, rela.get_r_offset(),
- (memcmp(view + 4, "\x66\x66\x48\xe8", 4) == 0));
+ (memcmp(view + 4, "\x66\x66\x48\xe8", 4) == 0
+ || memcmp(view + 4, "\x66\x48\xff", 3) == 0));
if (size == 64)
{
@@ -4225,16 +4233,23 @@ Target_x86_64<size>::Relocate::tls_gd_to_le(
{
// For SIZE == 64:
// .byte 0x66; leaq foo@tlsgd(%rip),%rdi;
- // .word 0x6666; rex64; call __tls_get_addr
+ // .word 0x6666; rex64; call __tls_get_addr@PLT
+ // ==> movq %fs:0,%rax; leaq x@tpoff(%rax),%rax
+ // .byte 0x66; leaq foo@tlsgd(%rip),%rdi;
+ // .byte 0x66; rex64; call *__tls_get_addr@GOTPCREL(%rip)
// ==> movq %fs:0,%rax; leaq x@tpoff(%rax),%rax
// For SIZE == 32:
// leaq foo@tlsgd(%rip),%rdi;
- // .word 0x6666; rex64; call __tls_get_addr
+ // .word 0x6666; rex64; call __tls_get_addr@PLT
+ // ==> movl %fs:0,%eax; leaq x@tpoff(%rax),%rax
+ // leaq foo@tlsgd(%rip),%rdi;
+ // .byte 0x66; rex64; call *__tls_get_addr@GOTPCREL(%rip)
// ==> movl %fs:0,%eax; leaq x@tpoff(%rax),%rax
tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size, 12);
tls::check_tls(relinfo, relnum, rela.get_r_offset(),
- (memcmp(view + 4, "\x66\x66\x48\xe8", 4) == 0));
+ (memcmp(view + 4, "\x66\x66\x48\xe8", 4) == 0
+ || memcmp(view + 4, "\x66\x48\xff", 3) == 0));
if (size == 64)
{
@@ -4362,6 +4377,13 @@ Target_x86_64<size>::Relocate::tls_ld_to_le(
// For SIZE == 32:
// ... leq foo@dtpoff(%rax),%reg
// ==> nopl 0x0(%rax); movl %fs:0,%eax ... leaq x@tpoff(%rax),%rdx
+ // leaq foo@tlsld(%rip),%rdi; call *__tls_get_addr@GOTPCREL(%rip)
+ // For SIZE == 64:
+ // ... leaq foo@dtpoff(%rax),%reg
+ // ==> .word 0x6666; .word 0x6666; movq %fs:0,%rax ... leaq x@tpoff(%rax),%rdx
+ // For SIZE == 32:
+ // ... leaq foo@dtpoff(%rax),%reg
+ // ==> nopw 0x0(%rax); movl %fs:0,%eax ... leaq x@tpoff(%rax),%rdx
tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size, -3);
tls::check_range(relinfo, relnum, rela.get_r_offset(), view_size, 9);
@@ -4369,12 +4391,25 @@ Target_x86_64<size>::Relocate::tls_ld_to_le(
tls::check_tls(relinfo, relnum, rela.get_r_offset(),
view[-3] == 0x48 && view[-2] == 0x8d && view[-1] == 0x3d);
- tls::check_tls(relinfo, relnum, rela.get_r_offset(), view[4] == 0xe8);
+ tls::check_tls(relinfo, relnum, rela.get_r_offset(),
+ view[4] == 0xe8 || view[4] == 0xff);
- if (size == 64)
- memcpy(view - 3, "\x66\x66\x66\x64\x48\x8b\x04\x25\0\0\0\0", 12);
+ if (view[4] == 0xe8)
+ {
+ if (size == 64)
+ memcpy(view - 3, "\x66\x66\x66\x64\x48\x8b\x04\x25\0\0\0\0", 12);
+ else
+ memcpy(view - 3, "\x0f\x1f\x40\x00\x64\x8b\x04\x25\0\0\0\0", 12);
+ }
else
- memcpy(view - 3, "\x0f\x1f\x40\x00\x64\x8b\x04\x25\0\0\0\0", 12);
+ {
+ if (size == 64)
+ memcpy(view - 3, "\x66\x66\x66\x66\x64\x48\x8b\x04\x25\0\0\0\0",
+ 13);
+ else
+ memcpy(view - 3, "\x66\x0f\x1f\x40\x00\x64\x8b\x04\x25\0\0\0\0",
+ 13);
+ }
// The next reloc should be a PLT32 or GOTPCRELX reloc against
// __tls_get_addr. We can skip it.