aboutsummaryrefslogtreecommitdiff
path: root/bfd/elf64-x86-64.c
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2016-02-26 09:38:08 -0800
committerH.J. Lu <hjl.tools@gmail.com>2016-02-26 09:39:24 -0800
commitbae420ef26f4331415b0503141c5931318025906 (patch)
tree997f55c531ebd08c7220d5fe523d3df00b709f87 /bfd/elf64-x86-64.c
parentfc5a9bd57cbb974b8fc3aeb9a15d644cd9103451 (diff)
downloadgdb-bae420ef26f4331415b0503141c5931318025906.zip
gdb-bae420ef26f4331415b0503141c5931318025906.tar.gz
gdb-bae420ef26f4331415b0503141c5931318025906.tar.bz2
Optimize x86 GOT32X/GOTPCRELX relocations
R_386_GOT32X, R_X86_64_GOTPCRELX and R_X86_64_REX_GOTPCRELX relocations retrieve the symbol address via its GOT slot. If the symbol address is known at the link-time, we can use it directly by changing instruction encoding. Indirect branch can only be converted to PC relative direct branch. MOV can be changed to LEA or encoded differently with signed address. The subset of binary operations can be encoded only with signed address. If undefined weak symbol is resolved to zero link-time, we can use it as address. Zero addresss can't used with PC relative direct branch when PIC is true since the current PC is unknown. In 64-bit, 32-bit relocation for PC relatiave direct branch to zero may also overflow. If this optimization causes relocation overflow, --no-relax can be used to work around it. bfd/ PR ld/19609 * elf32-i386.c (elf_i386_convert_load): Convert to R_386_32 for load with locally bound symbols if PIC is false or there is no base register. Optimize branch to 0 if PIC is false. (elf_i386_relocate_section): Don't generate dynamic relocations against undefined weak symbols if PIC is false. * elf64-x86-64.c (elf_x86_64_convert_load): Disable optimization if we can't estimate relocation overflow with --no-relax. Convert to R_X86_64_32S/R_X86_64_32 for load with locally bound symbols if PIC is false. Optimize branch to 0 if PIC is false. (elf_x86_64_relocate_section): Don't generate dynamic relocations against undefined weak symbols if PIC is false. ld/ PR ld/19609 * testsuite/ld-i386/got1.dd: Updated. * testsuite/ld-i386/lea1c.d: Likewise. * testsuite/ld-i386/load1-nacl.d: Likewise. * testsuite/ld-i386/load1.d: Likewise. * testsuite/ld-i386/load4b.d: Likewise. * testsuite/ld-i386/load5b.d: Likewise. * testsuite/ld-i386/mov1b.d: Likewise. * testsuite/ld-x86-64/mov1b.d: Likewise. * testsuite/ld-x86-64/mov1d.d: Likewise. * testsuite/ld-ifunc/ifunc-21-i386.d: Likewise. * testsuite/ld-ifunc/ifunc-21-x86-64.d: Likewise. * testsuite/ld-ifunc/ifunc-22-i386.d: Likewise. * testsuite/ld-ifunc/ifunc-22-x86-64.d: Likewise. * testsuite/ld-x86-64/gotpcrel1.dd: Likewise. * testsuite/ld-x86-64/lea1a.d: Likewise. * testsuite/ld-x86-64/lea1b.d: Likewise. * testsuite/ld-x86-64/lea1c.d: Likewise. * testsuite/ld-x86-64/lea1d.d: Likewise. * testsuite/ld-x86-64/lea1e.d: Likewise. * testsuite/ld-x86-64/lea1f.d: Likewise. * testsuite/ld-x86-64/mov1b.d: Likewise. * testsuite/ld-x86-64/mov1d.d: Likewise. * testsuite/ld-x86-64/pr13082-3b.d: Likewise. * testsuite/ld-x86-64/pr13082-4b.d: Likewise. * testsuite/ld-x86-64/lea1.s: Add tests for 32-bit registers. * testsuite/ld-i386/pr19609-1.s: New file. * testsuite/ld-i386/pr19609-1a.d: Likewise. * testsuite/ld-i386/pr19609-1b.d: Likewise. * testsuite/ld-i386/pr19609-1c.d: Likewise. * testsuite/ld-i386/pr19609-1d.d: Likewise. * testsuite/ld-i386/pr19609-1e.d: Likewise. * testsuite/ld-i386/pr19609-1f.d: Likewise. * testsuite/ld-i386/pr19609-1g.d: Likewise. * testsuite/ld-i386/pr19609-1h.d: Likewise. * testsuite/ld-i386/pr19609-1i.d: Likewise. * testsuite/ld-i386/pr19609-2.s: Likewise. * testsuite/ld-i386/pr19609-2a.d: Likewise. * testsuite/ld-i386/pr19609-2b.d: Likewise. * testsuite/ld-i386/pr19609-2c.d: Likewise. * testsuite/ld-i386/undefweak.s: Likewise. * testsuite/ld-i386/undefweaka.d: Likewise. * testsuite/ld-i386/undefweakb.d: Likewise. * testsuite/ld-x86-64/pr13082-3c.d: Likewise. * testsuite/ld-x86-64/pr13082-3d.d: Likewise. * testsuite/ld-x86-64/pr19609-1.s: Likewise. * testsuite/ld-x86-64/pr19609-1a.d: Likewise. * testsuite/ld-x86-64/pr19609-1b.d: Likewise. * testsuite/ld-x86-64/pr19609-1c.d: Likewise. * testsuite/ld-x86-64/pr19609-1d.d: Likewise. * testsuite/ld-x86-64/pr19609-1e.d: Likewise. * testsuite/ld-x86-64/pr19609-1f.d: Likewise. * testsuite/ld-x86-64/pr19609-1g.d: Likewise. * testsuite/ld-x86-64/pr19609-1h.d: Likewise. * testsuite/ld-x86-64/pr19609-1i.d: Likewise. * testsuite/ld-x86-64/pr19609-1j.d: Likewise. * testsuite/ld-x86-64/pr19609-1k.d: Likewise. * testsuite/ld-x86-64/pr19609-1l.d: Likewise. * testsuite/ld-x86-64/pr19609-1m.d: Likewise. * testsuite/ld-x86-64/pr19609-2.s: Likewise. * testsuite/ld-x86-64/pr19609-2a.d: Likewise. * testsuite/ld-x86-64/pr19609-2b.d: Likewise. * testsuite/ld-x86-64/pr19609-2c.d: Likewise. * testsuite/ld-x86-64/pr19609-2d.d: Likewise. * testsuite/ld-x86-64/pr19609-3.s: Likewise. * testsuite/ld-x86-64/pr19609-3a.d: Likewise. * testsuite/ld-x86-64/pr19609-3b.d: Likewise. * testsuite/ld-x86-64/pr19609-4.s: Likewise. * testsuite/ld-x86-64/pr19609-4a.d: Likewise. * testsuite/ld-x86-64/pr19609-4b.d: Likewise. * testsuite/ld-x86-64/pr19609-4c.d: Likewise. * testsuite/ld-x86-64/pr19609-4d.d: Likewise. * testsuite/ld-x86-64/pr19609-4e.d: Likewise. * testsuite/ld-x86-64/pr19609-5.s: Likewise. * testsuite/ld-x86-64/pr19609-5a.d: Likewise. * testsuite/ld-x86-64/pr19609-5b.d: Likewise. * testsuite/ld-x86-64/pr19609-5c.d: Likewise. * testsuite/ld-x86-64/pr19609-5d.d: Likewise. * testsuite/ld-x86-64/pr19609-5e.d: Likewise. * testsuite/ld-x86-64/pr19609-6.s: Likewise. * testsuite/ld-x86-64/pr19609-6a.d: Likewise. * testsuite/ld-x86-64/pr19609-6b.d: Likewise. * testsuite/ld-x86-64/pr19609-6c.d: Likewise. * testsuite/ld-x86-64/pr19609-6d.d: Likewise. * testsuite/ld-x86-64/pr19609-7.s: Likewise. * testsuite/ld-x86-64/pr19609-7a.d: Likewise. * testsuite/ld-x86-64/pr19609-7b.d: Likewise. * testsuite/ld-x86-64/pr19609-7c.d: Likewise. * testsuite/ld-x86-64/pr19609-7d.d: Likewise. * testsuite/ld-i386/i386.exp: Run undefweak tests and tests for PR ld/19609. * testsuite/ld-x86-64/x86-64.exp: Run pr13082-3c, pr13082-3d and tests for PR ld/19609.
Diffstat (limited to 'bfd/elf64-x86-64.c')
-rw-r--r--bfd/elf64-x86-64.c178
1 files changed, 138 insertions, 40 deletions
diff --git a/bfd/elf64-x86-64.c b/bfd/elf64-x86-64.c
index 6ca3b2e..c696850 100644
--- a/bfd/elf64-x86-64.c
+++ b/bfd/elf64-x86-64.c
@@ -3070,6 +3070,8 @@ elf_x86_64_convert_load (bfd *abfd, asection *sec,
bfd_boolean changed_relocs;
bfd_signed_vma *local_got_refcounts;
bfd_vma maxpagesize;
+ bfd_boolean is_pic;
+ bfd_boolean require_reloc_pc32;
/* Don't even try to convert non-ELF outputs. */
if (!is_elf_hash_table (link_info->hash))
@@ -3105,6 +3107,13 @@ elf_x86_64_convert_load (bfd *abfd, asection *sec,
goto error_return;
}
+ is_pic = bfd_link_pic (link_info);
+
+ /* TRUE if we can convert only to R_X86_64_PC32. Enable it for
+ --no-relax. */
+ require_reloc_pc32
+ = link_info->disable_target_specific_optimizations > 1;
+
irelend = internal_relocs + sec->reloc_count;
for (irel = internal_relocs; irel < irelend; irel++)
{
@@ -3118,10 +3127,12 @@ elf_x86_64_convert_load (bfd *abfd, asection *sec,
bfd_signed_vma raddend;
unsigned int opcode;
unsigned int modrm;
+ bfd_boolean relocx;
+ bfd_boolean to_reloc_pc32;
- if (r_type != R_X86_64_GOTPCREL
- && r_type != R_X86_64_GOTPCRELX
- && r_type != R_X86_64_REX_GOTPCRELX)
+ relocx = (r_type == R_X86_64_GOTPCRELX
+ || r_type == R_X86_64_REX_GOTPCRELX);
+ if (!relocx && r_type != R_X86_64_GOTPCREL)
continue;
roff = irel->r_offset;
@@ -3135,26 +3146,27 @@ elf_x86_64_convert_load (bfd *abfd, asection *sec,
opcode = bfd_get_8 (abfd, contents + roff - 2);
- /* It is OK to convert mov to lea. */
+ /* Convert mov to lea since it has been done for a while. */
if (opcode != 0x8b)
{
/* Only convert R_X86_64_GOTPCRELX and R_X86_64_REX_GOTPCRELX
- for mov call, jmp or one of adc, add, and, cmp, or, sbb,
- sub, test, xor instructions. */
- if (r_type != R_X86_64_GOTPCRELX
- && r_type != R_X86_64_REX_GOTPCRELX)
+ for call, jmp or one of adc, add, and, cmp, or, sbb, sub,
+ test, xor instructions. */
+ if (!relocx)
continue;
-
- /* It is OK to convert indirect branch to direct branch. */
- if (opcode != 0xff)
- {
- /* It is OK to convert adc, add, and, cmp, or, sbb, sub,
- test, xor only when PIC is false. */
- if (bfd_link_pic (link_info))
- continue;
- }
}
+ /* We convert only to R_X86_64_PC32:
+ 1. Branch.
+ 2. R_X86_64_GOTPCREL since we can't modify REX byte.
+ 3. require_reloc_pc32 is true.
+ 4. PIC.
+ */
+ to_reloc_pc32 = (opcode == 0xff
+ || !relocx
+ || require_reloc_pc32
+ || is_pic);
+
/* Get the symbol referred to by the reloc. */
if (r_symndx < symtab_hdr->sh_info)
{
@@ -3195,22 +3207,59 @@ elf_x86_64_convert_load (bfd *abfd, asection *sec,
/* STT_GNU_IFUNC must keep GOTPCREL relocations. We also
avoid optimizing GOTPCREL relocations againt _DYNAMIC
since ld.so may use its link-time address. */
- if ((h->def_regular
- || h->root.type == bfd_link_hash_defined
- || h->root.type == bfd_link_hash_defweak)
- && h->type != STT_GNU_IFUNC
- && h != htab->elf.hdynamic
- && SYMBOL_REFERENCES_LOCAL (link_info, h))
+ if (h->type == STT_GNU_IFUNC)
+ continue;
+
+ /* Undefined weak symbol is only bound locally in executable
+ and its reference is resolved as 0 without relocation
+ overflow. We can only perform this optimization for
+ GOTPCRELX relocations since we need to modify REX byte.
+ It is OK convert mov with R_X86_64_GOTPCREL to
+ R_X86_64_PC32. */
+ if ((relocx || opcode == 0x8b)
+ && UNDEFINED_WEAK_RESOLVED_TO_ZERO (link_info,
+ elf_x86_64_hash_entry (h)))
+ {
+ if (opcode == 0xff)
+ {
+ /* Skip for branch instructions since R_X86_64_PC32
+ may overflow. */
+ if (require_reloc_pc32)
+ continue;
+ }
+ else if (relocx)
+ {
+ /* For non-branch instructions, we can convert to
+ R_X86_64_32/R_X86_64_32S since we know if there
+ is a REX byte. */
+ to_reloc_pc32 = FALSE;
+ }
+
+ /* Since we don't know the current PC when PIC is true,
+ we can't convert to R_X86_64_PC32. */
+ if (to_reloc_pc32 && is_pic)
+ continue;
+
+ goto convert;
+ }
+ else if ((h->def_regular
+ || h->root.type == bfd_link_hash_defined
+ || h->root.type == bfd_link_hash_defweak)
+ && h != htab->elf.hdynamic
+ && SYMBOL_REFERENCES_LOCAL (link_info, h))
{
/* bfd_link_hash_new or bfd_link_hash_undefined is
- set by an assignment in a linker script in
- bfd_elf_record_link_assignment. FIXME: If we
- ever get a linker error due relocation overflow,
- we will skip this optimization. */
+ set by an assignment in a linker script in
+ bfd_elf_record_link_assignment. */
if (h->def_regular
&& (h->root.type == bfd_link_hash_new
|| h->root.type == bfd_link_hash_undefined))
- goto convert;
+ {
+ /* Skip since R_X86_64_32/R_X86_64_32S may overflow. */
+ if (require_reloc_pc32)
+ continue;
+ goto convert;
+ }
tsec = h->root.u.def.section;
toff = h->root.u.def.value;
symtype = h->type;
@@ -3219,6 +3268,10 @@ elf_x86_64_convert_load (bfd *abfd, asection *sec,
continue;
}
+ /* We can only estimate relocation overflow for R_X86_64_PC32. */
+ if (!to_reloc_pc32)
+ goto convert;
+
if (tsec->sec_info_type == SEC_INFO_TYPE_MERGE)
{
/* At this stage in linking, no SEC_MERGE symbol has been
@@ -3342,15 +3395,55 @@ convert:
}
else
{
+ unsigned int rex;
+ unsigned int rex_mask = REX_R;
+
+ if (r_type == R_X86_64_REX_GOTPCRELX)
+ rex = bfd_get_8 (abfd, contents + roff - 3);
+ else
+ rex = 0;
+
if (opcode == 0x8b)
{
- /* Convert "mov foo@GOTPCREL(%rip), %reg" to
- "lea foo(%rip), %reg". */
- opcode = 0x8d;
- r_type = R_X86_64_PC32;
+ if (to_reloc_pc32)
+ {
+ /* Convert "mov foo@GOTPCREL(%rip), %reg" to
+ "lea foo(%rip), %reg". */
+ opcode = 0x8d;
+ r_type = R_X86_64_PC32;
+ }
+ else
+ {
+ /* Convert "mov foo@GOTPCREL(%rip), %reg" to
+ "mov $foo, %reg". */
+ opcode = 0xc7;
+ modrm = bfd_get_8 (abfd, contents + roff - 1);
+ modrm = 0xc0 | (modrm & 0x38) >> 3;
+ if ((rex & REX_W) != 0
+ && ABI_64_P (link_info->output_bfd))
+ {
+ /* Keep the REX_W bit in REX byte for LP64. */
+ r_type = R_X86_64_32S;
+ goto rewrite_modrm_rex;
+ }
+ else
+ {
+ /* If the REX_W bit in REX byte isn't needed,
+ use R_X86_64_32 and clear the W bit to avoid
+ sign-extend imm32 to imm64. */
+ r_type = R_X86_64_32;
+ /* Clear the W bit in REX byte. */
+ rex_mask |= REX_W;
+ goto rewrite_modrm_rex;
+ }
+ }
}
else
{
+ /* R_X86_64_PC32 isn't supported. */
+ if (to_reloc_pc32)
+ continue;
+
modrm = bfd_get_8 (abfd, contents + roff - 1);
if (opcode == 0x85)
{
@@ -3366,18 +3459,23 @@ convert:
modrm = 0xc0 | (modrm & 0x38) >> 3 | (opcode & 0x3c);
opcode = 0x81;
}
+
+ /* Use R_X86_64_32 with 32-bit operand to avoid relocation
+ overflow when sign-extending imm32 to imm64. */
+ r_type = (rex & REX_W) != 0 ? R_X86_64_32S : R_X86_64_32;
+
+rewrite_modrm_rex:
bfd_put_8 (abfd, modrm, contents + roff - 1);
- if (r_type == R_X86_64_REX_GOTPCRELX)
+ if (rex)
{
/* Move the R bit to the B bit in REX byte. */
- unsigned int rex = bfd_get_8 (abfd, contents + roff - 3);
- rex = (rex & ~REX_R) | (rex & REX_R) >> 2;
+ rex = (rex & ~rex_mask) | (rex & REX_R) >> 2;
bfd_put_8 (abfd, rex, contents + roff - 3);
}
- /* No addend for R_X86_64_32S relocation. */
+
+ /* No addend for R_X86_64_32/R_X86_64_32S relocations. */
irel->r_addend = 0;
- r_type = R_X86_64_32S;
}
bfd_put_8 (abfd, opcode, contents + roff - 2);
@@ -4688,9 +4786,9 @@ direct:
|| eh->func_pointer_refcount > 0
|| (h->root.type == bfd_link_hash_undefweak
&& !resolved_to_zero))
- && ((h->def_dynamic
- && !h->def_regular)
- || h->root.type == bfd_link_hash_undefweak
+ && ((h->def_dynamic && !h->def_regular)
+ /* Undefined weak symbol is bound locally when
+ PIC is false. */
|| h->root.type == bfd_link_hash_undefined)))
{
Elf_Internal_Rela outrel;