diff options
author | Alan Modra <amodra@gmail.com> | 2018-04-09 09:32:39 +0930 |
---|---|---|
committer | Alan Modra <amodra@gmail.com> | 2018-04-09 17:40:54 +0930 |
commit | 3e04d7655bf63f4e5a0d0354c21aa3fa2ece3681 (patch) | |
tree | c79cf2877a3db9fcf0c1aa2cd21bd963da6b7c96 /bfd | |
parent | 23cedd1dc90d05c4b80d4a4b000ed5f37b9c3268 (diff) | |
download | gdb-3e04d7655bf63f4e5a0d0354c21aa3fa2ece3681.zip gdb-3e04d7655bf63f4e5a0d0354c21aa3fa2ece3681.tar.gz gdb-3e04d7655bf63f4e5a0d0354c21aa3fa2ece3681.tar.bz2 |
Inline PLT call optimization
This patch adds the analysis part of PLT call optimization, enabling
the code added with the previous patch that actually performs the
optimization.
Gold support is not available yet.
bfd/
* elf64-ppc.c (struct _ppc64_elf_section_data): Add has_pltcall field.
(struct ppc_link_hash_table): Add can_convert_all_inline_plt.
(ppc64_elf_check_relocs): Set has_pltcall.
(ppc64_elf_adjust_dynamic_symbol): Discard some PLT entries.
(ppc64_elf_inline_plt): New function.
(ppc64_elf_size_dynamic_sections): Discard some PLT entries for locals.
* elf64-ppc.h (ppc64_elf_inline_plt): Declare.
* elf32-ppc.c (has_pltcall): Define.
(struct ppc_elf_link_hash_table): Add can_convert_all_inline_plt.
(ppc_elf_check_relocs): Set has_pltcall.
(ppc_elf_inline_plt): New function.
(ppc_elf_adjust_dynamic_symbol): Discard some PLT entries.
(ppc_elf_size_dynamic_sections): Likewise.
* elf32-ppc.h (ppc_elf_inline_plt): Declare.
ld/
* emultempl/ppc64elf.em (no_inline_opt): New var.
(ppc_before_allocation): Call ppc64_elf_inline_plt.
(enum ppc64_opt): Add OPTION_NO_INLINE_OPT.
(PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
PARSE_AND_LIST_ARGS_CASES): Handle --no-inline-optimize.
* emultempl/ppc32elf.em (no_inline_opt): New var.
(prelim_size_sections): New function, extracted from..
(ppc_before_allocation): ..here. Call ppc_elf_inline_plt.
(enum ppc32_opt): Add OPTION_NO_INLINE_OPT.
(PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
PARSE_AND_LIST_ARGS_CASES): Handle --no-inline-optimize.
Diffstat (limited to 'bfd')
-rw-r--r-- | bfd/ChangeLog | 17 | ||||
-rw-r--r-- | bfd/elf32-ppc.c | 156 | ||||
-rw-r--r-- | bfd/elf32-ppc.h | 1 | ||||
-rw-r--r-- | bfd/elf64-ppc.c | 168 | ||||
-rw-r--r-- | bfd/elf64-ppc.h | 2 |
5 files changed, 336 insertions, 8 deletions
diff --git a/bfd/ChangeLog b/bfd/ChangeLog index 445eff0..8df416f 100644 --- a/bfd/ChangeLog +++ b/bfd/ChangeLog @@ -1,5 +1,22 @@ 2018-04-09 Alan Modra <amodra@gmail.com> + * elf64-ppc.c (struct _ppc64_elf_section_data): Add has_pltcall field. + (struct ppc_link_hash_table): Add can_convert_all_inline_plt. + (ppc64_elf_check_relocs): Set has_pltcall. + (ppc64_elf_adjust_dynamic_symbol): Discard some PLT entries. + (ppc64_elf_inline_plt): New function. + (ppc64_elf_size_dynamic_sections): Discard some PLT entries for locals. + * elf64-ppc.h (ppc64_elf_inline_plt): Declare. + * elf32-ppc.c (has_pltcall): Define. + (struct ppc_elf_link_hash_table): Add can_convert_all_inline_plt. + (ppc_elf_check_relocs): Set has_pltcall. + (ppc_elf_inline_plt): New function. + (ppc_elf_adjust_dynamic_symbol): Discard some PLT entries. + (ppc_elf_size_dynamic_sections): Likewise. + * elf32-ppc.h (ppc_elf_inline_plt): Declare. + +2018-04-09 Alan Modra <amodra@gmail.com> + * elf32-ppc.c (ppc_elf_howto_raw): Add PLTSEQ and PLTCALL howtos. (is_plt_seq_reloc): New function. (ppc_elf_check_relocs): Handle PLTSEQ and PLTCALL relocs. diff --git a/bfd/elf32-ppc.c b/bfd/elf32-ppc.c index 36a01ee..7f51fac 100644 --- a/bfd/elf32-ppc.c +++ b/bfd/elf32-ppc.c @@ -3347,6 +3347,9 @@ struct ppc_elf_link_hash_table /* Set if tls optimization is enabled. */ unsigned int do_tls_opt:1; + /* Set if inline plt calls should be converted to direct calls. */ + unsigned int can_convert_all_inline_plt:1; + /* The size of PLT entries. */ int plt_entry_size; /* The distance between adjacent PLT slots. */ @@ -3367,6 +3370,9 @@ struct ppc_elf_link_hash_table /* Nonzero if this section has a call to __tls_get_addr. */ #define has_tls_get_addr_call sec_flg1 + /* Flag set when PLTCALL relocs are detected. */ +#define has_pltcall sec_flg2 + /* Get the PPC ELF linker hash table from a link_info structure. 
*/ #define ppc_elf_hash_table(p) \ @@ -4351,14 +4357,18 @@ ppc_elf_check_relocs (bfd *abfd, if (h == NULL) break; ppc_elf_tdata (abfd)->makes_plt_call = 1; - /* Fall through */ + goto pltentry; case R_PPC_PLTCALL: + sec->has_pltcall = 1; + /* Fall through. */ + case R_PPC_PLT32: case R_PPC_PLTREL32: case R_PPC_PLT16_LO: case R_PPC_PLT16_HI: case R_PPC_PLT16_HA: + pltentry: #ifdef DEBUG fprintf (stderr, "Reloc requires a PLT entry\n"); #endif @@ -5236,6 +5246,141 @@ get_sym_h (struct elf_link_hash_entry **hp, return TRUE; } +/* Analyze inline PLT call relocations to see whether calls to locally + defined functions can be converted to direct calls. */ + +bfd_boolean +ppc_elf_inline_plt (struct bfd_link_info *info) +{ + struct ppc_elf_link_hash_table *htab; + bfd *ibfd; + asection *sec; + bfd_vma low_vma, high_vma, limit; + + htab = ppc_elf_hash_table (info); + if (htab == NULL) + return FALSE; + + /* A bl insn can reach -0x2000000 to 0x1fffffc. The limit is + reduced somewhat to cater for possible stubs that might be added + between the call and its destination. */ + limit = 0x1e00000; + low_vma = -1; + high_vma = 0; + for (sec = info->output_bfd->sections; sec != NULL; sec = sec->next) + if ((sec->flags & (SEC_ALLOC | SEC_CODE)) == (SEC_ALLOC | SEC_CODE)) + { + if (low_vma > sec->vma) + low_vma = sec->vma; + if (high_vma < sec->vma + sec->size) + high_vma = sec->vma + sec->size; + } + + /* If a "bl" can reach anywhere in local code sections, then we can + convert all inline PLT sequences to direct calls when the symbol + is local. */ + if (high_vma - low_vma < limit) + { + htab->can_convert_all_inline_plt = 1; + return TRUE; + } + + /* Otherwise, go looking through relocs for cases where a direct + call won't reach. Mark the symbol on any such reloc to disable + the optimization and keep the PLT entry as it seems likely that + this will be better than creating trampolines. 
Note that this + will disable the optimization for all inline PLT calls to a + particular symbol, not just those that won't reach. The + difficulty in doing a more precise optimization is that the + linker needs to make a decision depending on whether a + particular R_PPC_PLTCALL insn can be turned into a direct + call, for each of the R_PPC_PLTSEQ and R_PPC_PLT16* insns in + the sequence, and there is nothing that ties those relocs + together except their symbol. */ + + for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link.next) + { + Elf_Internal_Shdr *symtab_hdr; + Elf_Internal_Sym *local_syms; + + if (!is_ppc_elf (ibfd)) + continue; + + local_syms = NULL; + symtab_hdr = &elf_symtab_hdr (ibfd); + + for (sec = ibfd->sections; sec != NULL; sec = sec->next) + if (sec->has_pltcall + && !bfd_is_abs_section (sec->output_section)) + { + Elf_Internal_Rela *relstart, *rel, *relend; + + /* Read the relocations. */ + relstart = _bfd_elf_link_read_relocs (ibfd, sec, NULL, NULL, + info->keep_memory); + if (relstart == NULL) + return FALSE; + + relend = relstart + sec->reloc_count; + for (rel = relstart; rel < relend; ) + { + enum elf_ppc_reloc_type r_type; + unsigned long r_symndx; + asection *sym_sec; + struct elf_link_hash_entry *h; + Elf_Internal_Sym *sym; + unsigned char *tls_maskp; + + r_type = ELF32_R_TYPE (rel->r_info); + if (r_type != R_PPC_PLTCALL) + continue; + + r_symndx = ELF32_R_SYM (rel->r_info); + if (!get_sym_h (&h, &sym, &sym_sec, &tls_maskp, &local_syms, + r_symndx, ibfd)) + { + if (elf_section_data (sec)->relocs != relstart) + free (relstart); + if (local_syms != NULL + && symtab_hdr->contents != (unsigned char *) local_syms) + free (local_syms); + return FALSE; + } + + if (sym_sec != NULL && sym_sec->output_section != NULL) + { + bfd_vma from, to; + if (h != NULL) + to = h->root.u.def.value; + else + to = sym->st_value; + to += (rel->r_addend + + sym_sec->output_offset + + sym_sec->output_section->vma); + from = (rel->r_offset + + 
sec->output_offset + + sec->output_section->vma); + if (to - from + limit < 2 * limit) + *tls_maskp &= ~PLT_KEEP; + } + } + if (elf_section_data (sec)->relocs != relstart) + free (relstart); + } + + if (local_syms != NULL + && symtab_hdr->contents != (unsigned char *) local_syms) + { + if (!info->keep_memory) + free (local_syms); + else + symtab_hdr->contents = (unsigned char *) local_syms; + } + } + + return TRUE; +} + /* Set plt output section type, htab->tls_get_addr, and call the generic ELF tls_setup function. */ @@ -5716,8 +5861,9 @@ ppc_elf_adjust_dynamic_symbol (struct bfd_link_info *info, if (ent == NULL || (h->type != STT_GNU_IFUNC && local - && ((ppc_elf_hash_entry (h)->tls_mask & (TLS_TLS | PLT_KEEP)) - != PLT_KEEP))) + && (htab->can_convert_all_inline_plt + || (ppc_elf_hash_entry (h)->tls_mask + & (TLS_TLS | PLT_KEEP)) != PLT_KEEP))) { /* A PLT entry is not required/allowed when: @@ -6216,6 +6362,7 @@ allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf) || (h->needs_plt && h->def_regular && !htab->elf.dynamic_sections_created + && !htab->can_convert_all_inline_plt && (ppc_elf_hash_entry (h)->tls_mask & (TLS_TLS | PLT_KEEP)) == PLT_KEEP)) { @@ -6570,7 +6717,8 @@ ppc_elf_size_dynamic_sections (bfd *output_bfd, { if ((*lgot_masks & (TLS_TLS | PLT_IFUNC)) == PLT_IFUNC) s = htab->elf.iplt; - else if ((*lgot_masks & (TLS_TLS | PLT_KEEP)) != PLT_KEEP) + else if (htab->can_convert_all_inline_plt + || (*lgot_masks & (TLS_TLS | PLT_KEEP)) != PLT_KEEP) { ent->plt.offset = (bfd_vma) -1; continue; diff --git a/bfd/elf32-ppc.h b/bfd/elf32-ppc.h index 265859b..fa860f1 100644 --- a/bfd/elf32-ppc.h +++ b/bfd/elf32-ppc.h @@ -60,6 +60,7 @@ struct ppc_elf_params void ppc_elf_link_params (struct bfd_link_info *, struct ppc_elf_params *); int ppc_elf_select_plt_layout (bfd *, struct bfd_link_info *); +bfd_boolean ppc_elf_inline_plt (struct bfd_link_info *); asection *ppc_elf_tls_setup (bfd *, struct bfd_link_info *); bfd_boolean ppc_elf_tls_optimize (bfd *, struct 
bfd_link_info *); void ppc_elf_maybe_strip_sdata_syms (struct bfd_link_info *); diff --git a/bfd/elf64-ppc.c b/bfd/elf64-ppc.c index 9472f7b..59c2922 100644 --- a/bfd/elf64-ppc.c +++ b/bfd/elf64-ppc.c @@ -3123,6 +3123,9 @@ struct _ppc64_elf_section_data /* Flag set when small branches are detected. Used to select suitable defaults for the stub group size. */ unsigned int has_14bit_branch:1; + + /* Flag set when PLTCALL relocs are detected. */ + unsigned int has_pltcall:1; }; #define ppc64_elf_section_data(sec) \ @@ -4191,6 +4194,9 @@ struct ppc_link_hash_table /* Set if tls optimization is enabled. */ unsigned int do_tls_opt:1; + /* Set if inline plt calls should be converted to direct calls. */ + unsigned int can_convert_all_inline_plt:1; + /* Set on error. */ unsigned int stub_error:1; @@ -5818,10 +5824,14 @@ ppc64_elf_check_relocs (bfd *abfd, struct bfd_link_info *info, if (dest != sec) ppc64_elf_section_data (sec)->has_14bit_branch = 1; } + goto rel24; + + case R_PPC64_PLTCALL: + ppc64_elf_section_data (sec)->has_pltcall = 1; /* Fall through. */ case R_PPC64_REL24: - case R_PPC64_PLTCALL: + rel24: plt_list = ifunc; if (h != NULL) { @@ -7261,8 +7271,9 @@ ppc64_elf_adjust_dynamic_symbol (struct bfd_link_info *info, if (ent == NULL || (h->type != STT_GNU_IFUNC && local - && (((struct ppc_link_hash_entry *) h)->tls_mask - & (TLS_TLS | PLT_KEEP)) != PLT_KEEP)) + && (htab->can_convert_all_inline_plt + || (((struct ppc_link_hash_entry *) h)->tls_mask + & (TLS_TLS | PLT_KEEP)) != PLT_KEEP))) { h->plt.plist = NULL; h->needs_plt = 0; @@ -8276,6 +8287,153 @@ ppc64_elf_edit_opd (struct bfd_link_info *info) return TRUE; } +/* Analyze inline PLT call relocations to see whether calls to locally + defined functions can be converted to direct calls. 
*/ + +bfd_boolean +ppc64_elf_inline_plt (struct bfd_link_info *info) +{ + struct ppc_link_hash_table *htab; + bfd *ibfd; + asection *sec; + bfd_vma low_vma, high_vma, limit; + + htab = ppc_hash_table (info); + if (htab == NULL) + return FALSE; + + /* A bl insn can reach -0x2000000 to 0x1fffffc. The limit is + reduced somewhat to cater for possible stubs that might be added + between the call and its destination. */ + if (htab->params->group_size < 0) + { + limit = -htab->params->group_size; + if (limit == 1) + limit = 0x1e00000; + } + else + { + limit = htab->params->group_size; + if (limit == 1) + limit = 0x1c00000; + } + + low_vma = -1; + high_vma = 0; + for (sec = info->output_bfd->sections; sec != NULL; sec = sec->next) + if ((sec->flags & (SEC_ALLOC | SEC_CODE)) == (SEC_ALLOC | SEC_CODE)) + { + if (low_vma > sec->vma) + low_vma = sec->vma; + if (high_vma < sec->vma + sec->size) + high_vma = sec->vma + sec->size; + } + + /* If a "bl" can reach anywhere in local code sections, then we can + convert all inline PLT sequences to direct calls when the symbol + is local. */ + if (high_vma - low_vma < limit) + { + htab->can_convert_all_inline_plt = 1; + return TRUE; + } + + /* Otherwise, go looking through relocs for cases where a direct + call won't reach. Mark the symbol on any such reloc to disable + the optimization and keep the PLT entry as it seems likely that + this will be better than creating trampolines. Note that this + will disable the optimization for all inline PLT calls to a + particular symbol, not just those that won't reach. The + difficulty in doing a more precise optimization is that the + linker needs to make a decision depending on whether a + particular R_PPC64_PLTCALL insn can be turned into a direct + call, for each of the R_PPC64_PLTSEQ and R_PPC64_PLT16* insns in + the sequence, and there is nothing that ties those relocs + together except their symbol. 
*/ + + for (ibfd = info->input_bfds; ibfd != NULL; ibfd = ibfd->link.next) + { + Elf_Internal_Shdr *symtab_hdr; + Elf_Internal_Sym *local_syms; + + if (!is_ppc64_elf (ibfd)) + continue; + + local_syms = NULL; + symtab_hdr = &elf_symtab_hdr (ibfd); + + for (sec = ibfd->sections; sec != NULL; sec = sec->next) + if (ppc64_elf_section_data (sec)->has_pltcall + && !bfd_is_abs_section (sec->output_section)) + { + Elf_Internal_Rela *relstart, *rel, *relend; + + /* Read the relocations. */ + relstart = _bfd_elf_link_read_relocs (ibfd, sec, NULL, NULL, + info->keep_memory); + if (relstart == NULL) + return FALSE; + + relend = relstart + sec->reloc_count; + for (rel = relstart; rel < relend; ) + { + enum elf_ppc64_reloc_type r_type; + unsigned long r_symndx; + asection *sym_sec; + struct elf_link_hash_entry *h; + Elf_Internal_Sym *sym; + unsigned char *tls_maskp; + + r_type = ELF64_R_TYPE (rel->r_info); + if (r_type != R_PPC64_PLTCALL) + continue; + + r_symndx = ELF64_R_SYM (rel->r_info); + if (!get_sym_h (&h, &sym, &sym_sec, &tls_maskp, &local_syms, + r_symndx, ibfd)) + { + if (elf_section_data (sec)->relocs != relstart) + free (relstart); + if (local_syms != NULL + && symtab_hdr->contents != (unsigned char *) local_syms) + free (local_syms); + return FALSE; + } + + if (sym_sec != NULL && sym_sec->output_section != NULL) + { + bfd_vma from, to; + if (h != NULL) + to = h->root.u.def.value; + else + to = sym->st_value; + to += (rel->r_addend + + sym_sec->output_offset + + sym_sec->output_section->vma); + from = (rel->r_offset + + sec->output_offset + + sec->output_section->vma); + if (to - from + limit < 2 * limit) + *tls_maskp &= ~PLT_KEEP; + } + } + if (elf_section_data (sec)->relocs != relstart) + free (relstart); + } + + if (local_syms != NULL + && symtab_hdr->contents != (unsigned char *) local_syms) + { + if (!info->keep_memory) + free (local_syms); + else + symtab_hdr->contents = (unsigned char *) local_syms; + } + } + + return TRUE; +} + /* Set htab->tls_get_addr and 
call the generic ELF tls_setup function. */ asection * @@ -9913,6 +10071,7 @@ allocate_dynrelocs (struct elf_link_hash_entry *h, void *inf) || (h->needs_plt && h->def_regular && !htab->elf.dynamic_sections_created + && !htab->can_convert_all_inline_plt && (((struct ppc_link_hash_entry *) h)->tls_mask & (TLS_TLS | PLT_KEEP)) == PLT_KEEP)) { @@ -10241,7 +10400,8 @@ ppc64_elf_size_dynamic_sections (bfd *output_bfd, s->size += PLT_ENTRY_SIZE (htab); htab->elf.irelplt->size += sizeof (Elf64_External_Rela); } - else if ((*lgot_masks & (TLS_TLS | PLT_KEEP)) != PLT_KEEP) + else if (htab->can_convert_all_inline_plt + || (*lgot_masks & (TLS_TLS | PLT_KEEP)) != PLT_KEEP) ent->plt.offset = (bfd_vma) -1; else { diff --git a/bfd/elf64-ppc.h b/bfd/elf64-ppc.h index 8fa0140..9d5a85d 100644 --- a/bfd/elf64-ppc.h +++ b/bfd/elf64-ppc.h @@ -69,6 +69,8 @@ bfd_boolean ppc64_elf_init_stub_bfd (struct bfd_link_info *, struct ppc64_elf_params *); bfd_boolean ppc64_elf_edit_opd (struct bfd_link_info *); +bfd_boolean ppc64_elf_inline_plt + (struct bfd_link_info *); asection *ppc64_elf_tls_setup (struct bfd_link_info *); bfd_boolean ppc64_elf_tls_optimize |