From f378ab099d535f5540f292fed07fcf4b1fabd314 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Fri, 26 May 2017 10:02:29 +0930 Subject: PPC64_OPT_LOCALENTRY ELFv2 functions with localentry:0 are those with a single entry point, i.e. global entry == local entry, that have no requirement on r2 or r12, and that guarantee r2 is unchanged on return. Such an external function can be called via the PLT without saving r2 or restoring it on return, avoiding a common load-hit-store for small functions. The optimization is attractive. The TOC pointer load-hit-store is a major reason why calls to small functions that need no register saves, or, with shrink-wrap, no register saves on a fast path, are slow on powerpc64le. To be safe, this optimization needs ld.so support to check that the run-time function implementation matches the link-time one. If a function in a shared library with st_other localentry non-zero is called without saving and restoring r2, r2 will be trashed on return, leading to segfaults. For that reason, the optimization does not happen for weak functions, since a weak definition is a fairly solid hint that the function will likely be overridden. I'm also not enabling the optimization by default unless glibc-2.26 is detected, which should have the ld.so checks implemented. bfd/ * elf64-ppc.c (struct ppc_link_hash_table): Add has_plt_localentry0. (ppc64_elf_merge_symbol_attribute): Merge localentry bits from dynamic objects. (is_elfv2_localentry0): New function. (ppc64_elf_tls_setup): Default params->plt_localentry0. (plt_stub_size): Adjust size for tls_get_addr_opt stub. (build_tls_get_addr_stub): Use a simpler stub when r2 is not saved. (ppc64_elf_size_stubs): Leave stub_type as ppc_stub_plt_call for optimized localentry:0 stubs. (ppc64_elf_build_stubs): Save r2 in ELFv2 __glink_PLTresolve. (ppc64_elf_relocate_section): Leave nop unchanged for optimized localentry:0 stubs. (ppc64_elf_finish_dynamic_sections): Set PPC64_OPT_LOCALENTRY in DT_PPC64_OPT.
* elf64-ppc.h (struct ppc64_elf_params): Add plt_localentry0. include/ * elf/ppc64.h (PPC64_OPT_LOCALENTRY): Define. ld/ * emultempl/ppc64elf.em (params): Init plt_localentry0 field. (enum ppc64_opt): New, replacing OPTION_* defines. Add OPTION_PLT_LOCALENTRY, and OPTION_NO_PLT_LOCALENTRY. (PARSE_AND_LIST_*): Support --plt-localentry and --no-plt-localentry. * testsuite/ld-powerpc/elfv2so.d: Update. * testsuite/ld-powerpc/powerpc.exp (TLS opt 5): Use --no-plt-localentry. * testsuite/ld-powerpc/tlsopt5.d: Update. --- ld/ChangeLog | 10 ++++++ ld/emultempl/ppc64elf.em | 65 ++++++++++++++++++++++++------------- ld/testsuite/ld-powerpc/elfv2so.d | 2 +- ld/testsuite/ld-powerpc/powerpc.exp | 2 +- ld/testsuite/ld-powerpc/tlsopt5.d | 4 +-- 5 files changed, 57 insertions(+), 26 deletions(-) (limited to 'ld') diff --git a/ld/ChangeLog b/ld/ChangeLog index da03e3a..e2d5496 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,13 @@ +2017-06-01 Alan Modra + + * emultempl/ppc64elf.em (params): Init plt_localentry0 field. + (enum ppc64_opt): New, replacing OPTION_* defines. Add + OPTION_PLT_LOCALENTRY, and OPTION_NO_PLT_LOCALENTRY. + (PARSE_AND_LIST_*): Support --plt-localentry and --no-plt-localentry. + * testsuite/ld-powerpc/elfv2so.d: Update. + * testsuite/ld-powerpc/powerpc.exp (TLS opt 5): Use --no-plt-localentry. + * testsuite/ld-powerpc/tlsopt5.d: Update. + 2017-05-31 Alan Modra * emultempl/ppc64elf.em (plt-static-chain help): Fix quoting. diff --git a/ld/emultempl/ppc64elf.em b/ld/emultempl/ppc64elf.em index f1d5274..58cb798 100644 --- a/ld/emultempl/ppc64elf.em +++ b/ld/emultempl/ppc64elf.em @@ -38,7 +38,7 @@ static struct ppc64_elf_params params = { NULL, &ppc_layout_sections_again, 1, -1, 0, ${DEFAULT_PLT_STATIC_CHAIN-0}, -1, 0, - 0, -1, -1, 0}; + -1, 0, -1, -1, 0}; /* Fake input file for stubs. */ static lang_input_statement_type *stub_file; @@ -683,27 +683,32 @@ fi # parse_args and list_options functions. 
# PARSE_AND_LIST_PROLOGUE=${PARSE_AND_LIST_PROLOGUE}' -#define OPTION_STUBGROUP_SIZE 321 -#define OPTION_PLT_STATIC_CHAIN (OPTION_STUBGROUP_SIZE + 1) -#define OPTION_NO_PLT_STATIC_CHAIN (OPTION_PLT_STATIC_CHAIN + 1) -#define OPTION_PLT_THREAD_SAFE (OPTION_NO_PLT_STATIC_CHAIN + 1) -#define OPTION_NO_PLT_THREAD_SAFE (OPTION_PLT_THREAD_SAFE + 1) -#define OPTION_PLT_ALIGN (OPTION_NO_PLT_THREAD_SAFE + 1) -#define OPTION_NO_PLT_ALIGN (OPTION_PLT_ALIGN + 1) -#define OPTION_STUBSYMS (OPTION_NO_PLT_ALIGN + 1) -#define OPTION_NO_STUBSYMS (OPTION_STUBSYMS + 1) -#define OPTION_SAVRES (OPTION_NO_STUBSYMS + 1) -#define OPTION_NO_SAVRES (OPTION_SAVRES + 1) -#define OPTION_DOTSYMS (OPTION_NO_SAVRES + 1) -#define OPTION_NO_DOTSYMS (OPTION_DOTSYMS + 1) -#define OPTION_NO_TLS_OPT (OPTION_NO_DOTSYMS + 1) -#define OPTION_TLS_GET_ADDR_OPT (OPTION_NO_TLS_OPT + 1) -#define OPTION_NO_TLS_GET_ADDR_OPT (OPTION_TLS_GET_ADDR_OPT + 1) -#define OPTION_NO_OPD_OPT (OPTION_NO_TLS_GET_ADDR_OPT + 1) -#define OPTION_NO_TOC_OPT (OPTION_NO_OPD_OPT + 1) -#define OPTION_NO_MULTI_TOC (OPTION_NO_TOC_OPT + 1) -#define OPTION_NO_TOC_SORT (OPTION_NO_MULTI_TOC + 1) -#define OPTION_NON_OVERLAPPING_OPD (OPTION_NO_TOC_SORT + 1) +enum ppc64_opt +{ + OPTION_STUBGROUP_SIZE = 321, + OPTION_PLT_STATIC_CHAIN, + OPTION_NO_PLT_STATIC_CHAIN, + OPTION_PLT_THREAD_SAFE, + OPTION_NO_PLT_THREAD_SAFE, + OPTION_PLT_ALIGN, + OPTION_NO_PLT_ALIGN, + OPTION_PLT_LOCALENTRY, + OPTION_NO_PLT_LOCALENTRY, + OPTION_STUBSYMS, + OPTION_NO_STUBSYMS, + OPTION_SAVRES, + OPTION_NO_SAVRES, + OPTION_DOTSYMS, + OPTION_NO_DOTSYMS, + OPTION_NO_TLS_OPT, + OPTION_TLS_GET_ADDR_OPT, + OPTION_NO_TLS_GET_ADDR_OPT, + OPTION_NO_OPD_OPT, + OPTION_NO_TOC_OPT, + OPTION_NO_MULTI_TOC, + OPTION_NO_TOC_SORT, + OPTION_NON_OVERLAPPING_OPD +}; ' PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}' @@ -714,6 +719,8 @@ PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}' { "no-plt-thread-safe", no_argument, NULL, OPTION_NO_PLT_THREAD_SAFE }, { "plt-align", 
optional_argument, NULL, OPTION_PLT_ALIGN }, { "no-plt-align", no_argument, NULL, OPTION_NO_PLT_ALIGN }, + { "plt-localentry", optional_argument, NULL, OPTION_PLT_LOCALENTRY }, + { "no-plt-localentry", no_argument, NULL, OPTION_NO_PLT_LOCALENTRY }, { "emit-stub-syms", no_argument, NULL, OPTION_STUBSYMS }, { "no-emit-stub-syms", no_argument, NULL, OPTION_NO_STUBSYMS }, { "dotsyms", no_argument, NULL, OPTION_DOTSYMS }, @@ -760,6 +767,12 @@ PARSE_AND_LIST_OPTIONS=${PARSE_AND_LIST_OPTIONS}' --no-plt-align Dont'\''t align individual PLT call stubs.\n" )); fprintf (file, _("\ + --plt-localentry Optimize calls to ELFv2 localentry:0 functions.\n" + )); + fprintf (file, _("\ + --no-plt-localentry Don'\''t optimize ELFv2 calls.\n" + )); + fprintf (file, _("\ --emit-stub-syms Label linker stubs with a symbol.\n" )); fprintf (file, _("\ @@ -852,6 +865,14 @@ PARSE_AND_LIST_ARGS_CASES=${PARSE_AND_LIST_ARGS_CASES}' params.plt_stub_align = 0; break; + case OPTION_PLT_LOCALENTRY: + params.plt_localentry0 = 1; + break; + + case OPTION_NO_PLT_LOCALENTRY: + params.plt_localentry0 = 0; + break; + case OPTION_STUBSYMS: params.emit_stub_syms = 1; break; diff --git a/ld/testsuite/ld-powerpc/elfv2so.d b/ld/testsuite/ld-powerpc/elfv2so.d index d6f0b7d..f3962ac 100644 --- a/ld/testsuite/ld-powerpc/elfv2so.d +++ b/ld/testsuite/ld-powerpc/elfv2so.d @@ -57,6 +57,7 @@ Disassembly of section \.text: .*: (7c 08 02 a6|a6 02 08 7c) mflr r0 .*: (42 9f 00 05|05 00 9f 42) bcl .* .*: (7d 68 02 a6|a6 02 68 7d) mflr r11 +.*: (18 00 41 f8|f8 41 00 18) std r2,24\(r1\) .*: (e8 4b ff f0|f0 ff 4b e8) ld r2,-16\(r11\) .*: (7c 08 03 a6|a6 03 08 7c) mtlr r0 .*: (7d 8b 60 50|50 60 8b 7d) subf r12,r11,r12 @@ -67,7 +68,6 @@ Disassembly of section \.text: .*: (7d 89 03 a6|a6 03 89 7d) mtctr r12 .*: (e9 6b 00 08|08 00 6b e9) ld r11,8\(r11\) .*: (4e 80 04 20|20 04 80 4e) bctr -.*: (60 00 00 00|00 00 00 60) nop .* : .*: (4b ff ff c8|c8 ff ff 4b) b .* <__glink_PLTresolve> diff --git a/ld/testsuite/ld-powerpc/powerpc.exp 
b/ld/testsuite/ld-powerpc/powerpc.exp index 5350235..f12e413 100644 --- a/ld/testsuite/ld-powerpc/powerpc.exp +++ b/ld/testsuite/ld-powerpc/powerpc.exp @@ -210,7 +210,7 @@ set ppc64elftests { "tlsopt4"} {"TLS DLL" "-shared -melf64ppc --version-script tlsdll.ver" "" "-a64" {tlsdll.s} {} "tlsdll.so"} - {"TLS opt 5" "-melf64ppc --gc-sections tmpdir/tlsdll.so" "" "-a64" {tlsopt5.s} + {"TLS opt 5" "-melf64ppc --gc-sections --no-plt-localentry tmpdir/tlsdll.so" "" "-a64" {tlsopt5.s} {{objdump -dr tlsopt5.d}} "tlsopt5"} {"sym@tocbase" "-shared -melf64ppc" "" "-a64" {symtocbase-1.s symtocbase-2.s} diff --git a/ld/testsuite/ld-powerpc/tlsopt5.d b/ld/testsuite/ld-powerpc/tlsopt5.d index 7b17130..b356a2e 100644 --- a/ld/testsuite/ld-powerpc/tlsopt5.d +++ b/ld/testsuite/ld-powerpc/tlsopt5.d @@ -1,6 +1,6 @@ #source: tlsopt5.s #as: -a64 -#ld: --gc-sections tlsdll.so +#ld: --gc-sections --no-plt-localentry tlsdll.so #objdump: -dr #target: powerpc64*-*-* @@ -38,6 +38,7 @@ Disassembly of section \.text: .*: (a6 02 08 7c|7c 08 02 a6) mflr r0 .*: (05 00 9f 42|42 9f 00 05) bcl .* .*: (a6 02 68 7d|7d 68 02 a6) mflr r11 +.*: (18 00 41 f8|f8 41 00 18) std r2,24\(r1\) .*: (f0 ff 4b e8|e8 4b ff f0) ld r2,-16\(r11\) .*: (a6 03 08 7c|7c 08 03 a6) mtlr r0 .*: (50 60 8b 7d|7d 8b 60 50) subf r12,r11,r12 @@ -48,7 +49,6 @@ Disassembly of section \.text: .*: (a6 03 89 7d|7d 89 03 a6) mtctr r12 .*: (08 00 6b e9|e9 6b 00 08) ld r11,8\(r11\) .*: (20 04 80 4e|4e 80 04 20) bctr -.*: (00 00 00 60|60 00 00 00) nop 0000000010000390 <__tls_get_addr_opt@plt>: .*: (c8 ff ff 4b|4b ff ff c8) b .* -- cgit v1.1