diff options
author | Alan Modra <amodra@gmail.com> | 2020-07-17 16:47:28 +0930 |
---|---|---|
committer | Alan Modra <amodra@gmail.com> | 2020-07-19 12:27:47 +0930 |
commit | e10a07b32dc1faed25b5bbcbbc47d68e2ff537b9 (patch) | |
tree | 1eeb63b4cdb332eb53f3e8f4ae91773bda69a008 /ld | |
parent | 6095ca52575d8e49003a3685453bc5bd0d929f1e (diff) | |
download | gdb-e10a07b32dc1faed25b5bbcbbc47d68e2ff537b9.zip gdb-e10a07b32dc1faed25b5bbcbbc47d68e2ff537b9.tar.gz gdb-e10a07b32dc1faed25b5bbcbbc47d68e2ff537b9.tar.bz2 |
Power10 stub selection
This patch better supports mixing of power10 and non-power10 code,
as might be seen in a cpu-optimized library using ifuncs to select
functions optimized for a given cpu. Using -Wl,--no-power10-stubs
is a poor choice in this situation since non-power10 notoc stubs are
slower and larger than the power10 variants, which you'd like to use
on power10 code paths.
With this change, power10 pc-relative code that makes calls marked
@notoc uses power10 stubs if stubs are necessary, and other calls use
non-power10 instructions in stubs. This will mean that if gcc is
generating code for -mcpu=power10 but with pc-rel disabled then you'll
get the older stubs even on power10 (unless you force with
-Wl,--power10-stubs). That shouldn't be too big a problem: stubs that
use r2 are reasonable. It's just the ones that set up addressing
using "mflr 12; bcl 20,31,.+4; mflr 11; mtlr 12" that should be
avoided if possible.
bfd/
* elf64-ppc.c (struct ppc_link_hash_table): Add has_power10_relocs.
(select_alt_stub): New function.
(ppc_get_stub_entry): Use it here.
(ppc64_elf_check_relocs): Set has_power10_relocs rather than
power10_stubs.
(ppc64_elf_size_stubs): Clear power10_stubs here instead. Don't
merge notoc stubs with other varieties when power10_stubs is "auto".
Instead dup the stub hash table entry.
(plt_stub_size, ppc_build_one_stub, ppc_size_one_stub): Adjust
tests of power10_stubs.
ld/
* emultempl/ppc64elf.em (power10-stubs): Accept optional "auto" arg.
* ld.texi (power10-stubs): Update.
* testsuite/ld-powerpc/callstub-1.d: Force --power10-stubs.
* testsuite/ld-powerpc/callstub-2.d: Relax branch offset comparison.
* testsuite/ld-powerpc/callstub-4.d: New test.
* testsuite/ld-powerpc/notoc.d: Force --no-power10-stubs.
* testsuite/ld-powerpc/notoc3.d,
* testsuite/ld-powerpc/notoc3.s,
* testsuite/ld-powerpc/notoc3.wf: New test.
* testsuite/ld-powerpc/powerpc.exp: Run new tests. Pass
--no-power10-stubs for notoc link.
Diffstat (limited to 'ld')
-rw-r--r-- | ld/ChangeLog | 14 | ||||
-rw-r--r-- | ld/emultempl/ppc64elf.em | 19 | ||||
-rw-r--r-- | ld/ld.texi | 12 | ||||
-rw-r--r-- | ld/testsuite/ld-powerpc/callstub-1.d | 12 | ||||
-rw-r--r-- | ld/testsuite/ld-powerpc/callstub-2.d | 8 | ||||
-rw-r--r-- | ld/testsuite/ld-powerpc/callstub-4.d | 32 | ||||
-rw-r--r-- | ld/testsuite/ld-powerpc/notoc.d | 2 | ||||
-rw-r--r-- | ld/testsuite/ld-powerpc/notoc3.d | 97 | ||||
-rw-r--r-- | ld/testsuite/ld-powerpc/notoc3.s | 59 | ||||
-rw-r--r-- | ld/testsuite/ld-powerpc/notoc3.wf | 17 | ||||
-rw-r--r-- | ld/testsuite/ld-powerpc/powerpc.exp | 7 |
11 files changed, 257 insertions, 22 deletions
diff --git a/ld/ChangeLog b/ld/ChangeLog index 32b4616..51a4e9c 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,17 @@ +2020-07-19 Alan Modra <amodra@gmail.com> + + * emultempl/ppc64elf.em (power10-stubs): Accept optional "auto" arg. + * ld.texi (power10-stubs): Update. + * testsuite/ld-powerpc/callstub-1.d: Force --power10-stubs. + * testsuite/ld-powerpc/callstub-2.d: Relax branch offset comparison. + * testsuite/ld-powerpc/callstub-4.d: New test. + * testsuite/ld-powerpc/notoc.d: Force --no-power10-stubs. + * testsuite/ld-powerpc/notoc3.d, + * testsuite/ld-powerpc/notoc3.s, + * testsuite/ld-powerpc/notoc3.wf: New test. + * testsuite/ld-powerpc/powerpc.exp: Run new tests. Pass + --no-power10-stubs for notoc link. + 2020-07-17 Hans-Peter Nilsson <hp@bitrange.com> * scripttempt/mmo.sc: Move .init first in .text output section. diff --git a/ld/emultempl/ppc64elf.em b/ld/emultempl/ppc64elf.em index 4987243..1331d03 100644 --- a/ld/emultempl/ppc64elf.em +++ b/ld/emultempl/ppc64elf.em @@ -716,7 +716,7 @@ PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}' { "no-plt-align", no_argument, NULL, OPTION_NO_PLT_ALIGN }, { "plt-localentry", optional_argument, NULL, OPTION_PLT_LOCALENTRY }, { "no-plt-localentry", no_argument, NULL, OPTION_NO_PLT_LOCALENTRY }, - { "power10-stubs", no_argument, NULL, OPTION_POWER10_STUBS }, + { "power10-stubs", optional_argument, NULL, OPTION_POWER10_STUBS }, { "no-power10-stubs", no_argument, NULL, OPTION_NO_POWER10_STUBS }, { "emit-stub-syms", no_argument, NULL, OPTION_STUBSYMS }, { "no-emit-stub-syms", no_argument, NULL, OPTION_NO_STUBSYMS }, @@ -773,7 +773,7 @@ PARSE_AND_LIST_OPTIONS=${PARSE_AND_LIST_OPTIONS}' --no-plt-localentry Don'\''t optimize ELFv2 calls\n" )); fprintf (file, _("\ - --power10-stubs Use Power10 PLT call stubs (default auto)\n" + --power10-stubs [=auto] Use Power10 PLT call stubs (default auto)\n" )); fprintf (file, _("\ --no-power10-stubs Don'\''t use Power10 PLT call stubs\n" @@ -889,7 +889,20 @@ 
PARSE_AND_LIST_ARGS_CASES=${PARSE_AND_LIST_ARGS_CASES}' break; case OPTION_POWER10_STUBS: - params.power10_stubs = 1; + if (optarg != NULL) + { + if (strcasecmp (optarg, "auto") == 0) + params.power10_stubs = -1; + else if (strcasecmp (optarg, "yes") == 0) + params.power10_stubs = 1; + else if (strcasecmp (optarg, "no") == 0) + params.power10_stubs = 0; + else + einfo (_("%F%P: invalid --power10-stubs argument `%s'\''\n"), + optarg); + } + else + params.power10_stubs = 1; break; case OPTION_NO_POWER10_STUBS: @@ -7922,15 +7922,13 @@ care. @option{--no-plt-localentry} is the default. @itemx --no-power10-stubs When PowerPC64 @command{ld} links input object files containing relocations used on power10 prefixed instructions it normally creates -linkage stubs (PLT call and long branch) using power10 instructions. -In particular for @code{@@notoc} PLT calls where @code{r2} is not -known the power10 stubs are smaller and faster, so are preferred for +linkage stubs (PLT call and long branch) using power10 instructions +for @code{@@notoc} PLT calls where @code{r2} is not known. The +power10 notoc stubs are smaller and faster, so are preferred for power10. @option{--power10-stubs} and @option{--no-power10-stubs} allow you to override the linker's selection of stub instructions. -For example, when linking a shared library that contains cpu-optimized -versions of functions for both power9 and power10, you might use -@option{--no-power10-stubs} so that power9 code making calls doesn't -attempt to execute power10 instructions. +@option{--power10-stubs=auto} allows the user to select the default +auto mode. 
@end table @ifclear GENERIC diff --git a/ld/testsuite/ld-powerpc/callstub-1.d b/ld/testsuite/ld-powerpc/callstub-1.d index 21eea76..48efdfb 100644 --- a/ld/testsuite/ld-powerpc/callstub-1.d +++ b/ld/testsuite/ld-powerpc/callstub-1.d @@ -1,5 +1,5 @@ #as: -a64 -mpower10 -#ld: -melf64ppc -shared --plt-align=0 --hash-style=gnu +#ld: -melf64ppc -shared --plt-align=0 --power10-stubs --hash-style=gnu #objdump: -dr -Mpower10 .* @@ -20,10 +20,10 @@ Disassembly of section \.text: .*: (7d 89 03 a6|a6 03 89 7d) mtctr r12 .*: (4e 80 04 20|20 04 80 4e) bctr #... -.*: (4b ff ff c1|c1 ff ff 4b) bl .*\.plt_call\.f1> + +.*<_start>: +.*: (4b ff .. ..|.. .. ff 4b) bl .*\.plt_call\.f1> .*: (e8 41 00 18|18 00 41 e8) ld r2,24\(r1\) -.*: (4b ff ff bd|bd ff ff 4b) bl .*\.plt_call\.f1\+0x4> -.*: (4b ff ff cd|cd ff ff 4b) bl .*\.plt_call\.f2> -.*: (04 10 00 01|01 00 10 04) pld r3,.* -.*: (e4 60 .. ..|.. .. 60 e4) +.*: (4b ff .. ..|.. .. ff 4b) bl .*\.plt_call\.f1\+0x4> +.*: (4b ff .. ..|.. .. ff 4b) bl .*\.plt_call\.f2> #pass diff --git a/ld/testsuite/ld-powerpc/callstub-2.d b/ld/testsuite/ld-powerpc/callstub-2.d index 6036707..5b43783 100644 --- a/ld/testsuite/ld-powerpc/callstub-2.d +++ b/ld/testsuite/ld-powerpc/callstub-2.d @@ -27,9 +27,9 @@ Disassembly of section \.text: .*: (7d 89 03 a6|a6 03 89 7d) mtctr r12 .*: (4e 80 04 20|20 04 80 4e) bctr -#... -.*: (4b ff ff bd|bd ff ff 4b) bl .*\.plt_call\.f1> +.*<_start>: +.*: (4b ff .. ..|.. .. ff 4b) bl .*\.plt_call\.f1> .*: (e8 41 00 18|18 00 41 e8) ld r2,24\(r1\) -.*: (4b ff ff b9|b9 ff ff 4b) bl .*\.plt_call\.f1\+0x4> -.*: (4b ff ff d5|d5 ff ff 4b) bl .*\.plt_call\.f2> +.*: (4b ff .. ..|.. .. ff 4b) bl .*\.plt_call\.f1\+0x4> +.*: (4b ff .. ..|.. .. 
ff 4b) bl .*\.plt_call\.f2> #pass diff --git a/ld/testsuite/ld-powerpc/callstub-4.d b/ld/testsuite/ld-powerpc/callstub-4.d new file mode 100644 index 0000000..12ea0d9 --- /dev/null +++ b/ld/testsuite/ld-powerpc/callstub-4.d @@ -0,0 +1,32 @@ +#source: callstub-1.s +#as: -a64 -mpower10 +#ld: -melf64ppc -shared --plt-align=0 --hash-style=gnu +#objdump: -dr -Mpower10 + +.* + +Disassembly of section \.text: + +.*\.plt_call\.f1>: +.*: (04 10 00 01|01 00 10 04) pld r12,.* +.*: (e5 80 .. ..|.. .. 80 e5) +.*: (7d 89 03 a6|a6 03 89 7d) mtctr r12 +.*: (4e 80 04 20|20 04 80 4e) bctr +.*: (f8 41 00 18|18 00 41 f8) std r2,24\(r1\) +.*: (e9 82 80 28|28 80 82 e9) ld r12,-32728\(r2\) +.*: (7d 89 03 a6|a6 03 89 7d) mtctr r12 +.*: (4e 80 04 20|20 04 80 4e) bctr + +.*\.plt_call\.f2>: +.*: (04 10 00 01|01 00 10 04) pld r12,.* +.*: (e5 80 .. ..|.. .. 80 e5) +.*: (7d 89 03 a6|a6 03 89 7d) mtctr r12 +.*: (4e 80 04 20|20 04 80 4e) bctr +#... + +.*<_start>: +.*: (4b ff .. ..|.. .. ff 4b) bl .*\.plt_call\.f1\+0x10> +.*: (e8 41 00 18|18 00 41 e8) ld r2,24\(r1\) +.*: (4b ff .. ..|.. .. ff 4b) bl .*\.plt_call\.f1> +.*: (4b ff .. ..|.. .. ff 4b) bl .*\.plt_call\.f2> +#pass diff --git a/ld/testsuite/ld-powerpc/notoc.d b/ld/testsuite/ld-powerpc/notoc.d index eaafd01..3555be7 100644 --- a/ld/testsuite/ld-powerpc/notoc.d +++ b/ld/testsuite/ld-powerpc/notoc.d @@ -1,6 +1,6 @@ #source: notoc.s #as: -a64 -#ld: --no-plt-localentry -T ext.lnk +#ld: --no-plt-localentry --no-power10-stubs -T ext.lnk #objdump: -d #target: powerpc64*-*-* diff --git a/ld/testsuite/ld-powerpc/notoc3.d b/ld/testsuite/ld-powerpc/notoc3.d new file mode 100644 index 0000000..ce19e99 --- /dev/null +++ b/ld/testsuite/ld-powerpc/notoc3.d @@ -0,0 +1,97 @@ +#as: -a64 -mpower10 +#ld: --no-plt-localentry -T ext.lnk +#objdump: -d +#target: powerpc64*-*-* + +.* + +Disassembly of section \.text: + +.* <.*\.long_branch\.f1>: +.*: (18 00 41 f8|f8 41 00 18) std r2,24\(r1\) +.*: (.. .. 00 48|48 00 .. ..) 
b .* <f1> + +.* <.*\.long_branch\.g1>: +.*: (18 00 41 f8|f8 41 00 18) std r2,24\(r1\) +.*: (.. .. 00 48|48 00 .. ..) b .* <g1> + +.* <.*\.plt_branch\.ext>: +.*: (00 20 60 3d|3d 60 20 00) lis r11,8192 +.*: (00 00 6b 61|61 6b 00 00) ori r11,r11,0 +.*: (ff ef 13 06|06 13 ef ff) pla r12,-268435736 +.*: (e8 fe 80 39|39 80 fe e8) +.*: (46 17 6b 79|79 6b 17 46) rldicr r11,r11,34,29 +.*: (14 62 8b 7d|7d 8b 62 14) add r12,r11,r12 +.*: (a6 03 89 7d|7d 89 03 a6) mtctr r12 +.*: (20 04 80 4e|4e 80 04 20) bctr +.*: (00 80 82 e9|e9 82 80 00) ld r12,-32768\(r2\) +.*: (a6 03 89 7d|7d 89 03 a6) mtctr r12 +.*: (20 04 80 4e|4e 80 04 20) bctr + +.* <.*\.long_branch\.f2>: +.*: (00 00 00 60|60 00 00 00) nop +.*: (00 00 10 06|06 10 00 00) pla r12,108 +.*: (6c 00 80 39|39 80 00 6c) +.*: (.. .. 00 48|48 00 .. ..) b .* <f2> + +.* <.*\.long_branch\.g2>: +.*: (00 00 00 60|60 00 00 00) nop +.*: (00 00 10 06|06 10 00 00) pla r12,144 +.*: (90 00 80 39|39 80 00 90) +.*: (.. .. 00 48|48 00 .. ..) b .* <g2> +#... + +.* <f1>: +.*: (01 00 00 48|48 00 00 01) bl .* <f1> +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.long_branch\.f2> +.*: (.. .. 00 48|48 00 .. ..) bl .* <g1> +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.long_branch\.g2> +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.plt_branch\.ext> +.*: (20 00 80 4e|4e 80 00 20) blr + +.* <g1>: +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.long_branch\.f2> +.*: (.. .. ff 4b|4b ff .. ..) bl .* <f1> +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.long_branch\.g2> +.*: (.. .. ff 4b|4b ff .. ..) bl .* <g1> +.*: (20 00 80 4e|4e 80 00 20) blr + +.* <f2>: +.*: (02 10 40 3c|3c 40 10 02) lis r2,4098 +.*: (00 90 42 38|38 42 90 00) addi r2,r2,-28672 +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.long_branch\.f1> +.*: (18 00 41 e8|e8 41 00 18) ld r2,24\(r1\) +.*: (.. .. ff 4b|4b ff .. ..) bl .* <f2\+0x8> +.*: (00 00 00 60|60 00 00 00) nop +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.long_branch\.g1> +.*: (18 00 41 e8|e8 41 00 18) ld r2,24\(r1\) +.*: (.. .. 00 48|48 00 .. ..) 
bl .* <g2\+0x8> +.*: (00 00 00 60|60 00 00 00) nop +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.plt_branch\.ext\+0x20> +.*: (00 00 00 60|60 00 00 00) nop +.*: (20 00 80 4e|4e 80 00 20) blr + +.* <g2>: +.*: (02 10 40 3c|3c 40 10 02) lis r2,4098 +.*: (00 90 42 38|38 42 90 00) addi r2,r2,-28672 +.*: (.. .. ff 4b|4b ff .. ..) bl .* <f2\+0x8> +.*: (00 00 00 60|60 00 00 00) nop +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.long_branch\.f1> +.*: (18 00 41 e8|e8 41 00 18) ld r2,24\(r1\) +.*: (.. .. ff 4b|4b ff .. ..) bl .* <g2\+0x8> +.*: (00 00 00 60|60 00 00 00) nop +.*: (.. .. ff 4b|4b ff .. ..) bl .* <.*\.long_branch\.g1> +.*: (18 00 41 e8|e8 41 00 18) ld r2,24\(r1\) +.*: (20 00 80 4e|4e 80 00 20) blr + +.* <_start>: +.*: (00 00 00 48|48 00 00 00) b .* <_start> +#... + +Disassembly of section \.text\.ext: + +8000000000000000 <ext>: +8000000000000000: (02 10 40 3c|3c 40 10 02) lis r2,4098 +8000000000000004: (00 90 42 38|38 42 90 00) addi r2,r2,-28672 +8000000000000008: (00 00 00 60|60 00 00 00) nop +800000000000000c: (20 00 80 4e|4e 80 00 20) blr diff --git a/ld/testsuite/ld-powerpc/notoc3.s b/ld/testsuite/ld-powerpc/notoc3.s new file mode 100644 index 0000000..afbefa4 --- /dev/null +++ b/ld/testsuite/ld-powerpc/notoc3.s @@ -0,0 +1,59 @@ + .text + .globl f1, f2, g1, g2, _start + .weak ext + .abiversion 2 + +f1: + .localentry f1,1 + bl f1@notoc + bl f2@notoc + bl g1@notoc + bl g2@notoc + bl ext@notoc + blr + +g1: + .localentry g1,1 + bl f2@notoc + bl f1@notoc + bl g2@notoc + bl g1@notoc + blr + +f2: +0: + addis 2,12,.TOC.-0b@ha + addi 2,2,.TOC.-0b@l + .localentry f2,.-0b + bl f1 + nop + bl f2 + nop + bl g1 + nop + bl g2 + nop + bl ext + nop + blr + +g2: +0: + addis 2,12,.TOC.-0b@ha + addi 2,2,.TOC.-0b@l + .localentry g2,.-0b + bl f2 + nop + bl f1 + nop + bl g2 + nop + bl g1 + nop + blr + +_start: + .cfi_startproc + b _start + pld 3,_start + .cfi_endproc diff --git a/ld/testsuite/ld-powerpc/notoc3.wf b/ld/testsuite/ld-powerpc/notoc3.wf new file mode 100644 index 0000000..23904f8 
--- /dev/null +++ b/ld/testsuite/ld-powerpc/notoc3.wf @@ -0,0 +1,17 @@ +Contents of the \.eh_frame section: + + +00000000 0000000000000010 00000000 CIE + Version: 1 + Augmentation: "zR" + Code alignment factor: 4 + Data alignment factor: -8 + Return address column: 65 + Augmentation data: 1b + DW_CFA_def_cfa: r1 ofs 0 + +00000014 0000000000000010 00000018 FDE cie=00000000 pc=000000001000020c\.\.0000000010000218 + DW_CFA_nop + DW_CFA_nop + DW_CFA_nop + diff --git a/ld/testsuite/ld-powerpc/powerpc.exp b/ld/testsuite/ld-powerpc/powerpc.exp index 50553ba..bd269a2 100644 --- a/ld/testsuite/ld-powerpc/powerpc.exp +++ b/ld/testsuite/ld-powerpc/powerpc.exp @@ -317,10 +317,14 @@ set ppc64elftests { {"ambig shared v1" "-shared -melf64ppc" "" "-a64" {funv1.s} {} "funv1.so"} {"ambig shared v2" "-shared -melf64ppc" "" "-a64" {funv2.s} {} "funv2.so"} {"notoc ext" "" "" "-a64" {ext.s} {} ""} - {"notoc" "-melf64ppc --no-plt-localentry -T ext.lnk" "" "-a64" {notoc.s} + {"notoc" "-melf64ppc --no-plt-localentry --no-power10-stubs -T ext.lnk" + "" "-a64" {notoc.s} {{objdump -d notoc.d} {readelf {-wf -W} notoc.wf}} "notoc"} {"notoc2" "-melf64ppc -shared" "" "-a64 -mpower10" {notoc2.s} {{objdump {-d -Mpower10} notoc2.d}} "notoc2"} + {"notoc3" "-melf64ppc --no-plt-localentry -T ext.lnk" "" + "-a64 -mpower10" {notoc3.s} + {{objdump -d notoc3.d} {readelf {-wf -W} notoc3.wf}} "notoc3"} {"pcrelopt" "-melf64ppc --hash-style=gnu" "tmpdir/symtocbase.so" "-a64 -mpower10" {pcrelopt.s} {{objdump {-d -Mpower10} pcrelopt.d} @@ -403,6 +407,7 @@ if [ supports_ppc64 ] then { run_dump_test "callstub-1" run_dump_test "callstub-2" run_dump_test "callstub-3" + run_dump_test "callstub-4" run_dump_test "tlsgd" run_dump_test "tlsld" run_dump_test "tlsie" |