diff options
author | Tamar Christina <tamar.christina@arm.com> | 2019-08-20 16:34:26 +0100 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2019-08-20 16:35:28 +0100 |
commit | b4e87f2c1eee99dee805e3828514f8afa162f9ba (patch) | |
tree | 213986cc59e1260fff20c20126bd5e4ffd3d0835 /bfd/elf32-arm.c | |
parent | d7a11d1383a2d0cdffc8c08e46b0cf5a66b3ce28 (diff) | |
download | gdb-b4e87f2c1eee99dee805e3828514f8afa162f9ba.zip gdb-b4e87f2c1eee99dee805e3828514f8afa162f9ba.tar.gz gdb-b4e87f2c1eee99dee805e3828514f8afa162f9ba.tar.bz2 |
Arm: Fix performance issue with thumb-2 tailcalls
We currently use a padding NOP after a Thumb-to-Arm interworking veneer (BX pc).
The NOP is never executed but may result in a performance penalty on some cores.
For this reason, this patch changes the NOPs after Thumb-to-Arm veneers into B .-2
(a branch back to the preceding instruction) and adds a note about this to the source code for future reference.
bfd/ChangeLog:
* elf32-arm.c (elf32_thumb2_plt_entry, elf32_arm_plt_thumb_stub,
elf32_arm_stub_long_branch_v4t_thumb_thumb,
elf32_arm_stub_long_branch_v4t_thumb_arm,
elf32_arm_stub_short_branch_v4t_thumb_arm,
elf32_arm_stub_long_branch_v4t_thumb_arm_pic,
elf32_arm_stub_long_branch_v4t_thumb_thumb_pic,
elf32_arm_stub_long_branch_v4t_thumb_tls_pic): Change nop to branch to
previous instruction.
ld/ChangeLog:
* testsuite/ld-arm/cortex-a8-fix-b-plt.d: Update Testcase.
* testsuite/ld-arm/cortex-a8-fix-b-rel-arm.d: Likewise.
* testsuite/ld-arm/cortex-a8-fix-bcc-plt.d: Likewise.
* testsuite/ld-arm/farcall-cond-thumb-arm.d: Likewise.
* testsuite/ld-arm/farcall-mixed-app.d: Likewise.
* testsuite/ld-arm/farcall-mixed-app2.d: Likewise.
* testsuite/ld-arm/farcall-mixed-lib-v4t.d: Likewise.
* testsuite/ld-arm/farcall-thumb-arm-pic-veneer.d: Likewise.
* testsuite/ld-arm/farcall-thumb-arm-short.d: Likewise.
* testsuite/ld-arm/farcall-thumb-arm.d: Likewise.
* testsuite/ld-arm/farcall-thumb-thumb-pic-veneer.d: Likewise.
* testsuite/ld-arm/farcall-thumb-thumb.d: Likewise.
* testsuite/ld-arm/fix-arm1176-on.d: Likewise.
* testsuite/ld-arm/ifunc-10.dd: Likewise.
* testsuite/ld-arm/ifunc-2.dd: Likewise.
* testsuite/ld-arm/ifunc-4.dd: Likewise.
* testsuite/ld-arm/ifunc-6.dd: Likewise.
* testsuite/ld-arm/ifunc-8.dd: Likewise.
* testsuite/ld-arm/jump-reloc-veneers-long.d: Likewise.
* testsuite/ld-arm/mixed-app.d: Likewise.
* testsuite/ld-arm/thumb2-b-interwork.d: Likewise.
* testsuite/ld-arm/tls-longplt.d: Likewise.
* testsuite/ld-arm/tls-thumb1.d: Likewise.
Diffstat (limited to 'bfd/elf32-arm.c')
-rw-r--r-- | bfd/elf32-arm.c | 25 |
1 files changed, 16 insertions, 9 deletions
diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c index f1895df..d1548d6 100644 --- a/bfd/elf32-arm.c +++ b/bfd/elf32-arm.c @@ -2329,6 +2329,11 @@ static const unsigned long dl_tlsdesc_lazy_trampoline [] = 0x00000018, /* 4: .word _GLOBAL_OFFSET_TABLE_ - 2b - 8 */ }; +/* NOTE: [Thumb nop sequence] + When adding code that transitions from Thumb to Arm the instruction that + should be used for the alignment padding should be 0xe7fd (b .-2) instead of + a nop for performance reasons. */ + /* ARM FDPIC PLT entry. */ /* The last 5 words contain PLT lazy fragment code and data. */ static const bfd_vma elf32_arm_fdpic_plt_entry [] = @@ -2446,8 +2451,8 @@ static const bfd_vma elf32_thumb2_plt_entry [] = 0x0c00f240, /* movw ip, #0xNNNN */ 0x0c00f2c0, /* movt ip, #0xNNNN */ 0xf8dc44fc, /* add ip, pc */ - 0xbf00f000 /* ldr.w pc, [ip] */ - /* nop */ + 0xe7fdf000 /* ldr.w pc, [ip] */ + /* b .-2 */ }; /* The format of the first entry in the procedure linkage table @@ -2487,7 +2492,7 @@ static const bfd_vma elf32_arm_vxworks_shared_plt_entry[] = static const bfd_vma elf32_arm_plt_thumb_stub [] = { 0x4778, /* bx pc */ - 0x46c0 /* nop */ + 0xe7fd /* b .-2 */ }; /* The entries in a PLT when using a DLL-based target with multiple @@ -2574,6 +2579,8 @@ typedef struct int reloc_addend; } insn_sequence; +/* See note [Thumb nop sequence] when adding a veneer. */ + /* Arm/Thumb -> Arm/Thumb long branch stub. On V5T and above, use blx to reach the stub if necessary. 
*/ static const insn_sequence elf32_arm_stub_long_branch_any_any[] = @@ -2624,7 +2631,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] = static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_thumb[] = { THUMB16_INSN (0x4778), /* bx pc */ - THUMB16_INSN (0x46c0), /* nop */ + THUMB16_INSN (0xe7fd), /* b .-2 */ ARM_INSN (0xe59fc000), /* ldr ip, [pc, #0] */ ARM_INSN (0xe12fff1c), /* bx ip */ DATA_WORD (0, R_ARM_ABS32, 0), /* dcd R_ARM_ABS32(X) */ @@ -2635,7 +2642,7 @@ static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_thumb[] = static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_arm[] = { THUMB16_INSN (0x4778), /* bx pc */ - THUMB16_INSN (0x46c0), /* nop */ + THUMB16_INSN (0xe7fd), /* b .-2 */ ARM_INSN (0xe51ff004), /* ldr pc, [pc, #-4] */ DATA_WORD (0, R_ARM_ABS32, 0), /* dcd R_ARM_ABS32(X) */ }; @@ -2645,7 +2652,7 @@ static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_arm[] = static const insn_sequence elf32_arm_stub_short_branch_v4t_thumb_arm[] = { THUMB16_INSN (0x4778), /* bx pc */ - THUMB16_INSN (0x46c0), /* nop */ + THUMB16_INSN (0xe7fd), /* b .-2 */ ARM_REL_INSN (0xea000000, -8), /* b (X-8) */ }; @@ -2683,7 +2690,7 @@ static const insn_sequence elf32_arm_stub_long_branch_v4t_arm_thumb_pic[] = static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_arm_pic[] = { THUMB16_INSN (0x4778), /* bx pc */ - THUMB16_INSN (0x46c0), /* nop */ + THUMB16_INSN (0xe7fd), /* b .-2 */ ARM_INSN (0xe59fc000), /* ldr ip, [pc, #0] */ ARM_INSN (0xe08cf00f), /* add pc, ip, pc */ DATA_WORD (0, R_ARM_REL32, -4), /* dcd R_ARM_REL32(X) */ @@ -2707,7 +2714,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb_only_pic[] = static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_thumb_pic[] = { THUMB16_INSN (0x4778), /* bx pc */ - THUMB16_INSN (0x46c0), /* nop */ + THUMB16_INSN (0xe7fd), /* b .-2 */ ARM_INSN (0xe59fc004), /* ldr ip, [pc, #4] */ ARM_INSN (0xe08fc00c), /* add ip, pc, ip */ 
ARM_INSN (0xe12fff1c), /* bx ip */ @@ -2728,7 +2735,7 @@ static const insn_sequence elf32_arm_stub_long_branch_any_tls_pic[] = static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_tls_pic[] = { THUMB16_INSN (0x4778), /* bx pc */ - THUMB16_INSN (0x46c0), /* nop */ + THUMB16_INSN (0xe7fd), /* b .-2 */ ARM_INSN (0xe59f1000), /* ldr r1, [pc, #0] */ ARM_INSN (0xe081f00f), /* add pc, r1, pc */ DATA_WORD (0, R_ARM_REL32, -4), /* dcd R_ARM_REL32(X) */ |