diff options
Diffstat (limited to 'lld/ELF/Arch/ARM.cpp')
-rw-r--r-- | lld/ELF/Arch/ARM.cpp | 176 |
1 files changed, 123 insertions, 53 deletions
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 687f949..3e0efe5 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -231,36 +231,71 @@ static void writePltHeaderLong(uint8_t *buf) { // The default PLT header requires the .got.plt to be within 128 Mb of the // .plt in the positive direction. void ARM::writePltHeader(uint8_t *buf) const { - // Use a similar sequence to that in writePlt(), the difference is the calling - // conventions mean we use lr instead of ip. The PLT entry is responsible for - // saving lr on the stack, the dynamic loader is responsible for reloading - // it. - const uint32_t pltData[] = { - 0xe52de004, // L1: str lr, [sp,#-4]! - 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) - 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) - 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) - }; - - uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4; - if (!llvm::isUInt<27>(offset)) { - // We cannot encode the Offset, use the long form. - writePltHeaderLong(buf); - return; + if (config->armThumbPLTs) { + // The instruction sequence for thumb: + // + // 0: b500 push {lr} + // 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe <func+0xe> + // 6: 44fe add lr, pc + // 8: f85e ff08 ldr pc, [lr, #8]! + // e: .word .got.plt - .plt - 16 + // + // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from + // `pc` in the add instruction and 8 bytes for the `lr` adjustment. + // + uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16; + assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset"); + write16(buf + 0, 0xb500); + // Split into two halves to support endianness correctly. + write16(buf + 2, 0xf8df); + write16(buf + 4, 0xe008); + write16(buf + 6, 0x44fe); + // Split into two halves to support endianness correctly. + write16(buf + 8, 0xf85e); + write16(buf + 10, 0xff08); + write32(buf + 12, offset); + + memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary + memcpy(buf + 20, trapInstr.data(), 4); + memcpy(buf + 24, trapInstr.data(), 4); + memcpy(buf + 28, trapInstr.data(), 4); + } else { + // Use a similar sequence to that in writePlt(), the difference is the + // calling conventions mean we use lr instead of ip. The PLT entry is + // responsible for saving lr on the stack, the dynamic loader is responsible + // for reloading it. + const uint32_t pltData[] = { + 0xe52de004, // L1: str lr, [sp,#-4]! + 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) + 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) + 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) + }; + + uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4; + if (!llvm::isUInt<27>(offset)) { + // We cannot encode the Offset, use the long form. + writePltHeaderLong(buf); + return; + } + write32(buf + 0, pltData[0]); + write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff)); + write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff)); + write32(buf + 12, pltData[3] | (offset & 0xfff)); + memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary + memcpy(buf + 20, trapInstr.data(), 4); + memcpy(buf + 24, trapInstr.data(), 4); + memcpy(buf + 28, trapInstr.data(), 4); } - write32(buf + 0, pltData[0]); - write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff)); - write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff)); - write32(buf + 12, pltData[3] | (offset & 0xfff)); - memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary - memcpy(buf + 20, trapInstr.data(), 4); - memcpy(buf + 24, trapInstr.data(), 4); - memcpy(buf + 28, trapInstr.data(), 4); } void ARM::addPltHeaderSymbols(InputSection &isec) const { - addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec); - addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec); + if (config->armThumbPLTs) { + addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec); + addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec); + } else { + addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec); + addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec); + } } // Long form PLT entries that do not have any restrictions on the displacement @@ -279,32 +314,65 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr, // .plt in the positive direction. void ARM::writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const { - // The PLT entry is similar to the example given in Appendix A of ELF for - // the Arm Architecture. Instead of using the Group Relocations to find the - // optimal rotation for the 8-bit immediate used in the add instructions we - // hard code the most compact rotations for simplicity. This saves a load - // instruction over the long plt sequences. - const uint32_t pltData[] = { - 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 - 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 - 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 - }; - uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; - if (!llvm::isUInt<27>(offset)) { - // We cannot encode the Offset, use the long form. - writePltLong(buf, sym.getGotPltVA(), pltEntryAddr); - return; + if (!config->armThumbPLTs) { + uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; + + // The PLT entry is similar to the example given in Appendix A of ELF for + // the Arm Architecture. Instead of using the Group Relocations to find the + // optimal rotation for the 8-bit immediate used in the add instructions we + // hard code the most compact rotations for simplicity. This saves a load + // instruction over the long plt sequences. + const uint32_t pltData[] = { + 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 + 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 + 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 + }; + if (!llvm::isUInt<27>(offset)) { + // We cannot encode the Offset, use the long form. + writePltLong(buf, sym.getGotPltVA(), pltEntryAddr); + return; + } + write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff)); + write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff)); + write32(buf + 8, pltData[2] | (offset & 0xfff)); + memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary + } else { + uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12; + assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset"); + + // A PLT entry will be: + // + // movw ip, #<lower 16 bits> + // movt ip, #<upper 16 bits> + // add ip, pc + // L1: ldr.w pc, [ip] + // b L1 + // + // where ip = r12 = 0xc + + // movw ip, #<lower 16 bits> + write16(buf + 2, 0x0c00); // use `ip` + relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset); + + // movt ip, #<upper 16 bits> + write16(buf + 6, 0x0c00); // use `ip` + relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset); + + write16(buf + 8, 0x44fc); // add ip, pc + write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half) + write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half) + write16(buf + 14, 0xe7fc); // Branch to previous instruction } - write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff)); - write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff)); - write32(buf + 8, pltData[2] | (offset & 0xfff)); - memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary } void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { - addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec); - addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec); + if (config->armThumbPLTs) { + addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec); + } else { + addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec); + addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec); + } } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, @@ -325,6 +393,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). + assert(!config->armThumbPLTs && + "If the source is ARM, we should not need Thumb PLTs"); if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; [[fallthrough]]; @@ -335,9 +405,9 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, } case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: - // Source is Thumb, all PLT entries are ARM so interworking is required. + // Source is Thumb, when all PLT entries are ARM interworking is required. // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). - if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0)) + if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0)) return true; [[fallthrough]]; case R_ARM_THM_CALL: { @@ -547,7 +617,6 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { // STT_FUNC we choose whether to write a BL or BLX depending on the // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is // not of type STT_FUNC then we must preserve the original instruction. - // PLT entries are always ARM state so we know we don't need to interwork. assert(rel.sym); // R_ARM_CALL is always reached via relocate(). bool bit0Thumb = val & 1; bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000; @@ -606,12 +675,13 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { // PLT entries are always ARM state so we know we need to interwork. assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). bool bit0Thumb = val & 1; + bool useThumb = bit0Thumb || config->armThumbPLTs; bool isBlx = (read16(loc + 2) & 0x1000) == 0; // lld 10.0 and before always used bit0Thumb when deciding to write a BLX - // even when type not STT_FUNC. PLT entries generated by LLD are always ARM. - if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb) + // even when type not STT_FUNC. + if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb) stateChangeWarning(loc, rel.type, *rel.sym); - if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) { + if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) { // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As // the BLX instruction may only be two byte aligned. This must be done // before overflow check. |