diff options
author | Amir Ayupov <aaupov@fb.com> | 2024-07-18 20:50:43 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-18 20:50:43 -0700 |
commit | 7800ad4550051ce44d3485edf5b8084ff2eeb774 (patch) | |
tree | a8856a4d862069de866569fc7c471f7eba5ddf08 | |
parent | fa488a9590a160521a6880abd2dcdc0f75a68fb8 (diff) | |
download | llvm-users/aaupov/spr/main.bolt-support-possible_pic_fixed_branch.zip llvm-users/aaupov/spr/main.bolt-support-possible_pic_fixed_branch.tar.gz llvm-users/aaupov/spr/main.bolt-support-possible_pic_fixed_branch.tar.bz2 |
[BOLT] Support POSSIBLE_PIC_FIXED_BRANCH (#91667)users/aaupov/spr/main.bolt-support-possible_pic_fixed_branch
Detect and support fixed PIC indirect jumps of the following form:
```
movslq En(%rip), %r1
leaq PIC_JUMP_TABLE(%rip), %r2
addq %r2, %r1
jmpq *%r1
```
with PIC_JUMP_TABLE that looks like following:
```
JT: ----------
E1:| L1 - JT |
|----------|
E2:| L2 - JT |
|----------|
| |
......
En:| Ln - JT |
----------
```
The code could be produced by compilers, see
https://github.com/llvm/llvm-project/issues/91648.
Test Plan: updated jump-table-fixed-ref-pic.test
-rw-r--r-- | bolt/include/bolt/Core/BinaryContext.h | 3 | ||||
-rw-r--r-- | bolt/include/bolt/Core/MCPlusBuilder.h | 13 | ||||
-rw-r--r-- | bolt/lib/Core/BinaryContext.cpp | 10 | ||||
-rw-r--r-- | bolt/lib/Core/BinaryFunction.cpp | 47 | ||||
-rw-r--r-- | bolt/lib/Passes/IndirectCallPromotion.cpp | 4 | ||||
-rw-r--r-- | bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 13 | ||||
-rw-r--r-- | bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 3 | ||||
-rw-r--r-- | bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 105 | ||||
-rw-r--r-- | bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s | 2 | ||||
-rw-r--r-- | bolt/test/X86/jump-table-fixed-ref-pic.test | 12 |
10 files changed, 153 insertions, 59 deletions
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 73932c4..befdc99 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -431,6 +431,9 @@ public: return nullptr; } + /// Deregister JumpTable registered at a given \p Address and delete it. + void deleteJumpTable(uint64_t Address); + unsigned getDWARFEncodingSize(unsigned Encoding) { if (Encoding == dwarf::DW_EH_PE_omit) return 0; diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index ab07f07..e543513 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -58,6 +58,8 @@ enum class IndirectBranchType : char { POSSIBLE_PIC_JUMP_TABLE, /// Possibly a jump table for PIC. POSSIBLE_GOTO, /// Possibly a gcc's computed goto. POSSIBLE_FIXED_BRANCH, /// Possibly an indirect branch to a fixed location. + POSSIBLE_PIC_FIXED_BRANCH, /// Possibly an indirect jump to a fixed entry in a + /// PIC jump table. }; class MCPlusBuilder { @@ -1474,12 +1476,11 @@ public: /// will be set to the different components of the branch. \p MemLocInstr /// is the instruction that loads up the indirect function pointer. It may /// or may not be same as \p Instruction. - virtual IndirectBranchType - analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin, - InstructionIterator End, const unsigned PtrSize, - MCInst *&MemLocInstr, unsigned &BaseRegNum, - unsigned &IndexRegNum, int64_t &DispValue, - const MCExpr *&DispExpr, MCInst *&PCRelBaseOut) const { + virtual IndirectBranchType analyzeIndirectBranch( + MCInst &Instruction, InstructionIterator Begin, InstructionIterator End, + const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum, + unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr, + MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const { llvm_unreachable("not implemented"); return IndirectBranchType::UNKNOWN; } diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 0a1f1bb..035f68e 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -2523,6 +2523,16 @@ BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) { return nullptr; } +/// Deregister JumpTable registered at a given \p Address and delete it. +void BinaryContext::deleteJumpTable(uint64_t Address) { + assert(JumpTables.count(Address) && "Must have a jump table at address"); + JumpTable *JT = JumpTables.at(Address); + for (BinaryFunction *Parent : JT->Parents) + Parent->JumpTables.erase(Address); + JumpTables.erase(Address); + delete JT; +} + DebugAddressRangesVector BinaryContext::translateModuleAddressRanges( const DWARFAddressRangesVector &InputRanges) const { DebugAddressRangesVector OutputRanges; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 89c05e3..0db1e0d 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -780,6 +780,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size, // setting the value of the register used by the branch. MCInst *MemLocInstr; + // The instruction loading the fixed PIC jump table entry value. + MCInst *FixedEntryLoadInstr; + // Address of the table referenced by MemLocInstr. Could be either an // array of function pointers, or a jump table. uint64_t ArrayStart = 0; @@ -811,7 +814,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size, IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch( Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum, - IndexRegNum, DispValue, DispExpr, PCRelBaseInstr); + IndexRegNum, DispValue, DispExpr, PCRelBaseInstr, FixedEntryLoadInstr); if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr) return BranchType; @@ -877,6 +880,43 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size, if (BaseRegNum == BC.MRI->getProgramCounter()) ArrayStart += getAddress() + Offset + Size; + if (FixedEntryLoadInstr) { + assert(BranchType == IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH && + "Invalid IndirectBranch type"); + MCInst::iterator FixedEntryDispOperand = + BC.MIB->getMemOperandDisp(*FixedEntryLoadInstr); + assert(FixedEntryDispOperand != FixedEntryLoadInstr->end() && + "Invalid memory instruction"); + const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr(); + const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr); + uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_PIC); + ErrorOr<int64_t> Value = + BC.getSignedValueAtAddress(EntryAddress, EntrySize); + if (!Value) + return IndirectBranchType::UNKNOWN; + + BC.outs() << "BOLT-INFO: fixed PIC indirect branch detected in " << *this + << " at 0x" << Twine::utohexstr(getAddress() + Offset) + << " referencing data at 0x" << Twine::utohexstr(EntryAddress) + << " the destination value is 0x" + << Twine::utohexstr(ArrayStart + *Value) << '\n'; + + TargetAddress = ArrayStart + *Value; + + // Remove spurious JumpTable at EntryAddress caused by PIC reference from + // the load instruction. + BC.deleteJumpTable(EntryAddress); + + // Replace FixedEntryDispExpr used in target address calculation with outer + // jump table reference. + JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart); + assert(JT && "Must have a containing jump table for PIC fixed branch"); + BC.MIB->replaceMemOperandDisp(*FixedEntryLoadInstr, JT->getFirstLabel(), + EntryAddress - ArrayStart, &*BC.Ctx); + + return BranchType; + } + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x" << Twine::utohexstr(ArrayStart) << '\n'); @@ -1126,6 +1166,7 @@ void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size, } case IndirectBranchType::POSSIBLE_JUMP_TABLE: case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE: + case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH: if (opts::JumpTables == JTS_NONE) IsSimple = false; break; @@ -1878,9 +1919,11 @@ bool BinaryFunction::postProcessIndirectBranches( int64_t DispValue; const MCExpr *DispExpr; MCInst *PCRelBaseInstr; + MCInst *FixedEntryLoadInstr; IndirectBranchType Type = BC.MIB->analyzeIndirectBranch( Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum, - IndexRegNum, DispValue, DispExpr, PCRelBaseInstr); + IndexRegNum, DispValue, DispExpr, PCRelBaseInstr, + FixedEntryLoadInstr); if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr) continue; diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp index a73eb86..2b5a591 100644 --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -386,13 +386,15 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(BinaryBasicBlock &BB, JumpTableInfoType HotTargets; MCInst *MemLocInstr; MCInst *PCRelBaseOut; + MCInst *FixedEntryLoadInstr; unsigned BaseReg, IndexReg; int64_t DispValue; const MCExpr *DispExpr; MutableArrayRef<MCInst> Insts(&BB.front(), &CallInst); const IndirectBranchType Type = BC.MIB->analyzeIndirectBranch( CallInst, Insts.begin(), Insts.end(), BC.AsmInfo->getCodePointerSize(), - MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut); + MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut, + FixedEntryLoadInstr); assert(MemLocInstr && "There should always be a load for jump tables"); if (!MemLocInstr) diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 1a2327d..f58f785 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -852,16 +852,19 @@ public: return Uses; } - IndirectBranchType analyzeIndirectBranch( - MCInst &Instruction, InstructionIterator Begin, InstructionIterator End, - const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut, - unsigned &IndexRegNumOut, int64_t &DispValueOut, - const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override { + IndirectBranchType + analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin, + InstructionIterator End, const unsigned PtrSize, + MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut, + unsigned &IndexRegNumOut, int64_t &DispValueOut, + const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut, + MCInst *&FixedEntryLoadInstr) const override { MemLocInstrOut = nullptr; BaseRegNumOut = AArch64::NoRegister; IndexRegNumOut = AArch64::NoRegister; DispValueOut = 0; DispExprOut = nullptr; + FixedEntryLoadInstr = nullptr; // An instruction referencing memory used by jump instruction (directly or // via register). This location could be an array of function pointers diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp index eb3f38a..f8c83b0 100644 --- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp @@ -176,13 +176,14 @@ public: MCInst &Instruction, InstructionIterator Begin, InstructionIterator End, const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum, unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr, - MCInst *&PCRelBaseOut) const override { + MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const override { MemLocInstr = nullptr; BaseRegNum = 0; IndexRegNum = 0; DispValue = 0; DispExpr = nullptr; PCRelBaseOut = nullptr; + FixedEntryLoadInst = nullptr; // Check for the following long tail call sequence: // 1: auipc xi, %pcrel_hi(sym) diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 37136f4..9f6c552 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -1866,8 +1866,11 @@ public: return true; } + /// Analyzes PIC-style jump table code template and return identified + /// IndirectBranchType, MemLocInstr (all cases) and FixedEntryLoadInstr + /// (POSSIBLE_PIC_FIXED_BRANCH case). template <typename Itr> - std::pair<IndirectBranchType, MCInst *> + std::tuple<IndirectBranchType, MCInst *, MCInst *> analyzePICJumpTable(Itr II, Itr IE, MCPhysReg R1, MCPhysReg R2) const { // Analyze PIC-style jump table code template: // @@ -1876,6 +1879,13 @@ public: // add %r2, %r1 // jmp *%r1 // + // or a fixed indirect jump template: + // + // movslq En(%rip), {%r2|%r1} <- FixedEntryLoadInstr + // lea PIC_JUMP_TABLE(%rip), {%r1|%r2} <- MemLocInstr + // add %r2, %r1 + // jmp *%r1 + // // (with any irrelevant instructions in-between) // // When we call this helper we've already determined %r1 and %r2, and @@ -1916,8 +1926,13 @@ public: MO.SegRegNum == X86::NoRegister; }; LLVM_DEBUG(dbgs() << "Checking for PIC jump table\n"); - MCInst *MemLocInstr = nullptr; - const MCInst *MovInstr = nullptr; + MCInst *FirstInstr = nullptr; + MCInst *SecondInstr = nullptr; + enum { + NOMATCH = 0, + MATCH_JUMP_TABLE, + MATCH_FIXED_BRANCH, + } MatchingState = NOMATCH; while (++II != IE) { MCInst &Instr = *II; const MCInstrDesc &InstrDesc = Info->get(Instr.getOpcode()); @@ -1926,68 +1941,76 @@ public: // Ignore instructions that don't affect R1, R2 registers. continue; } - if (!MovInstr) { - // Expect to see MOV instruction. - if (!isMOVSX64rm32(Instr)) { - LLVM_DEBUG(dbgs() << "MOV instruction expected.\n"); + const bool IsMOVSXInstr = isMOVSX64rm32(Instr); + const bool IsLEAInstr = isLEA64r(Instr); + if (MatchingState == NOMATCH) { + if (IsMOVSXInstr) + MatchingState = MATCH_JUMP_TABLE; + else if (IsLEAInstr) + MatchingState = MATCH_FIXED_BRANCH; + else break; - } - // Check if it's setting %r1 or %r2. In canonical form it sets %r2. - // If it sets %r1 - rename the registers so we have to only check - // a single form. - unsigned MovDestReg = Instr.getOperand(0).getReg(); - if (MovDestReg != R2) + // Check if the first instruction is setting %r1 or %r2. In canonical + // form lea sets %r1 and mov sets %r2. If it's the opposite - rename so + // we have to only check a single form. + unsigned DestReg = Instr.getOperand(0).getReg(); + MCPhysReg &ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R2 : R1; + if (DestReg != ExpectReg) std::swap(R1, R2); - if (MovDestReg != R2) { - LLVM_DEBUG(dbgs() << "MOV instruction expected to set %r2\n"); + if (DestReg != ExpectReg) break; - } - // Verify operands for MOV. + // Verify operands std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr); if (!MO) break; - if (!isIndexed(*MO, R1)) - // POSSIBLE_PIC_JUMP_TABLE + if ((MatchingState == MATCH_JUMP_TABLE && isIndexed(*MO, R1)) || + (MatchingState == MATCH_FIXED_BRANCH && isRIPRel(*MO))) + FirstInstr = &Instr; + else break; - MovInstr = &Instr; } else { - if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *RegInfo)) + unsigned ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R1 : R2; + if (!InstrDesc.hasDefOfPhysReg(Instr, ExpectReg, *RegInfo)) continue; - if (!isLEA64r(Instr)) { - LLVM_DEBUG(dbgs() << "LEA instruction expected\n"); + if ((MatchingState == MATCH_JUMP_TABLE && !IsLEAInstr) || + (MatchingState == MATCH_FIXED_BRANCH && !IsMOVSXInstr)) break; - } - if (Instr.getOperand(0).getReg() != R1) { - LLVM_DEBUG(dbgs() << "LEA instruction expected to set %r1\n"); + if (Instr.getOperand(0).getReg() != ExpectReg) break; - } - // Verify operands for LEA. + // Verify operands. std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr); if (!MO) break; if (!isRIPRel(*MO)) break; - MemLocInstr = &Instr; + SecondInstr = &Instr; break; } } - if (!MemLocInstr) - return std::make_pair(IndirectBranchType::UNKNOWN, nullptr); + if (!SecondInstr) + return std::make_tuple(IndirectBranchType::UNKNOWN, nullptr, nullptr); + if (MatchingState == MATCH_FIXED_BRANCH) { + LLVM_DEBUG(dbgs() << "checking potential fixed indirect branch\n"); + return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH, + FirstInstr, SecondInstr); + } LLVM_DEBUG(dbgs() << "checking potential PIC jump table\n"); - return std::make_pair(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE, - MemLocInstr); + return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE, + SecondInstr, nullptr); } - IndirectBranchType analyzeIndirectBranch( - MCInst &Instruction, InstructionIterator Begin, InstructionIterator End, - const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut, - unsigned &IndexRegNumOut, int64_t &DispValueOut, - const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override { + IndirectBranchType + analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin, + InstructionIterator End, const unsigned PtrSize, + MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut, + unsigned &IndexRegNumOut, int64_t &DispValueOut, + const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut, + MCInst *&FixedEntryLoadInst) const override { // Try to find a (base) memory location from where the address for // the indirect branch is loaded. For X86-64 the memory will be specified // in the following format: @@ -2014,6 +2037,7 @@ public: IndexRegNumOut = X86::NoRegister; DispValueOut = 0; DispExprOut = nullptr; + FixedEntryLoadInst = nullptr; std::reverse_iterator<InstructionIterator> II(End); std::reverse_iterator<InstructionIterator> IE(Begin); @@ -2046,7 +2070,8 @@ public: unsigned R2 = PrevInstr.getOperand(2).getReg(); if (R1 == R2) return IndirectBranchType::UNKNOWN; - std::tie(Type, MemLocInstr) = analyzePICJumpTable(PrevII, IE, R1, R2); + std::tie(Type, MemLocInstr, FixedEntryLoadInst) = + analyzePICJumpTable(PrevII, IE, R1, R2); break; } return IndirectBranchType::UNKNOWN; @@ -2090,6 +2115,8 @@ public: if (MO->ScaleImm != 1 || MO->BaseRegNum != RIPRegister) return IndirectBranchType::UNKNOWN; break; + case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH: + break; default: if (MO->ScaleImm != PtrSize) return IndirectBranchType::UNKNOWN; diff --git a/bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s b/bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s index 66629a4..6407964 100644 --- a/bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s +++ b/bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s @@ -6,7 +6,7 @@ main: jae .L4 cmpq $0x1, %rdi jne .L4 - mov .Ljt_pic+8(%rip), %rax + movslq .Ljt_pic+8(%rip), %rax lea .Ljt_pic(%rip), %rdx add %rdx, %rax jmpq *%rax diff --git a/bolt/test/X86/jump-table-fixed-ref-pic.test b/bolt/test/X86/jump-table-fixed-ref-pic.test index c8b6eda..d215c56 100644 --- a/bolt/test/X86/jump-table-fixed-ref-pic.test +++ b/bolt/test/X86/jump-table-fixed-ref-pic.test @@ -1,9 +1,13 @@ ## Verify that BOLT detects fixed destination of indirect jump for PIC ## case. -XFAIL: * - RUN: %clang %cflags -no-pie %S/Inputs/jump-table-fixed-ref-pic.s -Wl,-q -o %t -RUN: llvm-bolt %t --relocs -o %t.null 2>&1 | FileCheck %s +RUN: llvm-bolt %t --relocs -o %t.null -print-cfg 2>&1 | FileCheck %s + +CHECK: BOLT-INFO: fixed PIC indirect branch detected in main {{.*}} the destination value is 0x[[#TGT:]] +CHECK: Binary Function "main" after building cfg -CHECK: BOLT-INFO: fixed indirect branch detected in main +CHECK: movslq ".rodata/1"+8(%rip), %rax +CHECK-NEXT: leaq ".rodata/1"(%rip), %rdx +CHECK-NEXT: addq %rdx, %rax +CHECK-NEXT: jmpq *%rax # UNKNOWN CONTROL FLOW |