aboutsummaryrefslogtreecommitdiff
path: root/bolt
diff options
context:
space:
mode:
Diffstat (limited to 'bolt')
-rw-r--r--bolt/README.md2
-rw-r--r--bolt/docs/BAT.md1
-rw-r--r--bolt/docs/CommandLineArgumentReference.md14
-rw-r--r--bolt/docs/PointerAuthDesign.md (renamed from bolt/docs/PacRetDesign.md)48
-rw-r--r--bolt/include/bolt/Core/BinaryContext.h28
-rw-r--r--bolt/include/bolt/Core/BinaryFunction.h44
-rw-r--r--bolt/include/bolt/Core/DebugData.h12
-rw-r--r--bolt/include/bolt/Core/MCPlusBuilder.h78
-rw-r--r--bolt/include/bolt/Passes/AArch64RelaxationPass.h (renamed from bolt/include/bolt/Passes/ADRRelaxationPass.h)22
-rw-r--r--bolt/include/bolt/Passes/FixRelaxationPass.h2
-rw-r--r--bolt/include/bolt/Passes/IdenticalCodeFolding.h6
-rw-r--r--bolt/include/bolt/Passes/InsertNegateRAStatePass.h46
-rw-r--r--bolt/include/bolt/Passes/PointerAuthCFIAnalyzer.h (renamed from bolt/include/bolt/Passes/MarkRAStates.h)15
-rw-r--r--bolt/include/bolt/Passes/PointerAuthCFIFixup.h68
-rw-r--r--bolt/include/bolt/Profile/ProfileYAMLMapping.h26
-rw-r--r--bolt/include/bolt/Profile/YAMLProfileWriter.h35
-rw-r--r--bolt/include/bolt/Rewrite/MetadataRewriters.h6
-rw-r--r--bolt/include/bolt/Rewrite/RewriteInstance.h14
-rw-r--r--bolt/lib/Core/BinaryBasicBlock.cpp2
-rw-r--r--bolt/lib/Core/BinaryContext.cpp101
-rw-r--r--bolt/lib/Core/BinaryFunction.cpp169
-rw-r--r--bolt/lib/Core/BinarySection.cpp6
-rw-r--r--bolt/lib/Core/DebugNames.cpp2
-rw-r--r--bolt/lib/Core/DynoStats.cpp2
-rw-r--r--bolt/lib/Core/Exceptions.cpp8
-rw-r--r--bolt/lib/Core/MCPlusBuilder.cpp27
-rw-r--r--bolt/lib/Core/Relocation.cpp38
-rw-r--r--bolt/lib/Passes/AArch64RelaxationPass.cpp (renamed from bolt/lib/Passes/ADRRelaxationPass.cpp)47
-rw-r--r--bolt/lib/Passes/CMakeLists.txt6
-rw-r--r--bolt/lib/Passes/IdenticalCodeFolding.cpp11
-rw-r--r--bolt/lib/Passes/Inliner.cpp45
-rw-r--r--bolt/lib/Passes/InsertNegateRAStatePass.cpp142
-rw-r--r--bolt/lib/Passes/PAuthGadgetScanner.cpp2
-rw-r--r--bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp (renamed from bolt/lib/Passes/MarkRAStates.cpp)78
-rw-r--r--bolt/lib/Passes/PointerAuthCFIFixup.cpp268
-rw-r--r--bolt/lib/Profile/DataAggregator.cpp39
-rw-r--r--bolt/lib/Profile/StaleProfileMatching.cpp24
-rw-r--r--bolt/lib/Profile/YAMLProfileWriter.cpp105
-rw-r--r--bolt/lib/Rewrite/BinaryPassManager.cpp31
-rw-r--r--bolt/lib/Rewrite/CMakeLists.txt1
-rw-r--r--bolt/lib/Rewrite/DWARFRewriter.cpp120
-rw-r--r--bolt/lib/Rewrite/RSeqRewriter.cpp72
-rw-r--r--bolt/lib/Rewrite/RewriteInstance.cpp280
-rw-r--r--bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp157
-rw-r--r--bolt/lib/Target/X86/X86MCPlusBuilder.cpp6
-rw-r--r--bolt/lib/Utils/CommandLineOpts.cpp10
-rw-r--r--bolt/test/AArch64/constant-island-alignment.s1
-rw-r--r--bolt/test/AArch64/epilogue-determination.s48
-rw-r--r--bolt/test/AArch64/hook-fini.s14
-rw-r--r--bolt/test/AArch64/hook-init.s221
-rw-r--r--bolt/test/AArch64/inline-armv8.3-returns.s45
-rw-r--r--bolt/test/AArch64/inline-armv8.3-tailcall.s46
-rw-r--r--bolt/test/AArch64/inline-bti-dbg.s40
-rw-r--r--bolt/test/AArch64/inline-bti.s38
-rw-r--r--bolt/test/AArch64/inline-pauth-lr.s61
-rw-r--r--bolt/test/AArch64/instrument-no-fini.s34
-rw-r--r--bolt/test/AArch64/ldr-relaxation.s122
-rw-r--r--bolt/test/AArch64/pacret-cfi-disallow.s (renamed from bolt/test/AArch64/negate-ra-state-disallow.s)0
-rw-r--r--bolt/test/AArch64/pacret-cfi-incorrect.s (renamed from bolt/test/AArch64/negate-ra-state-incorrect.s)4
-rw-r--r--bolt/test/AArch64/pacret-cfi-reorder.s (renamed from bolt/test/AArch64/negate-ra-state-reorder.s)0
-rw-r--r--bolt/test/AArch64/pacret-cfi.s (renamed from bolt/test/AArch64/negate-ra-state.s)8
-rw-r--r--bolt/test/AArch64/pacret-split-funcs.s4
-rw-r--r--bolt/test/AArch64/relocation-type-print.s24
-rw-r--r--bolt/test/AArch64/safe-icf.s73
-rw-r--r--bolt/test/AArch64/validate-branch-target.s36
-rw-r--r--bolt/test/X86/Inputs/dwarf4-str-split-dwarf.s330
-rw-r--r--bolt/test/X86/Inputs/dwarf5-str-split-dwarf.s368
-rw-r--r--bolt/test/X86/callcont-fallthru.s9
-rw-r--r--bolt/test/X86/dwarf4-ftypes-dwp-input-dwp-output.test5
-rw-r--r--bolt/test/X86/dwarf4-str-dwp-input-dwo-output.test76
-rw-r--r--bolt/test/X86/dwarf5-str-dwp-input-dwo-output.test76
-rw-r--r--bolt/test/X86/hook-init.s221
-rw-r--r--bolt/test/X86/instrument-no-fini.s34
-rw-r--r--bolt/test/X86/internal-call-instrument-so.s9
-rw-r--r--bolt/test/X86/lit.local.cfg2
-rw-r--r--bolt/test/X86/match-blocks-with-pseudo-probes-inline.test6
-rw-r--r--bolt/test/X86/match-blocks-with-pseudo-probes.test2
-rw-r--r--bolt/test/X86/pseudoprobe-decoding-inline.test6
-rw-r--r--bolt/test/X86/pseudoprobe-decoding-noinline.test7
-rw-r--r--bolt/test/X86/rseq.s38
-rw-r--r--bolt/test/X86/unclaimed-jt-entries.s7
-rw-r--r--bolt/test/X86/unclaimed-pc-rel.s24
-rw-r--r--bolt/test/X86/validate-branch-target.s33
-rw-r--r--bolt/test/print-only.test25
-rw-r--r--bolt/test/runtime/AArch64/inline-memcpy.s34
-rw-r--r--bolt/test/runtime/AArch64/pacret-eh-function-split.cpp (renamed from bolt/test/runtime/AArch64/pacret-function-split.cpp)0
-rw-r--r--bolt/test/runtime/AArch64/pacret-eh.cpp (renamed from bolt/test/runtime/AArch64/negate-ra-state.cpp)0
-rw-r--r--bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp36
-rw-r--r--bolt/test/runtime/X86/instrument-wrong-target.s7
-rw-r--r--bolt/test/runtime/X86/unclaimed-jt-entries.s9
-rw-r--r--bolt/test/safe-icf-relative-vtable.cpp26
-rw-r--r--bolt/unittests/CMakeLists.txt1
-rw-r--r--bolt/unittests/Core/MCPlusBuilder.cpp55
-rw-r--r--bolt/unittests/Passes/CMakeLists.txt30
-rw-r--r--bolt/unittests/Passes/PointerAuthCFIFixup.cpp339
-rw-r--r--bolt/unittests/Profile/PerfSpeEvents.cpp88
96 files changed, 4277 insertions, 711 deletions
diff --git a/bolt/README.md b/bolt/README.md
index 902d1eb6..55f742c 100644
--- a/bolt/README.md
+++ b/bolt/README.md
@@ -173,7 +173,7 @@ Once you have `perf.fdata` ready, you can use it for optimizations with
BOLT. Assuming your environment is setup to include the right path, execute
`llvm-bolt`:
```
-$ llvm-bolt <executable> -o <executable>.bolt -data=perf.fdata -reorder-blocks=ext-tsp -reorder-functions=hfsort -split-functions -split-all-cold -split-eh -dyno-stats
+$ llvm-bolt <executable> -o <executable>.bolt -data=perf.fdata -reorder-blocks=ext-tsp -reorder-functions=cdsort -split-functions -split-all-cold -split-eh -dyno-stats
```
If you do need an updated debug info, then add `-update-debug-sections` option
diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md
index 817ad28..fa43e81 100644
--- a/bolt/docs/BAT.md
+++ b/bolt/docs/BAT.md
@@ -61,6 +61,7 @@ Functions table:
### Functions table
Hot and cold functions tables share the encoding except differences marked below.
+
Header:
| Entry | Encoding | Description |
| ------ | ----- | ----------- |
diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index 43cecee..0dbf6f5 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -381,11 +381,6 @@
Set verbosity level for diagnostic output
-- `--write-dwp`
-
- Output a single dwarf package file (dwp) instead of multiple non-relocatable
- dwarf object files (dwo).
-
### BOLT optimization options:
- `--align-blocks`
@@ -816,6 +811,15 @@
Specify file name of the runtime instrumentation library
+- `--runtime-lib-init-hook=<value>`
+
+ Primary target for hooking runtime library initialization, used in
+ fallback order of availability in input binary (entry_point -> init
+ -> init_array) (default: entry_point)
+ - `entry_point`: use ELF Header Entry Point
+ - `init`: use ELF DT_INIT entry
+ - `init_array`: use ELF .init_array entry
+
- `--sctc-mode=<value>`
Mode for simplify conditional tail calls
diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PointerAuthDesign.md
index f3fe5fb..d101795 100644
--- a/bolt/docs/PacRetDesign.md
+++ b/bolt/docs/PointerAuthDesign.md
@@ -10,6 +10,10 @@ intended audience is BOLT developers. The document is an updated version of the
in assembly, or `OpNegateRAState` in BOLT sources. In this document, I will use
**negate-ra-state** as a shorthand.
+Note: the acronym CFI has two different expansions in this context:
+- Call Frame Instruction: individual DWARF instruction, e.g. negate-ra-state
+- Control Flow Integrity: a security mechanism, e.g. pointer authentication.
+
## Introduction
### Pointer Authentication
@@ -104,9 +108,9 @@ negate-ra-state CFIs will become invalid during BasicBlock reordering.
## Solution design
The implementation introduces two new passes:
-1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the CFIs
- in the input binary
-2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after
+1. `PointerAuthCFIAnalyzer`: assigns the RA state to each instruction based on
+ the CFIs in the input binary
+2. `PointerAuthCFIFixup`: reads those assigned instruction RA states after
optimizations, and emits `DW_CFA_AARCH64_negate_ra_state` CFIs at the correct
places: wherever there is a state change between two consecutive instructions
in the layout order.
@@ -129,7 +133,7 @@ instruction.
This special case is handled by adding an `initialRAState` bool to each BinaryFunction.
If the `Offset` the CFI refers to is zero, we don't store an annotation, but set
the `initialRAState` in `FillCFIInfoFor`. This information is then used in
-`MarkRAStates`.
+`PointerAuthCFIAnalyzer`.
### Binaries without DWARF info
@@ -146,7 +150,7 @@ In summary:
- pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the
negate-ra-state CFI.
-### MarkRAStates pass
+### PointerAuthCFIAnalyzer pass
This pass runs before optimizations reorder anything.
@@ -173,9 +177,9 @@ what we have before the pass, and after it.
| autiasp | negate-ra-state | signed |
| ret | | unsigned |
-##### Error handling in MarkRAState Pass:
+##### Error handling in PointerAuthCFIAnalyzer pass:
-Whenever the MarkRAStates pass finds inconsistencies in the current
+Whenever the PointerAuthCFIAnalyzer pass finds inconsistencies in the current
BinaryFunction, it marks the function as ignored using `BF.setIgnored()`. BOLT
will not optimize this function but will emit it unchanged in the original section
(`.bolt.org.text`).
@@ -188,27 +192,35 @@ The inconsistencies are as follows:
Users will be informed about the number of ignored functions in the pass, the
exact functions ignored, and the found inconsistency.
-### InsertNegateRAStatePass
+### PointerAuthCFIFixup
-This pass runs after optimizations. It performns the _inverse_ of MarkRAState pa s:
+This pass runs after optimizations. It performs the _inverse_ of PointerAuthCFIAnalyzer
+pass:
1. it reads the RA state annotations attached to the instructions, and
2. whenever the state changes, it adds a PseudoInstruction that holds an
OpNegateRAState CFI.
##### Covering newly generated instructions:
-Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have
+Some BOLT passes can add new Instructions. In PointerAuthCFIFixup, we have
to know what RA state these have.
-The current solution has the `inferUnknownStates` function to cover these, using
-a fairly simple strategy: unknown states inherit the last known state.
+> [!important]
+> As issue #160989 explains, unwind info is missing from stubs.
+> For this same reason, we cannot generate correct pac-specific unwind info: the
+> signedness of the _incorrect_ return address is meaningless.
+
+Assignment of RAStates to newly generated instructions is done in `inferUnknownStates`.
+We have two different cases to cover:
-This will be updated to a more robust solution.
+1. If a BasicBlock has some instructions with known RA state, and some without, we
+ can copy the RAState of known instructions to the unknown ones. As the control
+ flow only changes between BasicBlocks, instructions in the same BasicBlock have
+ the same return address. (The exception is noreturn calls, but these would only
+ cause problems if the newly inserted instruction is right after the call.)
-> [!important]
-> As issue #160989 describes, unwind info is incorrect in stubs with multiple callers.
-> For this same reason, we cannot generate correct pac-specific unwind info: the signess
-> of the _incorrect_ return address is meaningless.
+2. If a BasicBlock has no instructions with known RAState, we have to copy the
+ RAState of the previous BasicBlock in layout order.
### Optimizations requiring special attention
@@ -217,7 +229,7 @@ freely. The only special case is function splitting. When a function is split,
the split part becomes a new function in the emitted binary. For unwinding to
work, it needs to "replay" all CFIs that lead up to the split point. BOLT does
this for other CFIs. As negate-ra-state is not read (only stored as an Annotation),
-we have to do this manually in InsertNegateRAStatePass. Here, if the split part
+we have to do this manually in PointerAuthCFIFixup. Here, if the split part
starts with an instruction that has Signed RA state, we add a negate-ra-state CFI
to indicate this.
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 085c026..aefb40b 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -354,9 +354,6 @@ public:
/// Newly created segments.
std::vector<SegmentInfo> NewSegments;
- /// Symbols that are expected to be undefined in MCContext during emission.
- std::unordered_set<MCSymbol *> UndefinedSymbols;
-
/// [name] -> [BinaryData*] map used for global symbol resolution.
using SymbolMapType = StringMap<BinaryData *>;
SymbolMapType GlobalSymbols;
@@ -500,7 +497,7 @@ public:
///
/// As we fold identical functions, multiple symbols can point
/// to the same BinaryFunction.
- std::unordered_map<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;
+ DenseMap<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;
/// A mutex that is used to control parallel accesses to SymbolToFunctionMap
mutable llvm::sys::RWMutex SymbolToFunctionMapMutex;
@@ -810,6 +807,15 @@ public:
/// the execution of the binary is completed.
std::optional<uint64_t> FiniFunctionAddress;
+ /// DT_INIT.
+ std::optional<uint64_t> InitAddress;
+
+ /// DT_INIT_ARRAY. Only used when DT_INIT is not set.
+ std::optional<uint64_t> InitArrayAddress;
+
+ /// DT_INIT_ARRAYSZ. Only used when DT_INIT is not set.
+ std::optional<uint64_t> InitArraySize;
+
/// DT_FINI.
std::optional<uint64_t> FiniAddress;
@@ -937,10 +943,11 @@ public:
/// that should be used by the branch. For example, main or secondary entry
/// point.
///
- /// If \p Address is an invalid destination, such as a constant island, return
- /// nullptr and mark \p BF as ignored, since we cannot properly handle a
- /// branch to a constant island.
- MCSymbol *handleExternalBranchTarget(uint64_t Address, BinaryFunction &BF);
+ /// This function also performs validations: If \p Address points to an
+ /// invalid instruction or lies within a constant island, return nullptr and
+ /// mark both \p Source and \p Target as ignored.
+ MCSymbol *handleExternalBranchTarget(uint64_t Address, BinaryFunction &Source,
+ BinaryFunction &Target);
/// Analyze memory contents at the given \p Address and return the type of
/// memory contents (such as a possible jump table).
@@ -1107,7 +1114,7 @@ public:
return FragmentClasses.isEquivalent(LHS, RHS);
}
- /// Add interprocedural reference for \p Function to \p Address
+ /// Add interprocedural branch reference from \p Function to \p Address.
void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
InterproceduralReferences.push_back({Function, Address});
}
@@ -1122,7 +1129,8 @@ public:
/// argument is false.
bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
- /// Resolve inter-procedural dependencies from
+ /// Resolve inter-procedural branch dependencies discovered during
+ /// disassembly.
void processInterproceduralReferences();
/// Skip functions with all parent and child fragments transitively.
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index b215a15..5b147b2 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -281,6 +281,14 @@ private:
/// goto labels.
std::set<uint64_t> ExternallyReferencedOffsets;
+ /// Relocations from data sections targeting internals of this function, i.e.
+ /// some code not at an entry point. These include, but are not limited to,
+ /// jump table relocations and computed goto tables.
+ ///
+ /// Since relocations can be removed/deallocated, we store relocation offsets
+ /// instead of pointers.
+ DenseSet<uint64_t> InternalRefDataRelocations;
+
/// Offsets of indirect branches with unknown destinations.
std::set<uint64_t> UnknownIndirectBranchOffsets;
@@ -377,6 +385,10 @@ private:
/// True if the function should not have an associated symbol table entry.
bool IsAnonymous{false};
+ /// True while branch validation is still pending for this function; tracked
+ /// to avoid redundant processing.
+ bool NeedBranchValidation{true};
+
/// Name for the section this function code should reside in.
std::string CodeSectionName;
@@ -640,6 +652,20 @@ private:
Islands->CodeOffsets.emplace(Offset);
}
+ /// Register a relocation from data section referencing code at a non-zero
+ /// offset in this function.
+ void registerInternalRefDataRelocation(uint64_t FuncOffset,
+ uint64_t RelOffset) {
+ assert(FuncOffset != 0 && "Relocation should reference function internals");
+ registerReferencedOffset(FuncOffset);
+ InternalRefDataRelocations.insert(RelOffset);
+ const MCSymbol *ReferencedSymbol =
+ getOrCreateLocalLabel(getAddress() + FuncOffset);
+
+ // Track the symbol mapping since it's used in relocation handling.
+ BC.setSymbolToFunctionMap(ReferencedSymbol, this);
+ }
+
/// Register an internal offset in a function referenced from outside.
void registerReferencedOffset(uint64_t Offset) {
ExternallyReferencedOffsets.emplace(Offset);
@@ -1299,6 +1325,12 @@ public:
void addRelocation(uint64_t Address, MCSymbol *Symbol, uint32_t RelType,
uint64_t Addend, uint64_t Value);
+ /// Return locations (offsets) of data section relocations targeting internals
+ /// of this function.
+ const DenseSet<uint64_t> &getInternalRefDataRelocations() const {
+ return InternalRefDataRelocations;
+ }
+
/// Return the name of the section this function originated from.
std::optional<StringRef> getOriginSectionName() const {
if (!OriginSection)
@@ -2292,6 +2324,11 @@ public:
/// zero-value bytes.
bool isZeroPaddingAt(uint64_t Offset) const;
+ /// Validate that the target of every internal direct branch/call is a valid
+ /// executable instruction.
+ /// Return true if all the targets are valid, false otherwise.
+ bool validateInternalBranches();
+
/// Check that entry points have an associated instruction at their
/// offsets after disassembly.
void postProcessEntryPoints();
@@ -2328,10 +2365,9 @@ public:
bool postProcessIndirectBranches(MCPlusBuilder::AllocatorIdTy AllocId);
/// Validate that all data references to function offsets are claimed by
- /// recognized jump tables. Register externally referenced blocks as entry
- /// points. Returns true if there are no unclaimed externally referenced
- /// offsets.
- bool validateExternallyReferencedOffsets();
+ /// recognized jump tables. Returns true if there are no unclaimed externally
+ /// referenced offsets.
+ bool validateInternalRefDataRelocations();
/// Return all call site profile info for this function.
IndirectCallSiteProfile &getAllCallSites() { return AllCallSites; }
diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h
index 7c8ea12ee..faf7bb6 100644
--- a/bolt/include/bolt/Core/DebugData.h
+++ b/bolt/include/bolt/Core/DebugData.h
@@ -471,6 +471,12 @@ public:
return std::move(StrOffsetsBuffer);
}
+ /// Returns a non-owning view of the .debug_str_offsets buffer contents.
+ StringRef getBufferStr() {
+ return StringRef(reinterpret_cast<const char *>(StrOffsetsBuffer->data()),
+ StrOffsetsBuffer->size());
+ }
+
/// Initializes Buffer and Stream.
void initialize(DWARFUnit &Unit);
@@ -507,6 +513,12 @@ public:
return std::move(StrBuffer);
}
+ /// Returns a non-owning view of the .debug_str buffer contents.
+ StringRef getBufferStr() {
+ return StringRef(reinterpret_cast<const char *>(StrBuffer->data()),
+ StrBuffer->size());
+ }
+
/// Adds string to .debug_str.
/// On first invocation it initializes internal data structures.
uint32_t addString(StringRef Str);
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index d666c10..a318ef0 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -632,6 +632,12 @@ public:
return false;
}
+ /// Generate the matching pointer authentication instruction from a fused
+ /// pauth-and-return instruction.
+ virtual void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) {
+ llvm_unreachable("not implemented");
+ }
+
/// Returns the register used as a return address. Returns std::nullopt if
/// not applicable, such as reading the return address from a system register
/// or from the stack.
@@ -784,6 +790,11 @@ public:
virtual bool isPop(const MCInst &Inst) const { return false; }
+ /// Determine if a basic block looks like an epilogue. For now it is only
+ /// called at the final stage of building CFG to check basic block ending
+ /// with an indirect call that has unknown control flow attribute.
+ virtual bool isEpilogue(const BinaryBasicBlock &BB) const { return false; }
+
/// Return true if the instruction is used to terminate an indirect branch.
virtual bool isTerminateBranch(const MCInst &Inst) const {
llvm_unreachable("not implemented");
@@ -840,6 +851,16 @@ public:
return false;
}
+ virtual bool isLDRWl(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
+ virtual bool isLDRXl(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
virtual bool isMOVW(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
@@ -1361,20 +1382,13 @@ public:
/// Return true if \p Inst has RestoreState annotation.
bool hasRestoreState(const MCInst &Inst) const;
- /// Stores RA Signed annotation on \p Inst.
- void setRASigned(MCInst &Inst) const;
-
- /// Return true if \p Inst has Signed RA annotation.
- bool isRASigned(const MCInst &Inst) const;
-
- /// Stores RA Unsigned annotation on \p Inst.
- void setRAUnsigned(MCInst &Inst) const;
+ /// Sets kRASigned or kRAUnsigned annotation on \p Inst.
+ /// Fails if \p Inst has either annotation already set.
+ void setRAState(MCInst &Inst, bool State) const;
- /// Return true if \p Inst has Unsigned RA annotation.
- bool isRAUnsigned(const MCInst &Inst) const;
-
- /// Return true if \p Inst doesn't have any annotation related to RA state.
- bool isRAStateUnknown(const MCInst &Inst) const;
+ /// Return true if \p Inst has kRASigned annotation, false if it has
+ /// kRAUnsigned annotation, and std::nullopt if neither annotation is set.
+ std::optional<bool> getRAState(const MCInst &Inst) const;
/// Return true if the instruction is a call with an exception handling info.
virtual bool isInvoke(const MCInst &Inst) const {
@@ -1789,6 +1803,19 @@ public:
llvm_unreachable("not implemented");
}
+ /// Take \p LDRInst and return ADRP+LDR instruction sequence - for
+ ///
+ /// ldr x0, [label]
+ ///
+ /// the following sequence will be generated:
+ ///
+ /// adrp x0, PageBase(label)
+ /// ldr x0, [x0, PageOffset(label)]
+ virtual InstructionListType createAdrpLdr(const MCInst &LDRInst,
+ MCContext *Ctx) const {
+ llvm_unreachable("not implemented");
+ }
+
/// Return not 0 if the instruction CurInst, in combination with the recent
/// history of disassembled instructions supplied by [Begin, End), is a linker
/// generated veneer/stub that needs patching. This happens in AArch64 when
@@ -1842,6 +1869,31 @@ public:
llvm_unreachable("not implemented");
}
+ /// Check if an Instruction is a BTI landing pad with the required properties.
+ /// Takes both explicit and implicit BTIs into account.
+ virtual bool isBTILandingPad(MCInst &Inst, bool CallTarget,
+ bool JumpTarget) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
+ /// Check if an Instruction is an implicit BTI c landing pad.
+ virtual bool isImplicitBTIC(MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
+ /// Create a BTI landing pad instruction.
+ virtual void createBTI(MCInst &Inst, bool CallTarget, bool JumpTarget) const {
+ llvm_unreachable("not implemented");
+ }
+
+ /// Update operand of BTI instruction.
+ virtual void updateBTIVariant(MCInst &Inst, bool CallTarget,
+ bool JumpTarget) const {
+ llvm_unreachable("not implemented");
+ }
+
/// Store \p Target absolute address to \p RegName
virtual InstructionListType materializeAddress(const MCSymbol *Target,
MCContext *Ctx,
diff --git a/bolt/include/bolt/Passes/ADRRelaxationPass.h b/bolt/include/bolt/Passes/AArch64RelaxationPass.h
index b9f92de..b9185a1 100644
--- a/bolt/include/bolt/Passes/ADRRelaxationPass.h
+++ b/bolt/include/bolt/Passes/AArch64RelaxationPass.h
@@ -1,4 +1,4 @@
-//===- bolt/Passes/ADRRelaxationPass.h --------------------------*- C++ -*-===//
+//===- bolt/Passes/AArch64RelaxationPass.h ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,29 +6,29 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the ADRRelaxationPass class, which replaces AArch64
-// non-local ADR instructions with ADRP + ADD due to small offset range of ADR
-// instruction (+- 1MB) which could be easily overflowed after BOLT
-// optimizations. Such problems are usually connected with errata 843419
-// https://developer.arm.com/documentation/epm048406/2100/
+// This file declares the AArch64RelaxationPass class, which replaces AArch64
+// non-local ADR/LDR instructions with ADRP + ADD/LDR due to small offset
+// range of ADR and LDR instruction (+- 1MB) which could be easily overflowed
+// after BOLT optimizations. Such problems are usually connected with errata
+// 843419: https://developer.arm.com/documentation/epm048406/2100/
// The linker could replace ADRP instruction with ADR in some cases.
//
//===----------------------------------------------------------------------===//
-#ifndef BOLT_PASSES_ADRRELAXATIONPASS_H
-#define BOLT_PASSES_ADRRELAXATIONPASS_H
+#ifndef BOLT_PASSES_AARCH64RELAXATIONPASS_H
+#define BOLT_PASSES_AARCH64RELAXATIONPASS_H
#include "bolt/Passes/BinaryPasses.h"
namespace llvm {
namespace bolt {
-class ADRRelaxationPass : public BinaryFunctionPass {
+class AArch64RelaxationPass : public BinaryFunctionPass {
public:
- explicit ADRRelaxationPass(const cl::opt<bool> &PrintPass)
+ explicit AArch64RelaxationPass(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}
- const char *getName() const override { return "adr-relaxation"; }
+ const char *getName() const override { return "aarch64-relaxation"; }
/// Pass entry point
Error runOnFunctions(BinaryContext &BC) override;
diff --git a/bolt/include/bolt/Passes/FixRelaxationPass.h b/bolt/include/bolt/Passes/FixRelaxationPass.h
index 50b6448..cf5a8a1 100644
--- a/bolt/include/bolt/Passes/FixRelaxationPass.h
+++ b/bolt/include/bolt/Passes/FixRelaxationPass.h
@@ -1,4 +1,4 @@
-//===- bolt/Passes/ADRRelaxationPass.h --------------------------*- C++ -*-===//
+//===- bolt/Passes/FixRelaxationPass.h --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/bolt/include/bolt/Passes/IdenticalCodeFolding.h b/bolt/include/bolt/Passes/IdenticalCodeFolding.h
index f59e75c..1664c69 100644
--- a/bolt/include/bolt/Passes/IdenticalCodeFolding.h
+++ b/bolt/include/bolt/Passes/IdenticalCodeFolding.h
@@ -37,17 +37,19 @@ public:
Error runOnFunctions(BinaryContext &BC) override;
private:
+ static constexpr uint64_t VTableAddressGranularity = 4;
+
/// Bit vector of memory addresses of vtables.
llvm::SparseBitVector<> VTableBitVector;
/// Return true if the memory address is in a vtable.
bool isAddressInVTable(uint64_t Address) const {
- return VTableBitVector.test(Address / 8);
+ return VTableBitVector.test(Address / VTableAddressGranularity);
}
/// Mark memory address of a vtable as used.
void setAddressUsedInVTable(uint64_t Address) {
- VTableBitVector.set(Address / 8);
+ VTableBitVector.set(Address / VTableAddressGranularity);
}
/// Scan symbol table and mark memory addresses of
diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
deleted file mode 100644
index 836948b..0000000
--- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===- bolt/Passes/InsertNegateRAStatePass.cpp ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the InsertNegateRAStatePass class.
-//
-//===----------------------------------------------------------------------===//
-#ifndef BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
-#define BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS
-
-#include "bolt/Passes/BinaryPasses.h"
-
-namespace llvm {
-namespace bolt {
-
-class InsertNegateRAState : public BinaryFunctionPass {
-public:
- explicit InsertNegateRAState() : BinaryFunctionPass(false) {}
-
- const char *getName() const override { return "insert-negate-ra-state-pass"; }
-
- /// Pass entry point
- Error runOnFunctions(BinaryContext &BC) override;
- void runOnFunction(BinaryFunction &BF);
-
-private:
- /// Because states are tracked as MCAnnotations on individual instructions,
- /// newly inserted instructions do not have a state associated with them.
- /// New states are "inherited" from the last known state.
- void inferUnknownStates(BinaryFunction &BF);
-
- /// Support for function splitting:
- /// if two consecutive BBs with Signed state are going to end up in different
- /// functions (so are held by different FunctionFragments), we have to add a
- /// OpNegateRAState to the beginning of the newly split function, so it starts
- /// with a Signed state.
- void coverFunctionFragmentStart(BinaryFunction &BF, FunctionFragment &FF);
-};
-
-} // namespace bolt
-} // namespace llvm
-#endif
diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/PointerAuthCFIAnalyzer.h
index 202f1dd..cc0b27f 100644
--- a/bolt/include/bolt/Passes/MarkRAStates.h
+++ b/bolt/include/bolt/Passes/PointerAuthCFIAnalyzer.h
@@ -1,4 +1,4 @@
-//===- bolt/Passes/MarkRAStates.cpp ---------------------------------===//
+//===- bolt/Passes/PointerAuthCFIAnalyzer.h -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the MarkRAStates class.
+// This file declares the PointerAuthCFIAnalyzer class.
//
//===----------------------------------------------------------------------===//
-#ifndef BOLT_PASSES_MARK_RA_STATES
-#define BOLT_PASSES_MARK_RA_STATES
+#ifndef BOLT_PASSES_POINTER_AUTH_CFI_ANALYZER
+#define BOLT_PASSES_POINTER_AUTH_CFI_ANALYZER
#include "bolt/Passes/BinaryPasses.h"
#include <mutex>
@@ -18,15 +18,16 @@
namespace llvm {
namespace bolt {
-class MarkRAStates : public BinaryFunctionPass {
+class PointerAuthCFIAnalyzer : public BinaryFunctionPass {
// setIgnored() is not thread-safe, but the pass is running on functions in
// parallel.
std::mutex IgnoreMutex;
public:
- explicit MarkRAStates() : BinaryFunctionPass(false) {}
+ explicit PointerAuthCFIAnalyzer(const cl::opt<bool> &PrintPass)
+ : BinaryFunctionPass(PrintPass) {}
- const char *getName() const override { return "mark-ra-states"; }
+ const char *getName() const override { return "pointer-auth-cfi-analyzer"; }
/// Pass entry point
Error runOnFunctions(BinaryContext &BC) override;
diff --git a/bolt/include/bolt/Passes/PointerAuthCFIFixup.h b/bolt/include/bolt/Passes/PointerAuthCFIFixup.h
new file mode 100644
index 0000000..61b3e3c
--- /dev/null
+++ b/bolt/include/bolt/Passes/PointerAuthCFIFixup.h
@@ -0,0 +1,68 @@
+//===- bolt/Passes/PointerAuthCFIFixup.h ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PointerAuthCFIFixup class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef BOLT_PASSES_POINTER_AUTH_CFI_FIXUP
+#define BOLT_PASSES_POINTER_AUTH_CFI_FIXUP
+
+#include "bolt/Passes/BinaryPasses.h"
+
+namespace llvm {
+namespace bolt {
+
+class PointerAuthCFIFixup : public BinaryFunctionPass {
+public:
+ explicit PointerAuthCFIFixup(const cl::opt<bool> &PrintPass)
+ : BinaryFunctionPass(PrintPass) {}
+
+ const char *getName() const override { return "pointer-auth-cfi-fixup"; }
+
+ /// Pass entry point
+ Error runOnFunctions(BinaryContext &BC) override;
+ void runOnFunction(BinaryFunction &BF);
+
+private:
+ /// Because states are tracked as MCAnnotations on individual instructions,
+ /// newly inserted instructions do not have a state associated with them.
+ /// Uses fillUnknownStateInBB and fillUnknownStubs.
+ void inferUnknownStates(BinaryFunction &BF);
+
+ /// Simple case: copy RAStates to unknown insts from previous inst.
+ /// If the first inst has unknown state, set it to the first known state.
+ /// Accounts for signing and authenticating insts.
+ void fillUnknownStateInBB(BinaryContext &BC, BinaryBasicBlock &BB);
+
+ /// Fill in RAState in BasicBlocks consisting entirely of new instructions.
+ /// As of #160989, we have to copy the RAState from the previous BB in the
+ /// layout, because CFIs are already incorrect here.
+ void fillUnknownStubs(BinaryFunction &BF);
+
+ /// Returns the first known RAState from \p BB, or std::nullopt if all are
+ /// unknown.
+ std::optional<bool> getFirstKnownRAState(BinaryContext &BC,
+ BinaryBasicBlock &BB);
+
+ /// \return true if all instructions in \p BB have unknown RAState.
+ bool isUnknownBlock(BinaryContext &BC, BinaryBasicBlock &BB);
+
+ /// Set all instructions in \p BB to \p State.
+ void markUnknownBlock(BinaryContext &BC, BinaryBasicBlock &BB, bool State);
+
+ /// Support for function splitting:
+ /// if two consecutive BBs with Signed state are going to end up in different
+ /// functions (so are held by different FunctionFragments), we have to add a
+ /// OpNegateRAState to the beginning of the newly split function, so it starts
+ /// with a Signed state.
+ void coverFunctionFragmentStart(BinaryFunction &BF, FunctionFragment &FF);
+};
+
+} // namespace bolt
+} // namespace llvm
+#endif
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 41e2bd1..b393c85 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -29,6 +29,10 @@ struct CallSiteInfo {
uint32_t EntryDiscriminator{0}; /// multiple entry discriminator
uint64_t Count{0};
uint64_t Mispreds{0};
+ // Pseudo probe information, optional
+ uint32_t Probe{0};
+ bool Indirect = false;
+ uint32_t InlineTreeNode{0};
bool operator==(const CallSiteInfo &Other) const {
return Offset == Other.Offset && DestId == Other.DestId &&
@@ -63,6 +67,9 @@ template <> struct MappingTraits<bolt::CallSiteInfo> {
YamlIO.mapOptional("disc", CSI.EntryDiscriminator, (uint32_t)0);
YamlIO.mapRequired("cnt", CSI.Count);
YamlIO.mapOptional("mis", CSI.Mispreds, (uint64_t)0);
+ YamlIO.mapOptional("pp", CSI.Probe, 0);
+ YamlIO.mapOptional("ppn", CSI.InlineTreeNode, 0);
+ YamlIO.mapOptional("ind", CSI.Indirect, false);
}
static const bool flow = true;
@@ -95,29 +102,20 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
namespace bolt {
struct PseudoProbeInfo {
- uint32_t InlineTreeIndex = 0;
- uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
- std::vector<uint64_t> BlockProbes; // block probes with indices above 64
- std::vector<uint64_t> CallProbes;
- std::vector<uint64_t> IndCallProbes;
+ std::vector<uint64_t> BlockProbes;
std::vector<uint32_t> InlineTreeNodes;
bool operator==(const PseudoProbeInfo &Other) const {
- return InlineTreeIndex == Other.InlineTreeIndex &&
- BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
- IndCallProbes == Other.IndCallProbes;
+ return InlineTreeNodes == Other.InlineTreeNodes &&
+ BlockProbes == Other.BlockProbes;
}
};
} // end namespace bolt
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
- YamlIO.mapOptional("blx", PI.BlockMask, 0);
- YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
- YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
- YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
- YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
- YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
+ YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>(1, 1));
+ YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>(1, 0));
}
static const bool flow = true;
diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h
index d4d7217..50ee78d 100644
--- a/bolt/include/bolt/Profile/YAMLProfileWriter.h
+++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h
@@ -74,25 +74,24 @@ private:
collectInlineTree(const MCPseudoProbeDecoder &Decoder,
const MCDecodedPseudoProbeInlineTree &Root);
- // 0 - block probe, 1 - indirect call, 2 - direct call
- using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
- using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
- static std::vector<yaml::bolt::PseudoProbeInfo>
- convertNodeProbes(NodeIdToProbes &NodeProbes);
-
public:
- template <typename T>
- static std::vector<yaml::bolt::PseudoProbeInfo>
- writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
- NodeIdToProbes NodeProbes;
- for (const MCDecodedPseudoProbe &Probe : Probes) {
- auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
- if (It == InlineTreeNodeId.end())
- continue;
- NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
- }
- return convertNodeProbes(NodeProbes);
- }
+ class BlockProbeCtx {
+ struct Call {
+ uint64_t Id;
+ uint32_t Node;
+ bool Indirect;
+ bool Used;
+ };
+ // Group block probes by node id.
+ DenseMap<uint32_t, std::vector<uint64_t>> NodeToProbes;
+ // Offset -> call probe
+ DenseMap<uint32_t, Call> CallProbes;
+
+ public:
+ void addBlockProbe(const InlineTreeMapTy &Map,
+ const MCDecodedPseudoProbe &Probe, uint32_t ProbeOffset);
+ void finalize(yaml::bolt::BinaryBasicBlockProfile &YamlBB);
+ };
};
} // namespace bolt
} // namespace llvm
diff --git a/bolt/include/bolt/Rewrite/MetadataRewriters.h b/bolt/include/bolt/Rewrite/MetadataRewriters.h
index 2c09c879..6b74b0e 100644
--- a/bolt/include/bolt/Rewrite/MetadataRewriters.h
+++ b/bolt/include/bolt/Rewrite/MetadataRewriters.h
@@ -19,12 +19,14 @@ class BinaryContext;
// The list of rewriter build functions.
-std::unique_ptr<MetadataRewriter> createLinuxKernelRewriter(BinaryContext &);
-
std::unique_ptr<MetadataRewriter> createBuildIDRewriter(BinaryContext &);
+std::unique_ptr<MetadataRewriter> createLinuxKernelRewriter(BinaryContext &);
+
std::unique_ptr<MetadataRewriter> createPseudoProbeRewriter(BinaryContext &);
+std::unique_ptr<MetadataRewriter> createRSeqRewriter(BinaryContext &);
+
std::unique_ptr<MetadataRewriter> createSDTRewriter(BinaryContext &);
std::unique_ptr<MetadataRewriter> createGNUPropertyRewriter(BinaryContext &);
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 0fe2e32..5950b3c 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -93,14 +93,23 @@ private:
/// section allocations if found.
void discoverBOLTReserved();
+ /// Check whether we should use DT_INIT or DT_INIT_ARRAY for instrumentation.
+ /// DT_INIT is preferred; DT_INIT_ARRAY is only used when no DT_INIT entry was
+ /// found.
+ Error discoverRtInitAddress();
+
/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
/// found.
Error discoverRtFiniAddress();
+ /// If DT_INIT_ARRAY is used for instrumentation, update the relocation of its
+ /// first entry to point to the instrumentation library's init address.
+ Error updateRtInitReloc();
+
/// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
/// first entry to point to the instrumentation library's fini address.
- void updateRtFiniReloc();
+ Error updateRtFiniReloc();
/// Create and initialize metadata rewriters for this instance.
void initializeMetadataManager();
@@ -139,6 +148,9 @@ private:
void handleRelocation(const object::SectionRef &RelocatedSection,
const RelocationRef &Rel);
+ /// Collect functions that are specified to be printed.
+ void selectFunctionsToPrint();
+
/// Mark functions that are not meant for processing as ignored.
void selectFunctionsToProcess();
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index d680850..a6d0ca9 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -22,8 +22,6 @@
namespace llvm {
namespace bolt {
-constexpr uint32_t BinaryBasicBlock::INVALID_OFFSET;
-
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS) {
return LHS.Index < RHS.Index;
}
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index a383ced..51bc867 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -78,6 +78,11 @@ cl::opt<std::string> CompDirOverride(
"to *.dwo files."),
cl::Hidden, cl::init(""), cl::cat(BoltCategory));
+static cl::opt<bool> CloneConstantIsland("clone-constant-island",
+ cl::desc("clone constant islands"),
+ cl::Hidden, cl::init(true),
+ cl::ZeroOrMore, cl::cat(BoltCategory));
+
static cl::opt<bool>
FailOnInvalidPadding("fail-on-invalid-padding", cl::Hidden, cl::init(false),
cl::desc("treat invalid code padding as error"),
@@ -461,7 +466,8 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
// of dynamic relocs, as we currently do not support cloning them.
// Notice: we might fail to link because of this, if the original constant
// island we are referring would be emitted too far away.
- if (IslandIter->second->hasDynamicRelocationAtIsland()) {
+ if (IslandIter->second->hasDynamicRelocationAtIsland() ||
+ !opts::CloneConstantIsland) {
MCSymbol *IslandSym =
IslandIter->second->getOrCreateIslandAccess(Address);
if (IslandSym)
@@ -469,6 +475,12 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
} else if (MCSymbol *IslandSym =
IslandIter->second->getOrCreateProxyIslandAccess(Address,
BF)) {
+ LLVM_DEBUG(
+ dbgs() << "BOLT-DEBUG: clone constant island at address 0x"
+ << Twine::utohexstr(IslandIter->first) << " with size of 0x"
+ << Twine::utohexstr(
+ IslandIter->second->estimateConstantIslandSize())
+ << " bytes, referenced by " << BF << "\n");
BF.createIslandDependency(IslandSym, IslandIter->second);
return std::make_pair(IslandSym, 0);
}
@@ -519,20 +531,40 @@ BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
}
MCSymbol *BinaryContext::handleExternalBranchTarget(uint64_t Address,
- BinaryFunction &BF) {
- if (BF.isInConstantIsland(Address)) {
- BF.setIgnored();
- this->outs() << "BOLT-WARNING: ignoring entry point at address 0x"
- << Twine::utohexstr(Address)
- << " in constant island of function " << BF << '\n';
- return nullptr;
+ BinaryFunction &Source,
+ BinaryFunction &Target) {
+ const uint64_t Offset = Address - Target.getAddress();
+ assert(Offset < Target.getSize() &&
+ "Address should be inside the referenced function");
+
+ bool IsValid = true;
+ if (Source.NeedBranchValidation) {
+ if (Target.CurrentState == BinaryFunction::State::Disassembled &&
+ !Target.getInstructionAtOffset(Offset)) {
+ this->errs()
+ << "BOLT-WARNING: corrupted control flow detected in function "
+ << Source
+ << ": an external branch/call targets an invalid instruction "
+ << "in function " << Target << " at address 0x"
+ << Twine::utohexstr(Address) << "; ignoring both functions\n";
+ IsValid = false;
+ }
+ if (Target.isInConstantIsland(Address)) {
+ this->errs() << "BOLT-WARNING: ignoring entry point at address 0x"
+ << Twine::utohexstr(Address)
+ << " in constant island of function " << Target << '\n';
+ IsValid = false;
+ }
}
- const uint64_t Offset = Address - BF.getAddress();
- assert(Offset < BF.getSize() &&
- "Address should be inside the referenced function");
+ if (!IsValid) {
+ Source.NeedBranchValidation = false;
+ Source.setIgnored();
+ Target.setIgnored();
+ return nullptr;
+ }
- return Offset ? BF.addEntryPointAtOffset(Offset) : BF.getSymbol();
+ return Offset ? Target.addEntryPointAtOffset(Offset) : Target.getSymbol();
}
MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
@@ -778,13 +810,17 @@ void BinaryContext::populateJumpTables() {
}
if (opts::StrictMode && DataPCRelocations.size()) {
- LLVM_DEBUG({
- dbgs() << DataPCRelocations.size()
- << " unclaimed PC-relative relocations left in data:\n";
- for (uint64_t Reloc : DataPCRelocations)
- dbgs() << Twine::utohexstr(Reloc) << '\n';
- });
- assert(0 && "unclaimed PC-relative relocations left in data\n");
+ this->errs() << "BOLT-ERROR: " << DataPCRelocations.size()
+ << " unclaimed PC-relative relocation(s) left in data";
+ if (opts::Verbosity) {
+ this->errs() << ":\n";
+ for (uint64_t RelocOffset : DataPCRelocations)
+ this->errs() << " @0x" << Twine::utohexstr(RelocOffset) << '\n';
+ } else {
+ this->errs() << ". Re-run with -v=1 to see the list\n";
+ }
+ this->errs() << "BOLT-ERROR: unable to proceed with --strict\n";
+ exit(1);
}
clearList(DataPCRelocations);
}
@@ -994,14 +1030,12 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
return Offset - StartOffset;
};
- // Skip a sequence of zero bytes. For AArch64 we only skip 4 bytes of zeros
- // in case the following zeros belong to constant island or veneer.
+ // Skip a sequence of zero bytes. For AArch64 we only skip a multiple of 4
+ // zero bytes in case the following zeros belong to a veneer.
auto skipZeros = [&]() {
const uint64_t StartOffset = Offset;
uint64_t CurrentOffset = Offset;
- for (; CurrentOffset < BF.getMaxSize() &&
- (!isAArch64() || CurrentOffset < StartOffset + 4);
- ++CurrentOffset)
+ for (; CurrentOffset < BF.getMaxSize(); ++CurrentOffset)
if ((*FunctionData)[CurrentOffset] != 0)
break;
@@ -1419,13 +1453,11 @@ void BinaryContext::processInterproceduralReferences() {
// Create an extra entry point if needed. Can also render the target
// function ignored if the reference is invalid.
- handleExternalBranchTarget(Address, *TargetFunction);
+ handleExternalBranchTarget(Address, Function, *TargetFunction);
continue;
}
- // Check if address falls in function padding space - this could be
- // unmarked data in code. In this case adjust the padding space size.
ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
assert(Section && "cannot get section for referenced address");
@@ -1437,7 +1469,7 @@ void BinaryContext::processInterproceduralReferences() {
if (SectionName == ".plt" || SectionName == ".plt.got")
continue;
- // Check if it is aarch64 veneer written at Address
+ // Check if it is aarch64 veneer written at Address.
if (isAArch64() && handleAArch64Veneer(Address))
continue;
@@ -1449,6 +1481,8 @@ void BinaryContext::processInterproceduralReferences() {
exit(1);
}
+ // Check if the address falls into the function padding space - this could
+ // be unmarked data in code. In this case, adjust the padding space size.
TargetFunction = getBinaryFunctionContainingAddress(Address,
/*CheckPastEnd=*/false,
/*UseMaxSize=*/true);
@@ -1506,6 +1540,17 @@ void BinaryContext::foldFunction(BinaryFunction &ChildBF,
}
ChildBF.getSymbols().clear();
+ // Reset function mapping for local symbols.
+ for (uint64_t RelOffset : ChildBF.getInternalRefDataRelocations()) {
+ const Relocation *Rel = getRelocationAt(RelOffset);
+ if (!Rel || !Rel->Symbol)
+ continue;
+
+ WriteSymbolMapLock.lock();
+ SymbolToFunctionMap[Rel->Symbol] = nullptr;
+ WriteSymbolMapLock.unlock();
+ }
+
// Move other names the child function is known under.
llvm::move(ChildBF.Aliases, std::back_inserter(ParentBF.Aliases));
ChildBF.Aliases.clear();
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index ddaad6e..4ccef98 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -61,6 +61,8 @@ extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> EnableBAT;
extern cl::opt<bool> Instrument;
+extern cl::list<std::string> PrintOnly;
+extern cl::opt<std::string> PrintOnlyFile;
extern cl::opt<bool> StrictMode;
extern cl::opt<bool> UpdateDebugSections;
extern cl::opt<unsigned> Verbosity;
@@ -133,14 +135,6 @@ PrintDynoStatsOnly("print-dyno-stats-only",
cl::Hidden,
cl::cat(BoltCategory));
-static cl::list<std::string>
-PrintOnly("print-only",
- cl::CommaSeparated,
- cl::desc("list of functions to print"),
- cl::value_desc("func1,func2,func3,..."),
- cl::Hidden,
- cl::cat(BoltCategory));
-
cl::opt<bool>
TimeBuild("time-build",
cl::desc("print time spent constructing binary functions"),
@@ -1044,8 +1038,10 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address) {
// For AArch64, check if this address is part of a constant island.
if (BC.isAArch64()) {
- if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address))
+ if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) {
+ Labels[Offset] = IslandSym;
return IslandSym;
+ }
}
if (Offset == getSize())
@@ -1414,9 +1410,7 @@ Error BinaryFunction::disassemble() {
// A recursive call. Calls to internal blocks are handled by
// ValidateInternalCalls pass.
TargetSymbol = getSymbol();
- }
-
- if (!TargetSymbol) {
+ } else {
// Create either local label or external symbol.
if (containsAddress(TargetAddress)) {
TargetSymbol = getOrCreateLocalLabel(TargetAddress);
@@ -1700,7 +1694,7 @@ bool BinaryFunction::scanExternalRefs() {
// Get a reference symbol for the function when address is a valid code
// reference.
BranchTargetSymbol =
- BC.handleExternalBranchTarget(TargetAddress, *TargetFunction);
+ BC.handleExternalBranchTarget(TargetAddress, *this, *TargetFunction);
if (!BranchTargetSymbol)
continue;
}
@@ -1896,16 +1890,6 @@ bool BinaryFunction::scanExternalRefs() {
}
}
- // Inform BinaryContext that this function symbols will not be defined and
- // relocations should not be created against them.
- if (BC.HasRelocations) {
- for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
- BC.UndefinedSymbols.insert(LI.second);
- for (MCSymbol *const EndLabel : FunctionEndLabels)
- if (EndLabel)
- BC.UndefinedSymbols.insert(EndLabel);
- }
-
clearList(Relocations);
clearList(ExternallyReferencedOffsets);
@@ -1918,6 +1902,36 @@ bool BinaryFunction::scanExternalRefs() {
return Success;
}
+bool BinaryFunction::validateInternalBranches() {
+ if (!isSimple() || TrapsOnEntry)
+ return true;
+
+ for (const auto &KV : Labels) {
+ MCSymbol *Label = KV.second;
+ if (getSecondaryEntryPointSymbol(Label))
+ continue;
+
+ const uint32_t Offset = KV.first;
+ // Skip offset 0 (function start, e.g. in empty functions) and offsets
+ // past the end of the function, as they may not be disassembled.
+ if (!Offset || (Offset > getSize()))
+ continue;
+
+ if (!getInstructionAtOffset(Offset) ||
+ isInConstantIsland(getAddress() + Offset)) {
+ BC.errs() << "BOLT-WARNING: corrupted control flow detected in function "
+ << *this << ": an internal branch/call targets an invalid "
+ << "instruction at address 0x"
+ << Twine::utohexstr(getAddress() + Offset)
+ << "; ignoring this function\n";
+ setIgnored();
+ return false;
+ }
+ }
+
+ return true;
+}
+
void BinaryFunction::postProcessEntryPoints() {
if (!isSimple())
return;
@@ -2059,41 +2073,47 @@ void BinaryFunction::postProcessJumpTables() {
}
}
-bool BinaryFunction::validateExternallyReferencedOffsets() {
- SmallPtrSet<MCSymbol *, 4> JTTargets;
- for (const JumpTable *JT : llvm::make_second_range(JumpTables))
- JTTargets.insert_range(JT->Entries);
+bool BinaryFunction::validateInternalRefDataRelocations() {
+ if (InternalRefDataRelocations.empty())
+ return true;
- bool HasUnclaimedReference = false;
- for (uint64_t Destination : ExternallyReferencedOffsets) {
- // Ignore __builtin_unreachable().
- if (Destination == getSize())
- continue;
- // Ignore constant islands
- if (isInConstantIsland(Destination + getAddress()))
- continue;
+ // Rely on the user hint that all data refs are valid and only used as
+ // destinations by indirect branch in the same function.
+ if (opts::StrictMode)
+ return true;
- if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Destination)) {
- // Check if the externally referenced offset is a recognized jump table
- // target.
- if (JTTargets.contains(BB->getLabel()))
- continue;
+ DenseSet<uint64_t> UnclaimedRelocations(InternalRefDataRelocations);
+ for (const JumpTable *JT : llvm::make_second_range(JumpTables)) {
+ uint64_t EntryAddress = JT->getAddress();
+ while (EntryAddress < JT->getAddress() + JT->getSize()) {
+ UnclaimedRelocations.erase(EntryAddress);
+ EntryAddress += JT->EntrySize;
+ }
+ }
- if (opts::Verbosity >= 1) {
- BC.errs() << "BOLT-WARNING: unclaimed data to code reference (possibly "
- << "an unrecognized jump table entry) to " << BB->getName()
- << " in " << *this << "\n";
- }
- auto L = BC.scopeLock();
- addEntryPoint(*BB);
- } else {
- BC.errs() << "BOLT-WARNING: unknown data to code reference to offset "
- << Twine::utohexstr(Destination) << " in " << *this << "\n";
- setIgnored();
+ if (UnclaimedRelocations.empty())
+ return true;
+
+ BC.errs() << "BOLT-WARNING: " << UnclaimedRelocations.size()
+ << " unclaimed data relocation"
+ << (UnclaimedRelocations.size() > 1 ? "s" : "")
+ << " remain against function " << *this;
+ if (opts::Verbosity) {
+ BC.errs() << ":\n";
+ for (uint64_t RelocationAddress : UnclaimedRelocations) {
+ const Relocation *Relocation = BC.getRelocationAt(RelocationAddress);
+ BC.errs() << " ";
+ if (Relocation)
+ BC.errs() << *Relocation;
+ else
+ BC.errs() << "<missing relocation>";
+ BC.errs() << '\n';
}
- HasUnclaimedReference = true;
+ } else {
+ BC.errs() << ". Re-run with -v=1 to see the list\n";
}
- return !HasUnclaimedReference;
+
+ return false;
}
bool BinaryFunction::postProcessIndirectBranches(
@@ -2177,13 +2197,10 @@ bool BinaryFunction::postProcessIndirectBranches(
continue;
}
- // If this block contains an epilogue code and has an indirect branch,
- // then most likely it's a tail call. Otherwise, we cannot tell for sure
- // what it is and conservatively reject the function's CFG.
- bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) {
- return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr);
- });
- if (IsEpilogue) {
+ // If this block contains epilogue code and has an indirect branch,
+ // then most likely it's a tail call. Otherwise, we cannot tell for
+ // sure what it is and conservatively reject the function's CFG.
+ if (BC.MIB->isEpilogue(BB)) {
BC.MIB->convertJmpToTailCall(Instr);
BB.removeAllSuccessors();
continue;
@@ -2221,14 +2238,6 @@ bool BinaryFunction::postProcessIndirectBranches(
LastIndirectJumpBB->updateJumpTableSuccessors();
}
- // Validate that all data references to function offsets are claimed by
- // recognized jump tables. Register externally referenced blocks as entry
- // points.
- if (!opts::StrictMode && hasInternalReference()) {
- if (!validateExternallyReferencedOffsets())
- return false;
- }
-
if (HasUnknownControlFlow && !BC.HasRelocations)
return false;
@@ -2517,12 +2526,18 @@ Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
CurrentState = State::CFG;
// Make any necessary adjustments for indirect branches.
- if (!postProcessIndirectBranches(AllocatorId)) {
- if (opts::Verbosity) {
- BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
- << *this << '\n';
- }
+ bool ValidCFG = postProcessIndirectBranches(AllocatorId);
+ if (!ValidCFG && opts::Verbosity) {
+ BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for "
+ << *this << '\n';
+ }
+
+ // Validate that all data references to function offsets are claimed by
+ // recognized jump tables.
+ if (ValidCFG)
+ ValidCFG = validateInternalRefDataRelocations();
+ if (!ValidCFG) {
if (BC.isAArch64())
PreserveNops = BC.HasRelocations;
@@ -3234,14 +3249,6 @@ void BinaryFunction::clearDisasmState() {
clearList(Instructions);
clearList(IgnoredBranches);
clearList(TakenBranches);
-
- if (BC.HasRelocations) {
- for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
- BC.UndefinedSymbols.insert(LI.second);
- for (MCSymbol *const EndLabel : FunctionEndLabels)
- if (EndLabel)
- BC.UndefinedSymbols.insert(EndLabel);
- }
}
void BinaryFunction::setTrapOnEntry() {
diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp
index 6f07017..e803d17 100644
--- a/bolt/lib/Core/BinarySection.cpp
+++ b/bolt/lib/Core/BinarySection.cpp
@@ -112,8 +112,10 @@ void BinarySection::emitAsData(MCStreamer &Streamer,
RI = ROE;
// Skip undefined symbols.
- auto HasUndefSym = [this](const auto &Relocation) {
- return BC.UndefinedSymbols.count(Relocation.Symbol);
+ auto HasUndefSym = [](const auto &Relocation) {
+ return Relocation.Symbol && Relocation.Symbol->isTemporary() &&
+ Relocation.Symbol->isUndefined() &&
+ !Relocation.Symbol->isRegistered();
};
if (std::any_of(ROI, ROE, HasUndefSym))
diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp
index 6be2c5a..5272d40 100644
--- a/bolt/lib/Core/DebugNames.cpp
+++ b/bolt/lib/Core/DebugNames.cpp
@@ -555,7 +555,7 @@ void DWARF5AcceleratorTable::populateAbbrevsMap() {
void DWARF5AcceleratorTable::writeEntry(BOLTDWARF5AccelTableData &Entry) {
const uint64_t EntryID = getEntryID(Entry);
- if (EntryRelativeOffsets.find(EntryID) != EntryRelativeOffsets.end())
+ if (EntryRelativeOffsets.contains(EntryID))
EntryRelativeOffsets[EntryID] = EntriesBuffer->size();
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
diff --git a/bolt/lib/Core/DynoStats.cpp b/bolt/lib/Core/DynoStats.cpp
index 1d98187..64a6d12 100644
--- a/bolt/lib/Core/DynoStats.cpp
+++ b/bolt/lib/Core/DynoStats.cpp
@@ -51,8 +51,6 @@ PrintDynoOpcodeStat("print-dyno-opcode-stats",
namespace llvm {
namespace bolt {
-constexpr const char *DynoStats::Desc[];
-
bool DynoStats::operator<(const DynoStats &Other) const {
return std::lexicographical_compare(
&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp
index 27656c7..9c33a7c 100644
--- a/bolt/lib/Core/Exceptions.cpp
+++ b/bolt/lib/Core/Exceptions.cpp
@@ -572,7 +572,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
if (Function.getBinaryContext().isAArch64()) {
// Support for pointer authentication:
// We need to annotate instructions that modify the RA State, to work
- // out the state of each instruction in MarkRAStates Pass.
+ // out the state of each instruction in PointerAuthCFIAnalyzer Pass.
if (Offset != 0)
Function.setInstModifiesRAState(DW_CFA_remember_state, Offset);
}
@@ -583,7 +583,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
if (Function.getBinaryContext().isAArch64()) {
// Support for pointer authentication:
// We need to annotate instructions that modify the RA State, to work
- // out the state of each instruction in MarkRAStates Pass.
+ // out the state of each instruction in PointerAuthCFIAnalyzer Pass.
if (Offset != 0)
Function.setInstModifiesRAState(DW_CFA_restore_state, Offset);
}
@@ -652,7 +652,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
// BasicBlocks, which changes during optimizations. Instead of adding
// OpNegateRAState CFIs, an annotation is added to the instruction, to
// mark that the instruction modifies the RA State. The actual state for
- // instructions are worked out in MarkRAStates based on these
+ // instructions are worked out in PointerAuthCFIAnalyzer based on these
// annotations.
if (Offset != 0)
Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state,
@@ -660,7 +660,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
else
// We cannot Annotate an instruction at Offset == 0.
// Instead, we save the initial (Signed) state, and push it to
- // MarkRAStates' RAStateStack.
+ // PointerAuthCFIAnalyzer's RAStateStack.
Function.setInitialRAState(true);
break;
}
diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp
index e96de80..0cb4ba1 100644
--- a/bolt/lib/Core/MCPlusBuilder.cpp
+++ b/bolt/lib/Core/MCPlusBuilder.cpp
@@ -186,26 +186,21 @@ bool MCPlusBuilder::hasRestoreState(const MCInst &Inst) const {
return hasAnnotation(Inst, MCAnnotation::kRestoreState);
}
-void MCPlusBuilder::setRASigned(MCInst &Inst) const {
+void MCPlusBuilder::setRAState(MCInst &Inst, bool State) const {
assert(!hasAnnotation(Inst, MCAnnotation::kRASigned));
- setAnnotationOpValue(Inst, MCAnnotation::kRASigned, true);
-}
-
-bool MCPlusBuilder::isRASigned(const MCInst &Inst) const {
- return hasAnnotation(Inst, MCAnnotation::kRASigned);
-}
-
-void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const {
assert(!hasAnnotation(Inst, MCAnnotation::kRAUnsigned));
- setAnnotationOpValue(Inst, MCAnnotation::kRAUnsigned, true);
+ if (State)
+ setAnnotationOpValue(Inst, MCAnnotation::kRASigned, true);
+ else
+ setAnnotationOpValue(Inst, MCAnnotation::kRAUnsigned, true);
}
-bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const {
- return hasAnnotation(Inst, MCAnnotation::kRAUnsigned);
-}
-
-bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const {
- return !(isRAUnsigned(Inst) || isRASigned(Inst));
+std::optional<bool> MCPlusBuilder::getRAState(const MCInst &Inst) const {
+ if (hasAnnotation(Inst, MCAnnotation::kRASigned))
+ return true;
+ if (hasAnnotation(Inst, MCAnnotation::kRAUnsigned))
+ return false;
+ return std::nullopt;
}
std::optional<MCLandingPad> MCPlusBuilder::getEHInfo(const MCInst &Inst) const {
diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp
index 4b827b6..f872db2 100644
--- a/bolt/lib/Core/Relocation.cpp
+++ b/bolt/lib/Core/Relocation.cpp
@@ -1018,41 +1018,15 @@ void Relocation::print(raw_ostream &OS) const {
default:
OS << "RType:" << Twine::utohexstr(Type);
break;
-
- case Triple::aarch64: {
- static const char *const AArch64RelocNames[] = {
-#define ELF_RELOC(name, value) #name,
-#include "llvm/BinaryFormat/ELFRelocs/AArch64.def"
-#undef ELF_RELOC
- };
- assert(Type < ArrayRef(AArch64RelocNames).size());
- OS << AArch64RelocNames[Type];
- } break;
-
+ case Triple::aarch64:
+ OS << object::getELFRelocationTypeName(ELF::EM_AARCH64, Type);
+ break;
case Triple::riscv64:
- // RISC-V relocations are not sequentially numbered so we cannot use an
- // array
- switch (Type) {
- default:
- llvm_unreachable("illegal RISC-V relocation");
-#define ELF_RELOC(name, value) \
- case value: \
- OS << #name; \
+ OS << object::getELFRelocationTypeName(ELF::EM_RISCV, Type);
break;
-#include "llvm/BinaryFormat/ELFRelocs/RISCV.def"
-#undef ELF_RELOC
- }
+ case Triple::x86_64:
+ OS << object::getELFRelocationTypeName(ELF::EM_X86_64, Type);
break;
-
- case Triple::x86_64: {
- static const char *const X86RelocNames[] = {
-#define ELF_RELOC(name, value) #name,
-#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
-#undef ELF_RELOC
- };
- assert(Type < ArrayRef(X86RelocNames).size());
- OS << X86RelocNames[Type];
- } break;
}
OS << ", 0x" << Twine::utohexstr(Offset);
if (Symbol) {
diff --git a/bolt/lib/Passes/ADRRelaxationPass.cpp b/bolt/lib/Passes/AArch64RelaxationPass.cpp
index c3954c9..610adad 100644
--- a/bolt/lib/Passes/ADRRelaxationPass.cpp
+++ b/bolt/lib/Passes/AArch64RelaxationPass.cpp
@@ -1,4 +1,4 @@
-//===- bolt/Passes/ADRRelaxationPass.cpp ----------------------------------===//
+//===- bolt/Passes/AArch64RelaxationPass.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the ADRRelaxationPass class.
+// This file implements the AArch64RelaxationPass class.
//
//===----------------------------------------------------------------------===//
-#include "bolt/Passes/ADRRelaxationPass.h"
+#include "bolt/Passes/AArch64RelaxationPass.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include <iterator>
@@ -20,10 +20,10 @@ using namespace llvm;
namespace opts {
extern cl::OptionCategory BoltCategory;
-static cl::opt<bool>
- AdrPassOpt("adr-relaxation",
- cl::desc("Replace ARM non-local ADR instructions with ADRP"),
- cl::init(true), cl::cat(BoltCategory), cl::ReallyHidden);
+static cl::opt<bool> AArch64PassOpt(
+ "aarch64-relaxation",
+ cl::desc("Replace ARM non-local ADR/LDR instructions with ADRP"),
+ cl::init(true), cl::cat(BoltCategory), cl::ReallyHidden);
} // namespace opts
namespace llvm {
@@ -35,7 +35,7 @@ namespace bolt {
// jobs and checking the exit flag after it.
static bool PassFailed = false;
-void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
+void AArch64RelaxationPass::runOnFunction(BinaryFunction &BF) {
if (PassFailed)
return;
@@ -43,10 +43,13 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
for (BinaryBasicBlock &BB : BF) {
for (auto It = BB.begin(); It != BB.end(); ++It) {
MCInst &Inst = *It;
- if (!BC.MIB->isADR(Inst))
+ bool IsADR = BC.MIB->isADR(Inst);
+
+ // TODO: Handle other types of LDR (literal, PC-relative) instructions.
+ if (!IsADR && !BC.MIB->isLDRXl(Inst) && !BC.MIB->isLDRWl(Inst))
continue;
- const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst);
+ const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, IsADR ? 0 : 1);
if (!Symbol)
continue;
@@ -56,25 +59,27 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
continue;
}
- // Don't relax ADR if it points to the same function and is in the main
- // fragment and BF initial size is < 1MB.
+ // Don't relax ADR/LDR if it points to the same function and is in the
+ // main fragment and BF initial size is < 1MB.
const unsigned OneMB = 0x100000;
if (BF.getSize() < OneMB) {
BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol);
if (TargetBF == &BF && !BB.isSplit())
continue;
- // No relaxation needed if ADR references a basic block in the same
+ // No relaxation needed if ADR/LDR references a basic block in the same
// fragment.
if (BinaryBasicBlock *TargetBB = BF.getBasicBlockForLabel(Symbol))
if (BB.getFragmentNum() == TargetBB->getFragmentNum())
continue;
}
- InstructionListType AdrpAdd;
+ InstructionListType AdrpMaterialization;
{
auto L = BC.scopeLock();
- AdrpAdd = BC.MIB->undoAdrpAddRelaxation(Inst, BC.Ctx.get());
+ AdrpMaterialization =
+ IsADR ? BC.MIB->undoAdrpAddRelaxation(Inst, BC.Ctx.get())
+ : BC.MIB->createAdrpLdr(Inst, BC.Ctx.get());
}
if (It != BB.begin() && BC.MIB->isNoop(*std::prev(It))) {
@@ -88,18 +93,18 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
// invalidate this offset, so we have to rely on linker-inserted NOP to
// replace it with ADRP, and abort if it is not present.
auto L = BC.scopeLock();
- BC.errs() << "BOLT-ERROR: cannot relax ADR in non-simple function "
- << BF << '\n';
+ BC.errs() << "BOLT-ERROR: cannot relax " << (IsADR ? "ADR" : "LDR")
+ << " in non-simple function " << BF << '\n';
PassFailed = true;
return;
}
- It = BB.replaceInstruction(It, AdrpAdd);
+ It = BB.replaceInstruction(It, AdrpMaterialization);
}
}
}
-Error ADRRelaxationPass::runOnFunctions(BinaryContext &BC) {
- if (!opts::AdrPassOpt || !BC.HasRelocations)
+Error AArch64RelaxationPass::runOnFunctions(BinaryContext &BC) {
+ if (!opts::AArch64PassOpt || !BC.HasRelocations)
return Error::success();
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
@@ -108,7 +113,7 @@ Error ADRRelaxationPass::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr,
- "ADRRelaxationPass");
+ "AArch64RelaxationPass");
if (PassFailed)
return createFatalBOLTError("");
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index d751951..ec012f0 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -1,5 +1,5 @@
add_llvm_library(LLVMBOLTPasses
- ADRRelaxationPass.cpp
+ AArch64RelaxationPass.cpp
Aligner.cpp
AllocCombiner.cpp
AsmDump.cpp
@@ -17,18 +17,18 @@ add_llvm_library(LLVMBOLTPasses
IdenticalCodeFolding.cpp
IndirectCallPromotion.cpp
Inliner.cpp
- InsertNegateRAStatePass.cpp
Instrumentation.cpp
JTFootprintReduction.cpp
LongJmp.cpp
LoopInversionPass.cpp
LivenessAnalysis.cpp
MCF.cpp
- MarkRAStates.cpp
PatchEntries.cpp
PAuthGadgetScanner.cpp
PettisAndHansen.cpp
PLTCall.cpp
+ PointerAuthCFIAnalyzer.cpp
+ PointerAuthCFIFixup.cpp
ProfileQualityStats.cpp
RegAnalysis.cpp
RegReAssign.cpp
diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp
index 8923562..c5c33b7 100644
--- a/bolt/lib/Passes/IdenticalCodeFolding.cpp
+++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp
@@ -377,9 +377,11 @@ namespace bolt {
void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) {
for (const auto &[Address, Data] : BC.getBinaryData()) {
// Filter out all symbols that are not vtables.
- if (!Data->getName().starts_with("_ZTV"))
+ if (!Data->getName().starts_with("_ZTV") && // vtable
+ !Data->getName().starts_with("_ZTCN")) // construction vtable
continue;
- for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8)
+ for (uint64_t I = Address, End = I + Data->getSize(); I < End;
+ I += VTableAddressGranularity)
setAddressUsedInVTable(I);
}
}
@@ -437,8 +439,9 @@ void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) {
NamedRegionTimer MarkFunctionsUnsafeToFoldTimer(
"markFunctionsUnsafeToFold", "markFunctionsUnsafeToFold", "ICF breakdown",
"ICF breakdown", opts::TimeICF);
- if (!BC.isX86())
- BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n";
+ if (!BC.isX86() && !BC.isAArch64())
+ BC.outs()
+ << "BOLT-WARNING: safe ICF is only supported for x86 and AArch64\n";
analyzeDataRelocations(BC);
analyzeFunctions(BC);
}
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 9b28c7e..0740fce 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -195,6 +195,13 @@ InliningInfo getInliningInfo(const BinaryFunction &BF) {
if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
continue;
+ // Pointer signing and authentication instructions are used around
+ // Push and Pop. These are also straightforward to handle.
+ if (BC.isAArch64() &&
+ (BC.MIB->isPSignOnLR(Inst) || BC.MIB->isPAuthOnLR(Inst) ||
+ BC.MIB->isPAuthAndRet(Inst)))
+ continue;
+
DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
BC.MIB->hasUseOfPhysReg(Inst, SPReg);
}
@@ -338,6 +345,18 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
BC.Ctx.get());
}
+ // Handling fused authentication and return instructions (Armv8.3-A):
+ // if the Callee does not end in a tailcall, the return will be removed
+ // from the inlined block. If that return is RETA(A|B), we have to keep
+ // the authentication part.
+ // RETAA -> AUTIASP
+ // RETAB -> AUTIBSP
+ if (!CSIsTailCall && BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) {
+ MCInst Auth;
+ BC.MIB->createMatchingAuth(Inst, Auth);
+ InsertII =
+ std::next(InlinedBB->insertInstruction(InsertII, std::move(Auth)));
+ }
if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
InsertII =
std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
@@ -472,6 +491,32 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
}
}
+ // AArch64 BTI:
+ // If the callee has an indirect tailcall (BR), we would transform it to
+ // an indirect call (BLR) in inlineCall. Because of this, we would have to
+ // update the BTI at the target of the tailcall. However, these targets
+ // are not known. Instead, we skip inlining blocks with indirect
+ // tailcalls.
+ auto HasIndirectTailCall = [&](const BinaryFunction &BF) -> bool {
+ for (const auto &BB : BF) {
+ for (const auto &II : BB) {
+ if (BC.MIB->isIndirectBranch(II) && BC.MIB->isTailCall(II)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ };
+
+ if (BC.isAArch64() && BC.usesBTI() &&
+ HasIndirectTailCall(*TargetFunction)) {
+ ++InstIt;
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Skipping inlining block with tailcall"
+ << " in " << Function << " : " << BB->getName()
+ << " to keep BTIs consistent.\n");
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
<< " in " << Function << " : " << BB->getName()
<< ". Count: " << BB->getKnownExecutionCount()
diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp
deleted file mode 100644
index 33664e1..0000000
--- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-//===- bolt/Passes/InsertNegateRAStatePass.cpp ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the InsertNegateRAStatePass class. It inserts
-// OpNegateRAState CFIs to places where the state of two consecutive
-// instructions are different.
-//
-//===----------------------------------------------------------------------===//
-#include "bolt/Passes/InsertNegateRAStatePass.h"
-#include "bolt/Core/BinaryFunction.h"
-#include "bolt/Core/ParallelUtilities.h"
-#include <cstdlib>
-
-using namespace llvm;
-
-namespace llvm {
-namespace bolt {
-
-void InsertNegateRAState::runOnFunction(BinaryFunction &BF) {
- BinaryContext &BC = BF.getBinaryContext();
-
- if (BF.getState() == BinaryFunction::State::Empty)
- return;
-
- if (BF.getState() != BinaryFunction::State::CFG &&
- BF.getState() != BinaryFunction::State::CFG_Finalized) {
- BC.outs() << "BOLT-INFO: no CFG for " << BF.getPrintName()
- << " in InsertNegateRAStatePass\n";
- return;
- }
-
- inferUnknownStates(BF);
-
- for (FunctionFragment &FF : BF.getLayout().fragments()) {
- coverFunctionFragmentStart(BF, FF);
- bool FirstIter = true;
- MCInst PrevInst;
- // As this pass runs after function splitting, we should only check
- // consecutive instructions inside FunctionFragments.
- for (BinaryBasicBlock *BB : FF) {
- for (auto It = BB->begin(); It != BB->end(); ++It) {
- MCInst &Inst = *It;
- if (BC.MIB->isCFI(Inst))
- continue;
- if (!FirstIter) {
- // Consecutive instructions with different RAState means we need to
- // add a OpNegateRAState.
- if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) ||
- (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) {
- It = BF.addCFIInstruction(
- BB, It, MCCFIInstruction::createNegateRAState(nullptr));
- }
- } else {
- FirstIter = false;
- }
- PrevInst = *It;
- }
- }
- }
-}
-
-void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF,
- FunctionFragment &FF) {
- BinaryContext &BC = BF.getBinaryContext();
- if (FF.empty())
- return;
- // Find the first BB in the FF which has Instructions.
- // BOLT can generate empty BBs at function splitting which are only used as
- // target labels. We should add the negate-ra-state CFI to the first
- // non-empty BB.
- auto *FirstNonEmpty =
- std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) {
- // getFirstNonPseudo returns BB.end() if it does not find any
- // Instructions.
- return BB->getFirstNonPseudo() != BB->end();
- });
- // If a function is already split in the input, the first FF can also start
- // with Signed state. This covers that scenario as well.
- if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) {
- BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(),
- MCCFIInstruction::createNegateRAState(nullptr));
- }
-}
-
-void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) {
- BinaryContext &BC = BF.getBinaryContext();
- bool FirstIter = true;
- MCInst PrevInst;
- for (BinaryBasicBlock &BB : BF) {
- for (MCInst &Inst : BB) {
- if (BC.MIB->isCFI(Inst))
- continue;
-
- if (!FirstIter && BC.MIB->isRAStateUnknown(Inst)) {
- if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isPSignOnLR(PrevInst)) {
- BC.MIB->setRASigned(Inst);
- } else if (BC.MIB->isRAUnsigned(PrevInst) ||
- BC.MIB->isPAuthOnLR(PrevInst)) {
- BC.MIB->setRAUnsigned(Inst);
- }
- } else {
- FirstIter = false;
- }
- PrevInst = Inst;
- }
- }
-}
-
-Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) {
- std::atomic<uint64_t> FunctionsModified{0};
- ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
- FunctionsModified++;
- runOnFunction(BF);
- };
-
- ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
- // We can skip functions which did not include negate-ra-state CFIs. This
- // includes code using pac-ret hardening as well, if the binary is
- // compiled with `-fno-exceptions -fno-unwind-tables
- // -fno-asynchronous-unwind-tables`
- return !BF.containedNegateRAState() || BF.isIgnored();
- };
-
- ParallelUtilities::runOnEachFunction(
- BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
- SkipPredicate, "InsertNegateRAStatePass");
-
- BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in "
- << FunctionsModified << " out of " << BC.getBinaryFunctions().size()
- << " functions "
- << format("(%.2lf%%).\n", (100.0 * FunctionsModified) /
- BC.getBinaryFunctions().size());
- return Error::success();
-}
-
-} // end namespace bolt
-} // end namespace llvm
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 01b350b..d38a7fa 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -547,7 +547,7 @@ protected:
// Being trusted is a strictly stronger property than being
// safe-to-dereference.
- assert(!Next.TrustedRegs.test(Next.SafeToDerefRegs) &&
+ assert(Next.TrustedRegs.subsetOf(Next.SafeToDerefRegs) &&
"SafeToDerefRegs should contain all TrustedRegs");
return Next;
diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
index b262d66..697b1bb 100644
--- a/bolt/lib/Passes/MarkRAStates.cpp
+++ b/bolt/lib/Passes/PointerAuthCFIAnalyzer.cpp
@@ -1,4 +1,4 @@
-//===- bolt/Passes/MarkRAStates.cpp ---------------------------------===//
+//===- bolt/Passes/PointerAuthCFIAnalyzer.cpp -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the MarkRAStates class.
+// This file implements the PointerAuthCFIAnalyzer class.
// Three CFIs have an influence on the RA State of an instruction:
// - NegateRAState flips the RA State,
// - RememberState pushes the RA State to a stack,
@@ -16,10 +16,10 @@
// the RA State of each instruction, and save it as new MCAnnotations. The new
// annotations are Signing, Signed, Authenticating and Unsigned. After
// optimizations, .cfi_negate_ra_state CFIs are added to the places where the
-// state changes in InsertNegateRAStatePass.
+// state changes in PointerAuthCFIFixup.
//
//===----------------------------------------------------------------------===//
-#include "bolt/Passes/MarkRAStates.h"
+#include "bolt/Passes/PointerAuthCFIAnalyzer.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/ParallelUtilities.h"
#include <cstdlib>
@@ -28,10 +28,14 @@
using namespace llvm;
+namespace opts {
+extern llvm::cl::opt<unsigned> Verbosity;
+} // namespace opts
+
namespace llvm {
namespace bolt {
-bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
+bool PointerAuthCFIAnalyzer::runOnFunction(BinaryFunction &BF) {
BinaryContext &BC = BF.getBinaryContext();
@@ -43,9 +47,10 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
// Not all functions have .cfi_negate_ra_state in them. But if one does,
// we expect psign/pauth instructions to have the hasNegateRAState
// annotation.
- BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
+ if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+ << BF.getPrintName()
+ << ": ptr sign/auth inst without .cfi_negate_ra_state\n";
std::lock_guard<std::mutex> Lock(IgnoreMutex);
BF.setIgnored();
return false;
@@ -65,36 +70,30 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
if (BC.MIB->isPSignOnLR(Inst)) {
if (RAState) {
// RA signing instructions should only follow unsigned RA state.
- BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr signing inst encountered in Signed RA state\n";
+ if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+ << BF.getPrintName()
+ << ": ptr signing inst encountered in Signed RA state\n";
std::lock_guard<std::mutex> Lock(IgnoreMutex);
BF.setIgnored();
return false;
}
- // The signing instruction itself is unsigned, the next will be
- // signed.
- BC.MIB->setRAUnsigned(Inst);
} else if (BC.MIB->isPAuthOnLR(Inst)) {
if (!RAState) {
// RA authenticating instructions should only follow signed RA state.
- BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
- << BF.getPrintName()
- << ": ptr authenticating inst encountered in Unsigned RA "
- "state\n";
+ if (opts::Verbosity >= 1)
+ BC.outs() << "BOLT-INFO: inconsistent RAStates in function "
+ << BF.getPrintName()
+ << ": ptr authenticating inst encountered in Unsigned RA "
+ "state\n";
std::lock_guard<std::mutex> Lock(IgnoreMutex);
BF.setIgnored();
return false;
}
- // The authenticating instruction itself is signed, but the next will be
- // unsigned.
- BC.MIB->setRASigned(Inst);
- } else if (RAState) {
- BC.MIB->setRASigned(Inst);
- } else {
- BC.MIB->setRAUnsigned(Inst);
}
+ BC.MIB->setRAState(Inst, RAState);
+
// Updating RAState. All updates are valid from the next instruction.
// Because the same instruction can have remember and restore, the order
// here is relevant. This is the reason to loop over Annotations instead
@@ -118,7 +117,7 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) {
return true;
}
-Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
+Error PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
std::atomic<uint64_t> FunctionsIgnored{0};
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
if (!runOnFunction(BF)) {
@@ -138,14 +137,35 @@ Error MarkRAStates::runOnFunctions(BinaryContext &BC) {
return P.second.containedNegateRAState() && !P.second.isIgnored();
});
+ if (Total == 0)
+ return Error::success();
+
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
- SkipPredicate, "MarkRAStates");
- BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total
+ SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
+ BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
- << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+ << format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ // Errors in the input are expected from two sources:
+ // - compilers emitting incorrect CFIs. This happens more frequently with
+ // older compiler versions, but it should not account for a large
+ // percentage.
+ // - input binary is using synchronous unwind tables. This means that after
+ // call sites, the unwind CFIs are dropped: the pass sees missing
+ // .cfi_negate_ra_state from autiasp instructions. If this is the case, a
+ // larger percentage of functions will be ignored.
+ //
+ // This is why the 10% threshold was chosen: we should not warn about
+ // synchronous unwind tables if only a few % are ignored.
+ if (IgnoredPercent >= 10.0)
+ BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables. For C compilers, see "
+ "-fasynchronous-unwind-tables.\n";
+
return Error::success();
}
diff --git a/bolt/lib/Passes/PointerAuthCFIFixup.cpp b/bolt/lib/Passes/PointerAuthCFIFixup.cpp
new file mode 100644
index 0000000..56086da
--- /dev/null
+++ b/bolt/lib/Passes/PointerAuthCFIFixup.cpp
@@ -0,0 +1,268 @@
+//===- bolt/Passes/PointerAuthCFIFixup.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PointerAuthCFIFixup class. It inserts
+// OpNegateRAState CFIs at places where the RA states of two consecutive
+// instructions differ.
+//
+//===----------------------------------------------------------------------===//
+#include "bolt/Passes/PointerAuthCFIFixup.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+namespace llvm {
+namespace bolt {
+
+static bool PassFailed = false;
+
+void PointerAuthCFIFixup::runOnFunction(BinaryFunction &BF) {
+ if (PassFailed)
+ return;
+
+ BinaryContext &BC = BF.getBinaryContext();
+
+ if (BF.getState() == BinaryFunction::State::Empty)
+ return;
+
+ if (BF.getState() != BinaryFunction::State::CFG &&
+ BF.getState() != BinaryFunction::State::CFG_Finalized) {
+ BC.outs() << "BOLT-INFO: no CFG for " << BF.getPrintName()
+ << " in PointerAuthCFIFixup\n";
+ return;
+ }
+
+ inferUnknownStates(BF);
+
+ for (FunctionFragment &FF : BF.getLayout().fragments()) {
+ coverFunctionFragmentStart(BF, FF);
+ bool FirstIter = true;
+ bool PrevRAState = false;
+ // As this pass runs after function splitting, we should only check
+ // consecutive instructions inside FunctionFragments.
+ for (BinaryBasicBlock *BB : FF) {
+ for (auto It = BB->begin(); It != BB->end(); ++It) {
+ MCInst &Inst = *It;
+ if (BC.MIB->isCFI(Inst))
+ continue;
+ std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+ if (!RAState.has_value()) {
+ BC.errs() << "BOLT-ERROR: unknown RAState after inferUnknownStates "
+ << " in function " << BF.getPrintName() << "\n";
+ PassFailed = true;
+ return;
+ }
+ if (!FirstIter) {
+ // Consecutive instructions with different RAStates mean we need to
+ // add an OpNegateRAState.
+ if (*RAState != PrevRAState)
+ It = BF.addCFIInstruction(
+ BB, It, MCCFIInstruction::createNegateRAState(nullptr));
+ } else {
+ FirstIter = false;
+ }
+ PrevRAState = *RAState;
+ }
+ }
+ }
+}
+
+void PointerAuthCFIFixup::inferUnknownStates(BinaryFunction &BF) {
+ BinaryContext &BC = BF.getBinaryContext();
+
+ // Fill in missing RAStates in simple cases (inside BBs).
+ for (BinaryBasicBlock &BB : BF) {
+ fillUnknownStateInBB(BC, BB);
+ }
+ // BasicBlocks which are made entirely of "new instructions" (instructions
+ // without RAState annotation) are stubs, and do not have correct unwind info.
+ // We should iterate in layout order and fill them based on previous known
+ // RAState.
+ fillUnknownStubs(BF);
+}
+
+void PointerAuthCFIFixup::coverFunctionFragmentStart(BinaryFunction &BF,
+ FunctionFragment &FF) {
+ BinaryContext &BC = BF.getBinaryContext();
+ if (FF.empty())
+ return;
+ // Find the first BB in the FF which has Instructions.
+ // BOLT can generate empty BBs at function splitting which are only used as
+ // target labels. We should add the negate-ra-state CFI to the first
+ // non-empty BB.
+ auto *FirstNonEmpty =
+ std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) {
+ // getFirstNonPseudo returns BB.end() if it does not find any
+ // Instructions.
+ return BB->getFirstNonPseudo() != BB->end();
+ });
+ // If a function is already split in the input, the first FF can also start
+ // with Signed state. This covers that scenario as well.
+ auto II = (*FirstNonEmpty)->getFirstNonPseudo();
+ std::optional<bool> RAState = BC.MIB->getRAState(*II);
+ if (!RAState.has_value()) {
+ BC.errs() << "BOLT-ERROR: unknown RAState after inferUnknownStates "
+ << " in function " << BF.getPrintName() << "\n";
+ PassFailed = true;
+ return;
+ }
+ if (*RAState)
+ BF.addCFIInstruction(*FirstNonEmpty, II,
+ MCCFIInstruction::createNegateRAState(nullptr));
+}
+
+std::optional<bool>
+PointerAuthCFIFixup::getFirstKnownRAState(BinaryContext &BC,
+ BinaryBasicBlock &BB) {
+ for (const MCInst &Inst : BB) {
+ if (BC.MIB->isCFI(Inst))
+ continue;
+ std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+ if (RAState.has_value())
+ return RAState;
+ }
+ return std::nullopt;
+}
+
+bool PointerAuthCFIFixup::isUnknownBlock(BinaryContext &BC,
+ BinaryBasicBlock &BB) {
+ std::optional<bool> FirstRAState = getFirstKnownRAState(BC, BB);
+ return !FirstRAState.has_value();
+}
+
+void PointerAuthCFIFixup::fillUnknownStateInBB(BinaryContext &BC,
+ BinaryBasicBlock &BB) {
+
+ auto First = BB.getFirstNonPseudo();
+ if (First == BB.end())
+ return;
+ // If the first instruction has unknown RAState, we should copy the first
+ // known RAState.
+ std::optional<bool> RAState = BC.MIB->getRAState(*First);
+ if (!RAState.has_value()) {
+ std::optional<bool> FirstRAState = getFirstKnownRAState(BC, BB);
+ if (!FirstRAState.has_value())
+ // We fill unknown BBs later.
+ return;
+
+ BC.MIB->setRAState(*First, *FirstRAState);
+ }
+
+ // At this point we know the RAState of the first instruction,
+ // so we can propagate the RAStates to all subsequent unknown instructions.
+ MCInst Prev = *First;
+ for (auto It = First + 1; It != BB.end(); ++It) {
+ MCInst &Inst = *It;
+ if (BC.MIB->isCFI(Inst))
+ continue;
+
+ // No need to check for nullopt: we only entered this loop after the first
+ // instruction had its RAState set, and RAState is always set for the
+ // previous instruction in the previous iteration of the loop.
+ std::optional<bool> PrevRAState = BC.MIB->getRAState(Prev);
+
+ std::optional<bool> RAState = BC.MIB->getRAState(Inst);
+ if (!RAState.has_value()) {
+ if (BC.MIB->isPSignOnLR(Prev))
+ PrevRAState = true;
+ else if (BC.MIB->isPAuthOnLR(Prev))
+ PrevRAState = false;
+ BC.MIB->setRAState(Inst, *PrevRAState);
+ }
+ Prev = Inst;
+ }
+}
+
+void PointerAuthCFIFixup::markUnknownBlock(BinaryContext &BC,
+ BinaryBasicBlock &BB, bool State) {
+ // If we call this when an Instruction has either kRASigned or kRAUnsigned
+ // annotation, the setRAState call below would fail.
+ assert(isUnknownBlock(BC, BB) &&
+ "markUnknownBlock should only be called on unknown blocks");
+ for (MCInst &Inst : BB) {
+ if (BC.MIB->isCFI(Inst))
+ continue;
+ BC.MIB->setRAState(Inst, State);
+ }
+}
+
+void PointerAuthCFIFixup::fillUnknownStubs(BinaryFunction &BF) {
+ BinaryContext &BC = BF.getBinaryContext();
+ bool FirstIter = true;
+ MCInst PrevInst;
+ for (FunctionFragment &FF : BF.getLayout().fragments()) {
+ for (BinaryBasicBlock *BB : FF) {
+ if (FirstIter) {
+ FirstIter = false;
+ if (isUnknownBlock(BC, *BB))
+ // If the first BasicBlock is unknown, the function's entry RAState
+ // should be used.
+ markUnknownBlock(BC, *BB, BF.getInitialRAState());
+ } else if (isUnknownBlock(BC, *BB)) {
+ // As explained in issue #160989, the unwind info is incorrect for
+ // stubs. Indicating the correct RAState without the rest of the unwind
+ // info being correct is not useful. Instead, we copy the RAState from
+ // the previous instruction.
+ std::optional<bool> PrevRAState = BC.MIB->getRAState(PrevInst);
+ if (!PrevRAState.has_value()) {
+ // No non-cfi instruction encountered in the function yet.
+ // This means the RAState is the same as at the function entry.
+ markUnknownBlock(BC, *BB, BF.getInitialRAState());
+ continue;
+ }
+
+ if (BC.MIB->isPSignOnLR(PrevInst))
+ PrevRAState = true;
+ else if (BC.MIB->isPAuthOnLR(PrevInst))
+ PrevRAState = false;
+ markUnknownBlock(BC, *BB, *PrevRAState);
+ }
+ // This function iterates on BasicBlocks, so the PrevInst has to be
+ // updated to the last instruction of the current BasicBlock. If the
+ // BasicBlock is empty, or only has PseudoInstructions, PrevInst will not
+ // be updated.
+ auto Last = BB->getLastNonPseudo();
+ if (Last != BB->rend())
+ PrevInst = *Last;
+ }
+ }
+}
+
+Error PointerAuthCFIFixup::runOnFunctions(BinaryContext &BC) {
+ std::atomic<uint64_t> FunctionsModified{0};
+ ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
+ FunctionsModified++;
+ runOnFunction(BF);
+ };
+
+ ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
+ // We can skip functions which did not include negate-ra-state CFIs. This
+ // includes code using pac-ret hardening as well, if the binary is
+ // compiled with `-fno-exceptions -fno-unwind-tables
+ // -fno-asynchronous-unwind-tables`
+ return !BF.containedNegateRAState() || BF.isIgnored();
+ };
+
+ ParallelUtilities::runOnEachFunction(
+ BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
+ SkipPredicate, "PointerAuthCFIFixup");
+
+ BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in "
+ << FunctionsModified << " out of " << BC.getBinaryFunctions().size()
+ << " functions "
+ << format("(%.2lf%%).\n", (100.0 * FunctionsModified) /
+ BC.getBinaryFunctions().size());
+ if (PassFailed)
+ return createFatalBOLTError("");
+ return Error::success();
+}
+
+} // end namespace bolt
+} // end namespace llvm
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index dc3d918..6b96901 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -159,8 +159,6 @@ std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) {
}
}
-constexpr uint64_t DataAggregator::KernelBaseAddr;
-
DataAggregator::~DataAggregator() { deleteTempFiles(); }
namespace {
@@ -564,13 +562,18 @@ void DataAggregator::imputeFallThroughs() {
// Skip fall-throughs in external code.
if (Trace.From == Trace::EXTERNAL)
continue;
- std::pair CurrentBranch(Trace.Branch, Trace.From);
+ if (std::pair CurrentBranch(Trace.Branch, Trace.From);
+ CurrentBranch != PrevBranch) {
+ // New group: reset aggregates.
+ AggregateCount = AggregateFallthroughSize = 0;
+ PrevBranch = CurrentBranch;
+ }
// BR_ONLY must be the last trace in the group
if (Trace.To == Trace::BR_ONLY) {
// If the group is not empty, use aggregate values, otherwise 0-length
// for unconditional jumps (call/ret/uncond branch) or 1-length for others
uint64_t InferredBytes =
- PrevBranch == CurrentBranch
+ AggregateFallthroughSize
? AggregateFallthroughSize / AggregateCount
: !checkUnconditionalControlTransfer(Trace.From);
Trace.To = Trace.From + InferredBytes;
@@ -578,16 +581,11 @@ void DataAggregator::imputeFallThroughs() {
<< " bytes)\n");
++InferredTraces;
} else {
- // Trace with a valid fall-through
- // New group: reset aggregates.
- if (CurrentBranch != PrevBranch)
- AggregateCount = AggregateFallthroughSize = 0;
// Only use valid fall-through lengths
if (Trace.To != Trace::EXTERNAL)
AggregateFallthroughSize += (Trace.To - Trace.From) * Info.TakenCount;
AggregateCount += Info.TakenCount;
}
- PrevBranch = CurrentBranch;
}
if (opts::Verbosity >= 1)
outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
@@ -1321,7 +1319,8 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
}
using SSI = StringSwitch<int>;
- AddrNum = SSI(Str).Cases("T", "R", 3).Case("S", 1).Case("E", 0).Default(2);
+ AddrNum =
+ SSI(Str).Cases({"T", "R"}, 3).Case("S", 1).Case("E", 0).Default(2);
CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1);
}
@@ -2215,7 +2214,7 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
OutFile << "boltedcollection\n";
if (opts::BasicAggregation) {
OutFile << "no_lbr";
- for (const StringMapEntry<std::nullopt_t> &Entry : EventNames)
+ for (const StringMapEntry<EmptyStringSetTag> &Entry : EventNames)
OutFile << " " << Entry.getKey();
OutFile << "\n";
@@ -2291,7 +2290,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
ListSeparator LS(",");
raw_string_ostream EventNamesOS(BP.Header.EventNames);
- for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
+ for (const StringMapEntry<EmptyStringSetTag> &EventEntry : EventNames)
EventNamesOS << LS << EventEntry.first().str();
BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_BASIC
@@ -2398,10 +2397,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
PseudoProbeDecoder->getAddress2ProbesMap();
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
Fragments.insert(BF);
- DenseMap<
- uint32_t,
- std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
- BlockProbes;
+ DenseMap<uint32_t, YAMLProfileWriter::BlockProbeCtx> BlockCtx;
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
for (const MCDecodedPseudoProbe &Probe :
@@ -2409,15 +2405,14 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const uint32_t OutputAddress = Probe.getAddress();
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
- const unsigned BlockIndex = getBlock(InputOffset).second;
- BlockProbes[BlockIndex].emplace_back(Probe);
+ const auto &[BlockOffset, BlockIndex] = getBlock(InputOffset);
+ BlockCtx[BlockIndex].addBlockProbe(InlineTreeNodeId, Probe,
+ InputOffset - BlockOffset);
}
}
- for (auto &[Block, Probes] : BlockProbes) {
- YamlBF.Blocks[Block].PseudoProbes =
- YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
- }
+ for (auto &[Block, Ctx] : BlockCtx)
+ Ctx.finalize(YamlBF.Blocks[Block]);
}
// Skip printing if there's no profile data
llvm::erase_if(
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 1a61949..5fb65153 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -348,26 +348,10 @@ private:
return It->second;
};
- auto matchPseudoProbeInfo = [&](const yaml::bolt::PseudoProbeInfo
- &ProfileProbe,
- uint32_t NodeId) {
- for (uint64_t Index = 0; Index < 64; ++Index)
- if (ProfileProbe.BlockMask & 1ull << Index)
- ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, Index + 1)];
- for (const auto &ProfileProbes :
- {ProfileProbe.BlockProbes, ProfileProbe.IndCallProbes,
- ProfileProbe.CallProbes})
- for (uint64_t ProfileProbe : ProfileProbes)
- ++FlowBlockMatchCount[matchProfileProbeToBlock(NodeId, ProfileProbe)];
- };
-
- for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes) {
- if (!ProfileProbe.InlineTreeNodes.empty())
- for (uint32_t ProfileInlineTreeNode : ProfileProbe.InlineTreeNodes)
- matchPseudoProbeInfo(ProfileProbe, ProfileInlineTreeNode);
- else
- matchPseudoProbeInfo(ProfileProbe, ProfileProbe.InlineTreeIndex);
- }
+ for (const yaml::bolt::PseudoProbeInfo &ProfileProbe : BlockPseudoProbes)
+ for (uint32_t Node : ProfileProbe.InlineTreeNodes)
+ for (uint64_t Probe : ProfileProbe.BlockProbes)
+ ++FlowBlockMatchCount[matchProfileProbeToBlock(Node, Probe)];
uint32_t BestMatchCount = 0;
uint32_t TotalMatchCount = 0;
const FlowBlock *BestMatchBlock = nullptr;
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index 1632aa1..cd4e77b 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -129,50 +129,62 @@ YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
return {Desc, InlineTree};
}
-std::vector<yaml::bolt::PseudoProbeInfo>
-YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
- struct BlockProbeInfoHasher {
- size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
- return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes),
- llvm::hash_combine_range(BPI.CallProbes),
- llvm::hash_combine_range(BPI.IndCallProbes));
+void YAMLProfileWriter::BlockProbeCtx::addBlockProbe(
+ const InlineTreeMapTy &Map, const MCDecodedPseudoProbe &Probe,
+ uint32_t ProbeOffset) {
+ auto It = Map.find(Probe.getInlineTreeNode());
+ if (It == Map.end())
+ return;
+ auto NodeId = It->second;
+ uint32_t Index = Probe.getIndex();
+ if (Probe.isCall())
+ CallProbes[ProbeOffset] =
+ Call{Index, NodeId, Probe.isIndirectCall(), false};
+ else
+ NodeToProbes[NodeId].emplace_back(Index);
+}
+
+void YAMLProfileWriter::BlockProbeCtx::finalize(
+ yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
+ // Hash block probes by vector
+ struct ProbeHasher {
+ size_t operator()(const ArrayRef<uint64_t> Probes) const {
+ return llvm::hash_combine_range(Probes);
}
};
- // Check identical BlockProbeInfo structs and merge them
- std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
- BlockProbeInfoHasher>
- BPIToNodes;
- for (auto &[NodeId, Probes] : NodeProbes) {
- yaml::bolt::PseudoProbeInfo BPI;
- BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
- BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
- BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
- BPIToNodes[BPI].push_back(NodeId);
+ // Check identical block probes and merge them
+ std::unordered_map<std::vector<uint64_t>, std::vector<uint32_t>, ProbeHasher>
+ ProbesToNodes;
+ for (auto &[NodeId, Probes] : NodeToProbes) {
+ llvm::sort(Probes);
+ ProbesToNodes[Probes].emplace_back(NodeId);
}
-
- auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
- for (auto Id : Ids)
- if (Id > 64)
- Vec.emplace_back(Id);
- else
- Mask |= 1ull << (Id - 1);
- };
-
- // Add to YAML with merged nodes/block mask optimizations
- std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
- YamlProbes.reserve(BPIToNodes.size());
- for (const auto &[BPI, Nodes] : BPIToNodes) {
- auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
- YamlBPI.CallProbes = BPI.CallProbes;
- YamlBPI.IndCallProbes = BPI.IndCallProbes;
- if (Nodes.size() == 1)
- YamlBPI.InlineTreeIndex = Nodes.front();
- else
- YamlBPI.InlineTreeNodes = Nodes;
- handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
+ for (auto &[Probes, Nodes] : ProbesToNodes) {
+ llvm::sort(Nodes);
+ YamlBB.PseudoProbes.emplace_back(
+ yaml::bolt::PseudoProbeInfo{Probes, Nodes});
+ }
+ for (yaml::bolt::CallSiteInfo &CSI : YamlBB.CallSites) {
+ auto It = CallProbes.find(CSI.Offset);
+ if (It == CallProbes.end())
+ continue;
+ Call &Probe = It->second;
+ CSI.Probe = Probe.Id;
+ CSI.InlineTreeNode = Probe.Node;
+ CSI.Indirect = Probe.Indirect;
+ Probe.Used = true;
+ }
+ for (const auto &[Offset, Probe] : CallProbes) {
+ if (Probe.Used)
+ continue;
+ yaml::bolt::CallSiteInfo CSI;
+ CSI.Offset = Offset;
+ CSI.Probe = Probe.Id;
+ CSI.InlineTreeNode = Probe.Node;
+ CSI.Indirect = Probe.Indirect;
+ YamlBB.CallSites.emplace_back(CSI);
}
- return YamlProbes;
}
std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
@@ -343,12 +355,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
const uint64_t FuncAddr = BF.getAddress();
- const std::pair<uint64_t, uint64_t> &BlockRange =
- BB->getInputAddressRange();
- const std::pair<uint64_t, uint64_t> BlockAddrRange = {
- FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
- auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
- YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
+ auto [Start, End] = BB->getInputAddressRange();
+ Start += FuncAddr;
+ End += FuncAddr;
+ BlockProbeCtx Ctx;
+ for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(Start, End))
+ Ctx.addBlockProbe(InlineTreeNodeId, Probe, Probe.getAddress() - Start);
+ Ctx.finalize(YamlBB);
}
YamlBF.Blocks.emplace_back(YamlBB);
@@ -382,7 +395,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
StringSet<> EventNames = RI.getProfileReader()->getEventNames();
if (!EventNames.empty()) {
std::string Sep;
- for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) {
+ for (const StringMapEntry<EmptyStringSetTag> &EventEntry : EventNames) {
BP.Header.EventNames += Sep + EventEntry.first().str();
Sep = ",";
}
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 782137e..85f23dc 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "bolt/Rewrite/BinaryPassManager.h"
-#include "bolt/Passes/ADRRelaxationPass.h"
+#include "bolt/Passes/AArch64RelaxationPass.h"
#include "bolt/Passes/Aligner.h"
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
@@ -19,15 +19,15 @@
#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Passes/IndirectCallPromotion.h"
#include "bolt/Passes/Inliner.h"
-#include "bolt/Passes/InsertNegateRAStatePass.h"
#include "bolt/Passes/Instrumentation.h"
#include "bolt/Passes/JTFootprintReduction.h"
#include "bolt/Passes/LongJmp.h"
#include "bolt/Passes/LoopInversionPass.h"
#include "bolt/Passes/MCF.h"
-#include "bolt/Passes/MarkRAStates.h"
#include "bolt/Passes/PLTCall.h"
#include "bolt/Passes/PatchEntries.h"
+#include "bolt/Passes/PointerAuthCFIAnalyzer.h"
+#include "bolt/Passes/PointerAuthCFIFixup.h"
#include "bolt/Passes/ProfileQualityStats.h"
#include "bolt/Passes/RegReAssign.h"
#include "bolt/Passes/ReorderData.h"
@@ -129,10 +129,19 @@ static cl::opt<bool> PrintJTFootprintReduction(
cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
cl::cat(BoltOptCategory));
-static cl::opt<bool>
- PrintAdrRelaxation("print-adr-relaxation",
- cl::desc("print functions after ADR Relaxation pass"),
- cl::Hidden, cl::cat(BoltOptCategory));
+static cl::opt<bool> PrintAArch64Relaxation(
+ "print-adr-ldr-relaxation",
+ cl::desc("print functions after ADR/LDR Relaxation pass"), cl::Hidden,
+ cl::cat(BoltOptCategory));
+
+cl::opt<bool> PrintPAuthCFIAnalyzer(
+ "print-pointer-auth-cfi-analyzer",
+ cl::desc("print functions after PointerAuthCFIAnalyzer pass"), cl::Hidden,
+ cl::cat(BoltOptCategory));
+static cl::opt<bool> PrintPAuthCFIFixup(
+ "print-pointer-auth-cfi-fixup",
+ cl::desc("print functions after PointerAuthCFIFixup pass"), cl::Hidden,
+ cl::cat(BoltOptCategory));
static cl::opt<bool>
PrintLongJmp("print-longjmp",
@@ -362,7 +371,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
BinaryFunctionPassManager Manager(BC);
if (BC.isAArch64())
- Manager.registerPass(std::make_unique<MarkRAStates>());
+ Manager.registerPass(
+ std::make_unique<PointerAuthCFIAnalyzer>(PrintPAuthCFIAnalyzer));
Manager.registerPass(
std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));
@@ -517,14 +527,15 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
if (BC.isAArch64()) {
Manager.registerPass(
- std::make_unique<ADRRelaxationPass>(PrintAdrRelaxation));
+ std::make_unique<AArch64RelaxationPass>(PrintAArch64Relaxation));
// Tighten branches according to offset differences between branch and
// targets. No extra instructions after this pass, otherwise we may have
// relocations out of range and crash during linking.
Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp));
- Manager.registerPass(std::make_unique<InsertNegateRAState>());
+ Manager.registerPass(
+ std::make_unique<PointerAuthCFIFixup>(PrintPAuthCFIFixup));
}
// This pass should always run last.*
diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt
index 5b15edc..bc1b2ed 100644
--- a/bolt/lib/Rewrite/CMakeLists.txt
+++ b/bolt/lib/Rewrite/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_library(LLVMBOLTRewrite
BuildIDRewriter.cpp
PseudoProbeRewriter.cpp
RewriteInstance.cpp
+ RSeqRewriter.cpp
SDTRewriter.cpp
GNUPropertyRewriter.cpp
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 5e3fa93..816acb2 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -1723,7 +1723,76 @@ StringRef getSectionName(const SectionRef &Section) {
return Name;
}
-// Extracts an appropriate slice if input is DWP.
+/// Extracts the slice of the .debug_str.dwo section for a given CU from a DWP
+/// file, based on the .debug_str_offsets.dwo section. This helps address DWO
+/// bloat that may occur after updates.
+///
+/// A slice of .debug_str.dwo may be composed of several non-contiguous
+/// fragments. These non-contiguous string views will be written out
+/// sequentially, avoiding the copying overhead caused by assembling them.
+///
+/// The .debug_str_offsets for the first CU often does not need to be updated,
+/// so copying is only performed when .debug_str_offsets requires updating.
+static void UpdateStrAndStrOffsets(StringRef StrDWOContent,
+ StringRef StrOffsetsContent,
+ SmallVectorImpl<StringRef> &StrDWOOutData,
+ std::string &StrOffsetsOutData,
+ unsigned DwarfVersion, bool IsLittleEndian) {
+ const llvm::endianness Endian =
+ IsLittleEndian ? llvm::endianness::little : llvm::endianness::big;
+ const uint64_t HeaderOffset = (DwarfVersion >= 5) ? 8 : 0;
+ constexpr size_t SizeOfOffset = sizeof(int32_t);
+ const uint64_t NumOffsets =
+ (StrOffsetsContent.size() - HeaderOffset) / SizeOfOffset;
+
+ DataExtractor Extractor(StrOffsetsContent, IsLittleEndian, 0);
+ uint64_t ExtractionOffset = HeaderOffset;
+
+ using StringFragment = DWARFUnitIndex::Entry::SectionContribution;
+ const auto getStringLength = [](StringRef Content,
+ uint64_t Offset) -> uint64_t {
+ size_t NullPos = Content.find('\0', Offset);
+ return (NullPos != StringRef::npos) ? (NullPos - Offset + 1) : 0;
+ };
+ const auto isContiguous = [](const StringFragment &Fragment,
+ uint64_t NextOffset) -> bool {
+ return NextOffset == Fragment.getOffset() + Fragment.getLength();
+ };
+ std::optional<StringFragment> CurrentFragment;
+ uint64_t AccumulatedStrLen = 0;
+ for (uint64_t I = 0; I < NumOffsets; ++I) {
+ const uint64_t StrOffset = Extractor.getU32(&ExtractionOffset);
+ const uint64_t StringLength = getStringLength(StrDWOContent, StrOffset);
+ if (!CurrentFragment) {
+ // First init.
+ CurrentFragment = StringFragment(StrOffset, StringLength);
+ } else {
+ if (isContiguous(*CurrentFragment, StrOffset)) {
+ // Expanding the current fragment.
+ CurrentFragment->setLength(CurrentFragment->getLength() + StringLength);
+ } else {
+ // Save the current fragment and start a new one.
+ StrDWOOutData.push_back(StrDWOContent.substr(
+ CurrentFragment->getOffset(), CurrentFragment->getLength()));
+ CurrentFragment = StringFragment(StrOffset, StringLength);
+ }
+ }
+ if (AccumulatedStrLen != StrOffset) {
+ // Updating str offsets.
+ if (StrOffsetsOutData.empty())
+ StrOffsetsOutData = StrOffsetsContent.str();
+ llvm::support::endian::write32(
+ &StrOffsetsOutData[HeaderOffset + I * SizeOfOffset],
+ static_cast<uint32_t>(AccumulatedStrLen), Endian);
+ }
+ AccumulatedStrLen += StringLength;
+ }
+ if (CurrentFragment)
+ StrDWOOutData.push_back(StrDWOContent.substr(CurrentFragment->getOffset(),
+ CurrentFragment->getLength()));
+}
+
+// Extracts an appropriate slice if input is DWP.
// Applies patches or overwrites the section.
std::optional<StringRef> updateDebugData(
DWARFContext &DWCtx, StringRef SectionName, StringRef SectionContents,
@@ -1772,6 +1841,8 @@ std::optional<StringRef> updateDebugData(
errs() << "BOLT-WARNING: unsupported debug section: " << SectionName
<< "\n";
if (StrWriter.isInitialized()) {
+ if (CUDWOEntry)
+ return StrWriter.getBufferStr();
OutputBuffer = StrWriter.releaseBuffer();
return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
OutputBuffer->size());
@@ -1786,6 +1857,8 @@ std::optional<StringRef> updateDebugData(
}
case DWARFSectionKind::DW_SECT_STR_OFFSETS: {
if (StrOffstsWriter.isFinalized()) {
+ if (CUDWOEntry)
+ return StrOffstsWriter.getBufferStr();
OutputBuffer = StrOffstsWriter.releaseBuffer();
return StringRef(reinterpret_cast<const char *>(OutputBuffer->data()),
OutputBuffer->size());
@@ -1888,6 +1961,10 @@ void DWARFRewriter::writeDWOFiles(
}
}
+ StringRef StrDWOContent;
+ StringRef StrOffsetsContent;
+ llvm::SmallVector<StringRef, 3> StrDWOOutData;
+ std::string StrOffsetsOutData;
for (const SectionRef &Section : File->sections()) {
std::unique_ptr<DebugBufferVector> OutputData;
StringRef SectionName = getSectionName(Section);
@@ -1895,11 +1972,50 @@ void DWARFRewriter::writeDWOFiles(
continue;
Expected<StringRef> ContentsExp = Section.getContents();
assert(ContentsExp && "Invalid contents.");
+ if (IsDWP && SectionName == "debug_str.dwo") {
+ if (StrWriter.isInitialized())
+ StrDWOContent = StrWriter.getBufferStr();
+ else
+ StrDWOContent = *ContentsExp;
+ continue;
+ }
if (std::optional<StringRef> OutData = updateDebugData(
(*DWOCU)->getContext(), SectionName, *ContentsExp, KnownSections,
*Streamer, *this, CUDWOEntry, DWOId, OutputData, RangeListssWriter,
- LocWriter, StrOffstsWriter, StrWriter, OverridenSections))
+ LocWriter, StrOffstsWriter, StrWriter, OverridenSections)) {
+ if (IsDWP && SectionName == "debug_str_offsets.dwo") {
+ StrOffsetsContent = *OutData;
+ continue;
+ }
Streamer->emitBytes(*OutData);
+ }
+ }
+
+ if (IsDWP) {
+ // Handling both .debug_str.dwo and .debug_str_offsets.dwo concurrently. In
+ // the original DWP, .debug_str is a deduplicated global table, and the
+ // .debug_str.dwo slice for a single CU needs to be extracted according to
+ // .debug_str_offsets.dwo.
+ UpdateStrAndStrOffsets(StrDWOContent, StrOffsetsContent, StrDWOOutData,
+ StrOffsetsOutData, CU.getVersion(),
+ (*DWOCU)->getContext().isLittleEndian());
+ auto SectionIter = KnownSections.find("debug_str.dwo");
+ if (SectionIter != KnownSections.end()) {
+ Streamer->switchSection(SectionIter->second.first);
+ for (size_t i = 0; i < StrDWOOutData.size(); ++i) {
+ StringRef OutData = StrDWOOutData[i];
+ if (!OutData.empty())
+ Streamer->emitBytes(OutData);
+ }
+ }
+ SectionIter = KnownSections.find("debug_str_offsets.dwo");
+ if (SectionIter != KnownSections.end()) {
+ Streamer->switchSection(SectionIter->second.first);
+ if (!StrOffsetsOutData.empty())
+ Streamer->emitBytes(StrOffsetsOutData);
+ else
+ Streamer->emitBytes(StrOffsetsContent);
+ }
}
Streamer->finish();
TempOut->keep();
diff --git a/bolt/lib/Rewrite/RSeqRewriter.cpp b/bolt/lib/Rewrite/RSeqRewriter.cpp
new file mode 100644
index 0000000..46bce66
--- /dev/null
+++ b/bolt/lib/Rewrite/RSeqRewriter.cpp
@@ -0,0 +1,72 @@
+//===- bolt/Rewrite/RSeqRewriter.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic support for restartable sequences used by tcmalloc. Prevent critical
+// section overrides by ignoring optimizations in containing functions.
+//
+// References:
+// * https://google.github.io/tcmalloc/rseq.html
+// * tcmalloc/internal/percpu_rseq_x86_64.S
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Rewrite/MetadataRewriter.h"
+#include "bolt/Rewrite/MetadataRewriters.h"
+#include "llvm/Support/Errc.h"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace {
+
+class RSeqRewriter final : public MetadataRewriter {
+public:
+ RSeqRewriter(StringRef Name, BinaryContext &BC)
+ : MetadataRewriter(Name, BC) {}
+
+ Error preCFGInitializer() override {
+ for (const BinarySection &Section : BC.allocatableSections()) {
+ if (Section.getName() != "__rseq_cs")
+ continue;
+
+ auto handleRelocation = [&](const Relocation &Rel, bool IsDynamic) {
+ BinaryFunction *BF = nullptr;
+ if (Rel.Symbol)
+ BF = BC.getFunctionForSymbol(Rel.Symbol);
+ else if (Relocation::isRelative(Rel.Type))
+ BF = BC.getBinaryFunctionContainingAddress(Rel.Addend);
+
+ if (!BF) {
+ BC.errs() << "BOLT-WARNING: no function found matching "
+ << (IsDynamic ? "dynamic " : "")
+ << "relocation in __rseq_cs\n";
+ } else if (!BF->isIgnored()) {
+ BC.outs() << "BOLT-INFO: restartable sequence reference detected in "
+ << *BF << ". Function will not be optimized\n";
+ BF->setIgnored();
+ }
+ };
+
+ for (const Relocation &Rel : Section.dynamicRelocations())
+ handleRelocation(Rel, /*IsDynamic*/ true);
+
+ for (const Relocation &Rel : Section.relocations())
+ handleRelocation(Rel, /*IsDynamic*/ false);
+ }
+
+ return Error::success();
+ }
+};
+
+} // namespace
+
+std::unique_ptr<MetadataRewriter>
+llvm::bolt::createRSeqRewriter(BinaryContext &BC) {
+ return std::make_unique<RSeqRewriter>("rseq-cs-rewriter", BC);
+}
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 77e5688..0e14500 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -80,8 +80,11 @@ namespace opts {
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
+extern cl::opt<uint32_t> InstrumentationSleepTime;
extern cl::opt<bool> KeepNops;
extern cl::opt<bool> Lite;
+extern cl::list<std::string> PrintOnly;
+extern cl::opt<std::string> PrintOnlyFile;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TerminalHLT;
@@ -292,10 +295,31 @@ cl::bits<GadgetScannerKind> GadgetScannersToRun(
clEnumValN(GS_ALL, "all", "All implemented scanners")),
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(BinaryAnalysisCategory));
+// Primary targets for hooking runtime library initialization,
+// with fallback to the next item if the current item is not available
+// in the input binary.
+enum RuntimeLibInitHookTarget : char {
+ RLIH_ENTRY_POINT = 0, /// Use ELF Header Entry Point
+ RLIH_INIT = 1, /// Use ELF DT_INIT entry
+ RLIH_INIT_ARRAY = 2, /// Use ELF .init_array entry
+};
+
+cl::opt<RuntimeLibInitHookTarget> RuntimeLibInitHook(
+ "runtime-lib-init-hook",
+ cl::desc("Primary target for hooking runtime library initialization, used "
+ "in fallback order of availabiliy in input binary (entry_point -> "
+ "init -> init_array) (default: entry_point)"),
+ cl::Hidden, cl::init(RLIH_ENTRY_POINT),
+ cl::values(clEnumValN(RLIH_ENTRY_POINT, "entry_point",
+ "use ELF Header Entry Point"),
+ clEnumValN(RLIH_INIT, "init", "use ELF DT_INIT entry"),
+ clEnumValN(RLIH_INIT_ARRAY, "init_array",
+ "use ELF .init_array entry")),
+ cl::ZeroOrMore, cl::cat(BoltOptCategory));
+
} // namespace opts
// FIXME: implement a better way to mark sections for replacement.
-constexpr const char *RewriteInstance::SectionsToOverwrite[];
std::vector<std::string> RewriteInstance::DebugSectionsToOverwrite = {
".debug_abbrev", ".debug_aranges", ".debug_line", ".debug_line_str",
".debug_loc", ".debug_loclists", ".debug_ranges", ".debug_rnglists",
@@ -731,6 +755,8 @@ Error RewriteInstance::run() {
<< "\n";
BC->outs() << "BOLT-INFO: BOLT version: " << BoltRevision << "\n";
+ selectFunctionsToPrint();
+
if (Error E = discoverStorage())
return E;
if (Error E = readSpecialSections())
@@ -738,9 +764,12 @@ Error RewriteInstance::run() {
adjustCommandLineOptions();
discoverFileObjects();
- if (opts::Instrument && !BC->IsStaticExecutable)
+ if (opts::Instrument && !BC->IsStaticExecutable) {
+ if (Error E = discoverRtInitAddress())
+ return E;
if (Error E = discoverRtFiniAddress())
return E;
+ }
preprocessProfileData();
@@ -782,8 +811,12 @@ Error RewriteInstance::run() {
updateMetadata();
- if (opts::Instrument && !BC->IsStaticExecutable)
- updateRtFiniReloc();
+ if (opts::Instrument && !BC->IsStaticExecutable) {
+ if (Error E = updateRtInitReloc())
+ return E;
+ if (Error E = updateRtFiniReloc())
+ return E;
+ }
if (opts::OutputFilename == "/dev/null") {
BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n";
@@ -1408,6 +1441,65 @@ void RewriteInstance::discoverBOLTReserved() {
NextAvailableAddress = BC->BOLTReserved.start();
}
+Error RewriteInstance::discoverRtInitAddress() {
+ if (BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT)
+ return Error::success();
+
+ // Use DT_INIT if it's available.
+ if (BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT) {
+ BC->StartFunctionAddress = BC->InitAddress;
+ return Error::success();
+ }
+
+ if (!BC->InitArrayAddress || !BC->InitArraySize) {
+ return createStringError(std::errc::not_supported,
+ "Instrumentation of shared library needs either "
+ "DT_INIT or DT_INIT_ARRAY");
+ }
+
+ if (*BC->InitArraySize < BC->AsmInfo->getCodePointerSize()) {
+ return createStringError(std::errc::not_supported,
+ "Need at least 1 DT_INIT_ARRAY slot");
+ }
+
+ ErrorOr<BinarySection &> InitArraySection =
+ BC->getSectionForAddress(*BC->InitArrayAddress);
+ if (auto EC = InitArraySection.getError())
+ return errorCodeToError(EC);
+
+ if (InitArraySection->getAddress() != *BC->InitArrayAddress) {
+ return createStringError(std::errc::not_supported,
+ "Inconsistent address of .init_array section");
+ }
+
+ if (const Relocation *Reloc = InitArraySection->getDynamicRelocationAt(0)) {
+ if (Reloc->isRelative()) {
+ BC->StartFunctionAddress = Reloc->Addend;
+ } else {
+ MCSymbol *Sym = Reloc->Symbol;
+ if (!Sym)
+ return createStringError(
+ std::errc::not_supported,
+ "Failed to locate symbol for 0 entry of .init_array");
+ const BinaryFunction *BF = BC->getFunctionForSymbol(Sym);
+ if (!BF)
+ return createStringError(
+ std::errc::not_supported,
+ "Failed to locate binary function for 0 entry of .init_array");
+ BC->StartFunctionAddress = BF->getAddress() + Reloc->Addend;
+ }
+ return Error::success();
+ }
+
+ if (const Relocation *Reloc = InitArraySection->getRelocationAt(0)) {
+ BC->StartFunctionAddress = Reloc->Value;
+ return Error::success();
+ }
+
+ return createStringError(std::errc::not_supported,
+ "No relocation for first DT_INIT_ARRAY slot");
+}
+
Error RewriteInstance::discoverRtFiniAddress() {
// Use DT_FINI if it's available.
if (BC->FiniAddress) {
@@ -1416,6 +1508,9 @@ Error RewriteInstance::discoverRtFiniAddress() {
}
if (!BC->FiniArrayAddress || !BC->FiniArraySize) {
+ // Missing fini hooks are allowed when instrumentation-sleep-time is in use.
+ if (opts::InstrumentationSleepTime > 0)
+ return Error::success();
return createStringError(
std::errc::not_supported,
"Instrumentation needs either DT_FINI or DT_FINI_ARRAY");
@@ -1431,6 +1526,11 @@ Error RewriteInstance::discoverRtFiniAddress() {
if (auto EC = FiniArraySection.getError())
return errorCodeToError(EC);
+ if (FiniArraySection->getAddress() != *BC->FiniArrayAddress) {
+ return createStringError(std::errc::not_supported,
+ "Inconsistent address of .fini_array section");
+ }
+
if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) {
BC->FiniFunctionAddress = Reloc->Addend;
return Error::success();
@@ -1445,26 +1545,99 @@ Error RewriteInstance::discoverRtFiniAddress() {
"No relocation for first DT_FINI_ARRAY slot");
}
-void RewriteInstance::updateRtFiniReloc() {
+Error RewriteInstance::updateRtInitReloc() {
+ if (BC->HasInterpHeader && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT)
+ return Error::success();
+
+ // Updating DT_INIT is handled by patchELFDynamic.
+ if (BC->InitAddress && opts::RuntimeLibInitHook <= opts::RLIH_INIT)
+ return Error::success();
+
+ const RuntimeLibrary *RT = BC->getRuntimeLibrary();
+ if (!RT || !RT->getRuntimeStartAddress())
+ return Error::success();
+
+ if (!BC->InitArrayAddress)
+ return Error::success();
+
+ if (!BC->InitArrayAddress || !BC->InitArraySize)
+ return createStringError(std::errc::not_supported,
+ "inconsistent .init_array state");
+
+ ErrorOr<BinarySection &> InitArraySection =
+ BC->getSectionForAddress(*BC->InitArrayAddress);
+ if (!InitArraySection)
+ return createStringError(std::errc::not_supported, ".init_array removed");
+
+ if (std::optional<Relocation> Reloc =
+ InitArraySection->takeDynamicRelocationAt(0)) {
+ if (Reloc->isRelative()) {
+ if (Reloc->Addend != BC->StartFunctionAddress)
+ return createStringError(std::errc::not_supported,
+ "inconsistent .init_array dynamic relocation");
+ Reloc->Addend = RT->getRuntimeStartAddress();
+ InitArraySection->addDynamicRelocation(*Reloc);
+ } else {
+ MCSymbol *Sym = Reloc->Symbol;
+ if (!Sym)
+ return createStringError(
+ std::errc::not_supported,
+ "Failed to locate symbol for 0 entry of .init_array");
+ const BinaryFunction *BF = BC->getFunctionForSymbol(Sym);
+ if (!BF)
+ return createStringError(
+ std::errc::not_supported,
+ "Failed to locate binary function for 0 entry of .init_array");
+ if (BF->getAddress() + Reloc->Addend != BC->StartFunctionAddress)
+ return createStringError(std::errc::not_supported,
+ "inconsistent .init_array dynamic relocation");
+ InitArraySection->addDynamicRelocation(Relocation{
+ /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
+ /*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0});
+ }
+ }
+ // Update the static relocation by adding a pending relocation which will get
+ // patched when flushPendingRelocations is called in rewriteFile. Note that
+ // flushPendingRelocations will calculate the value to patch as
+ // "Symbol + Addend". Since we don't have a symbol, just set the addend to the
+ // desired value.
+ InitArraySection->addPendingRelocation(Relocation{
+ /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
+ /*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0});
+ BC->outs()
+ << "BOLT-INFO: runtime library initialization was hooked via .init_array "
+ "entry, set to 0x"
+ << Twine::utohexstr(RT->getRuntimeStartAddress()) << "\n";
+ return Error::success();
+}
+
+Error RewriteInstance::updateRtFiniReloc() {
// Updating DT_FINI is handled by patchELFDynamic.
if (BC->FiniAddress)
- return;
+ return Error::success();
const RuntimeLibrary *RT = BC->getRuntimeLibrary();
if (!RT || !RT->getRuntimeFiniAddress())
- return;
+ return Error::success();
- assert(BC->FiniArrayAddress && BC->FiniArraySize &&
- "inconsistent .fini_array state");
+ if (!BC->FiniArrayAddress || !BC->FiniArraySize) {
+ // Missing fini hooks are allowed when instrumentation-sleep-time is in use.
+ if (opts::InstrumentationSleepTime > 0)
+ return Error::success();
+ return createStringError(std::errc::not_supported,
+ "inconsistent .fini_array state");
+ }
ErrorOr<BinarySection &> FiniArraySection =
BC->getSectionForAddress(*BC->FiniArrayAddress);
- assert(FiniArraySection && ".fini_array removed");
+ if (!FiniArraySection)
+ return createStringError(std::errc::not_supported, ".fini_array removed");
if (std::optional<Relocation> Reloc =
FiniArraySection->takeDynamicRelocationAt(0)) {
- assert(Reloc->Addend == BC->FiniFunctionAddress &&
- "inconsistent .fini_array dynamic relocation");
+ if (Reloc->Addend != BC->FiniFunctionAddress)
+ return createStringError(std::errc::not_supported,
+ "inconsistent .fini_array dynamic relocation");
Reloc->Addend = RT->getRuntimeFiniAddress();
FiniArraySection->addDynamicRelocation(*Reloc);
}
@@ -1477,6 +1650,10 @@ void RewriteInstance::updateRtFiniReloc() {
FiniArraySection->addPendingRelocation(Relocation{
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
+ BC->outs() << "BOLT-INFO: runtime library finalization was hooked via "
+ ".fini_array entry, set to 0x"
+ << Twine::utohexstr(RT->getRuntimeFiniAddress()) << "\n";
+ return Error::success();
}
void RewriteInstance::registerFragments() {
@@ -2075,7 +2252,7 @@ Error RewriteInstance::readSpecialSections() {
if (BC->IsStripped && !opts::AllowStripped) {
BC->errs()
<< "BOLT-ERROR: stripped binaries are not supported. If you know "
- "what you're doing, use --allow-stripped to proceed";
+ "what you're doing, use --allow-stripped to proceed\n";
exit(1);
}
@@ -2175,6 +2352,14 @@ void RewriteInstance::adjustCommandLineOptions() {
exit(1);
}
+ if (opts::Instrument && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT &&
+ !BC->HasInterpHeader) {
+ BC->errs()
+ << "BOLT-WARNING: adjusted runtime-lib-init-hook to 'init' due to "
+ "absence of INTERP header\n";
+ opts::RuntimeLibInitHook = opts::RLIH_INIT;
+ }
+
if (opts::HotText && opts::HotTextMoveSections.getNumOccurrences() == 0) {
opts::HotTextMoveSections.addValue(".stub");
opts::HotTextMoveSections.addValue(".mover");
@@ -2955,8 +3140,10 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
// if-condition above) so we're handling a relocation from a function
// to itself. RISC-V uses such relocations for branches, for example.
// These should not be registered as externally references offsets.
- if (!ContainingBF)
- ReferencedBF->registerReferencedOffset(RefFunctionOffset);
+ if (!ContainingBF && !ReferencedBF->isInConstantIsland(Address)) {
+ ReferencedBF->registerInternalRefDataRelocation(RefFunctionOffset,
+ Rel.getOffset());
+ }
}
if (opts::Verbosity > 1 &&
BinarySection(*BC, RelocatedSection).isWritable())
@@ -3099,17 +3286,22 @@ static BinaryFunction *getInitFunctionIfStaticBinary(BinaryContext &BC) {
return BC.getBinaryFunctionAtAddress(BD->getAddress());
}
+// Append every line of the file named by \p FunctionNamesFile to
+// \p FunctionNames. Does nothing when no file name was supplied.
+static void populateFunctionNames(cl::opt<std::string> &FunctionNamesFile,
+                                  cl::list<std::string> &FunctionNames) {
+  if (!FunctionNamesFile.empty()) {
+    std::ifstream Input(FunctionNamesFile, std::ios::in);
+    // Each line of the file is taken verbatim as one function name.
+    for (std::string Line; std::getline(Input, Line);)
+      FunctionNames.push_back(Line);
+  }
+}
+
+// Extend the opts::PrintOnly list with the function names read (one per line)
+// from the file named by opts::PrintOnlyFile; a no-op when no file was given.
+void RewriteInstance::selectFunctionsToPrint() {
+ populateFunctionNames(opts::PrintOnlyFile, opts::PrintOnly);
+}
+
void RewriteInstance::selectFunctionsToProcess() {
// Extend the list of functions to process or skip from a file.
- auto populateFunctionNames = [](cl::opt<std::string> &FunctionNamesFile,
- cl::list<std::string> &FunctionNames) {
- if (FunctionNamesFile.empty())
- return;
- std::ifstream FuncsFile(FunctionNamesFile, std::ios::in);
- std::string FuncName;
- while (std::getline(FuncsFile, FuncName))
- FunctionNames.push_back(FuncName);
- };
populateFunctionNames(opts::FunctionNamesFile, opts::ForceFunctionNames);
populateFunctionNames(opts::SkipFunctionNamesFile, opts::SkipFunctionNames);
populateFunctionNames(opts::FunctionNamesFileNR, opts::ForceFunctionNamesNR);
@@ -3345,6 +3537,8 @@ void RewriteInstance::initializeMetadataManager() {
MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));
+ MetadataManager.registerRewriter(createRSeqRewriter(*BC));
+
MetadataManager.registerRewriter(createSDTRewriter(*BC));
MetadataManager.registerRewriter(createGNUPropertyRewriter(*BC));
@@ -3495,6 +3689,7 @@ void RewriteInstance::disassembleFunctions() {
if (!shouldDisassemble(Function))
continue;
+ Function.validateInternalBranches();
Function.postProcessEntryPoints();
Function.postProcessJumpTables();
}
@@ -4837,9 +5032,14 @@ void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
ELFEhdrTy NewEhdr = Obj.getHeader();
if (BC->HasRelocations) {
- if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
+ RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
+ if (RtLibrary && opts::RuntimeLibInitHook == opts::RLIH_ENTRY_POINT) {
NewEhdr.e_entry = RtLibrary->getRuntimeStartAddress();
- else
+ BC->outs()
+ << "BOLT-INFO: runtime library initialization was hooked via ELF "
+ "Header Entry Point, set to 0x"
+ << Twine::utohexstr(NewEhdr.e_entry) << "\n";
+ } else
NewEhdr.e_entry = getNewFunctionAddress(NewEhdr.e_entry);
assert((NewEhdr.e_entry || !Obj.getHeader().e_entry) &&
"cannot find new address for entry point");
@@ -5680,14 +5880,23 @@ void RewriteInstance::patchELFDynamic(ELFObjectFile<ELFT> *File) {
}
RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary();
if (RtLibrary && Dyn.getTag() == ELF::DT_FINI) {
- if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress())
+ if (uint64_t Addr = RtLibrary->getRuntimeFiniAddress()) {
NewDE.d_un.d_ptr = Addr;
+ BC->outs()
+ << "BOLT-INFO: runtime library finalization was hooked via "
+ "DT_FINI, set to 0x"
+ << Twine::utohexstr(Addr) << "\n";
+ }
}
- if (RtLibrary && Dyn.getTag() == ELF::DT_INIT && !BC->HasInterpHeader) {
+ if (RtLibrary && Dyn.getTag() == ELF::DT_INIT &&
+ (!BC->HasInterpHeader ||
+ opts::RuntimeLibInitHook == opts::RLIH_INIT)) {
if (auto Addr = RtLibrary->getRuntimeStartAddress()) {
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set DT_INIT to 0x"
- << Twine::utohexstr(Addr) << '\n');
NewDE.d_un.d_ptr = Addr;
+ BC->outs()
+ << "BOLT-INFO: runtime library initialization was hooked via "
+ "DT_INIT, set to 0x"
+ << Twine::utohexstr(Addr) << "\n";
}
}
break;
@@ -5755,10 +5964,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
for (const Elf_Dyn &Dyn : DynamicEntries) {
switch (Dyn.d_tag) {
case ELF::DT_INIT:
- if (!BC->HasInterpHeader) {
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
- BC->StartFunctionAddress = Dyn.getPtr();
- }
+ BC->InitAddress = Dyn.getPtr();
+ break;
+ case ELF::DT_INIT_ARRAY:
+ BC->InitArrayAddress = Dyn.getPtr();
+ break;
+ case ELF::DT_INIT_ARRAYSZ:
+ BC->InitArraySize = Dyn.getPtr();
break;
case ELF::DT_FINI:
BC->FiniAddress = Dyn.getPtr();
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 7769162..5881d3f 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -142,6 +142,7 @@ static InstructionListType createIncMemory(MCPhysReg RegTo, MCPhysReg RegTmp) {
atomicAdd(Insts.back(), RegTo, RegTmp);
return Insts;
}
+
class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;
@@ -163,11 +164,53 @@ public:
bool isPush(const MCInst &Inst) const override {
return isStoreToStack(Inst);
- };
+ }
bool isPop(const MCInst &Inst) const override {
return isLoadFromStack(Inst);
- };
+ }
+
+ // Heuristic epilogue detection for a basic block without successors. The
+ // block is scanned for a run of stack loads and/or stack pointer adjustments
+ // (optionally mixed with pointer authentication on LR) that leads directly
+ // to the return or branch ending the block; any other instruction in between
+ // invalidates the run seen so far.
+ bool isEpilogue(const BinaryBasicBlock &BB) const override {
+   if (BB.succ_size() != 0)
+     return false;
+
+   // True while the most recent non-CFI instructions form a candidate
+   // epilogue sequence (at least one stack load or SP adjustment was seen and
+   // nothing unrelated followed it).
+   bool InStackRestoreRun = false;
+   for (const MCInst &Instr : BB) {
+     // CFI pseudo instructions are transparent for this analysis.
+     if (isCFI(Instr))
+       continue;
+
+     const bool IsPop = isPop(Instr);
+     // Only explicit ADD/MOV writes to SP are matched here; SP updates folded
+     // into a pre-/post-indexed stack load are covered by IsPop.
+     const bool IsSPAdj = (isADD(Instr) || isMOVW(Instr)) &&
+                          Instr.getOperand(0).isReg() &&
+                          Instr.getOperand(0).getReg() == AArch64::SP;
+     if (IsPop || IsSPAdj) {
+       InStackRestoreRun = true;
+       continue;
+     }
+
+     // Nothing interesting seen yet: keep scanning.
+     if (!InStackRestoreRun)
+       continue;
+     // Pointer authentication on LR may sit inside the epilogue sequence.
+     if (isPAuthOnLR(Instr))
+       continue;
+     if (isReturn(Instr))
+       return true;
+     // A branch ends the scan with the run still considered valid.
+     if (isBranch(Instr))
+       break;
+
+     // Some unrelated instruction followed the stack load/adjustment, so what
+     // was seen before is not part of an epilogue sequence.
+     InStackRestoreRun = false;
+   }
+   return InStackRestoreRun;
+ }
void createCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
@@ -270,6 +313,33 @@ public:
Inst.getOpcode() == AArch64::RETABSPPCr;
}
+ // Build in \p Auth the stand-alone authentication instruction that matches
+ // the fused authenticate-and-return instruction \p AuthAndRet. The operands
+ // are copied over unchanged; only the opcode is replaced.
+ void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) override {
+   Auth.clear();
+   Auth.setOperands(AuthAndRet.getOperands());
+
+   // Map each fused RETA* opcode to its AUTI* counterpart.
+   unsigned AuthOpcode;
+   switch (AuthAndRet.getOpcode()) {
+   case AArch64::RETAA:
+     AuthOpcode = AArch64::AUTIASP;
+     break;
+   case AArch64::RETAB:
+     AuthOpcode = AArch64::AUTIBSP;
+     break;
+   case AArch64::RETAASPPCi:
+     AuthOpcode = AArch64::AUTIASPPCi;
+     break;
+   case AArch64::RETABSPPCi:
+     AuthOpcode = AArch64::AUTIBSPPCi;
+     break;
+   case AArch64::RETAASPPCr:
+     AuthOpcode = AArch64::AUTIASPPCr;
+     break;
+   case AArch64::RETABSPPCr:
+     AuthOpcode = AArch64::AUTIBSPPCr;
+     break;
+   default:
+     llvm_unreachable("Unhandled fused pauth-and-return instruction");
+   }
+   Auth.setOpcode(AuthOpcode);
+ }
+
std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case AArch64::PACIA:
@@ -583,6 +653,14 @@ public:
return Inst.getOpcode() == AArch64::ADDXri;
}
+ // Returns true when \p Inst has the LDRWl opcode.
+ bool isLDRWl(const MCInst &Inst) const override {
+   const unsigned Opcode = Inst.getOpcode();
+   return Opcode == AArch64::LDRWl;
+ }
+
+ // Returns true when \p Inst has the LDRXl opcode.
+ bool isLDRXl(const MCInst &Inst) const override {
+   const unsigned Opcode = Inst.getOpcode();
+   return Opcode == AArch64::LDRXl;
+ }
+
MCPhysReg getADRReg(const MCInst &Inst) const {
assert((isADR(Inst) || isADRP(Inst)) && "Not an ADR instruction");
assert(MCPlus::getNumPrimeOperands(Inst) != 0 &&
@@ -602,6 +680,40 @@ public:
return materializeAddress(Target, Ctx, Reg, Addend);
}
+ // Expand the LDR (literal) instruction \p LDRInst into a two-instruction
+ // sequence: an ADRP followed by an LDRXui/LDRWui, both referencing the
+ // target symbol of the original load. Returns the two new instructions.
+ InstructionListType createAdrpLdr(const MCInst &LDRInst,
+                                   MCContext *Ctx) const override {
+   const bool Is64Bit = isLDRXl(LDRInst);
+   assert((Is64Bit || isLDRWl(LDRInst)) &&
+          "LDR (literal, 32 or 64-bit integer load) instruction expected");
+   assert(LDRInst.getOperand(0).isReg() &&
+          "unexpected operand in LDR instruction");
+
+   const MCPhysReg DataReg = LDRInst.getOperand(0).getReg();
+   // The address is built in the load's own destination register; for a
+   // 32-bit load that means its matching 64-bit super-register.
+   MCPhysReg AddrReg = DataReg;
+   if (!Is64Bit)
+     AddrReg = (MCPhysReg)RegInfo->getMatchingSuperReg(
+         DataReg, AArch64::sub_32,
+         &RegInfo->getRegClass(AArch64::GPR64RegClassID));
+
+   const MCSymbol *Target = getTargetSymbol(LDRInst, 1);
+   assert(Target && "missing target symbol in LDR instruction");
+
+   InstructionListType Insts(2);
+
+   // First instruction: ADRP AddrReg, Target.
+   MCInst &Adrp = Insts[0];
+   Adrp.setOpcode(AArch64::ADRP);
+   Adrp.clear();
+   Adrp.addOperand(MCOperand::createReg(AddrReg));
+   Adrp.addOperand(MCOperand::createImm(0));
+   setOperandToSymbolRef(Adrp, /* OpNum */ 1, Target, 0, Ctx,
+                         ELF::R_AARCH64_NONE);
+
+   // Second instruction: the load itself, with the symbol's low-12-bit
+   // relocation matching the access width attached to operand 2.
+   MCInst &Load = Insts[1];
+   Load.setOpcode(Is64Bit ? AArch64::LDRXui : AArch64::LDRWui);
+   Load.clear();
+   Load.addOperand(MCOperand::createReg(DataReg));
+   Load.addOperand(MCOperand::createReg(AddrReg));
+   Load.addOperand(MCOperand::createImm(0));
+   Load.addOperand(MCOperand::createImm(0));
+   setOperandToSymbolRef(Load, /* OpNum */ 2, Target, 0, Ctx,
+                         Is64Bit ? ELF::R_AARCH64_LDST64_ABS_LO12_NC
+                                 : ELF::R_AARCH64_LDST32_ABS_LO12_NC);
+   return Insts;
+ }
+
bool isTB(const MCInst &Inst) const {
return (Inst.getOpcode() == AArch64::TBNZW ||
Inst.getOpcode() == AArch64::TBNZX ||
@@ -1750,14 +1862,12 @@ public:
}
bool isNoop(const MCInst &Inst) const override {
- return Inst.getOpcode() == AArch64::HINT &&
- Inst.getOperand(0).getImm() == 0;
+ return Inst.getOpcode() == AArch64::NOP;
}
void createNoop(MCInst &Inst) const override {
- Inst.setOpcode(AArch64::HINT);
+ Inst.setOpcode(AArch64::NOP);
Inst.clear();
- Inst.addOperand(MCOperand::createImm(0));
}
bool isTrap(const MCInst &Inst) const override {
@@ -2663,6 +2773,39 @@ public:
return Insts;
}
+ // Turn \p Inst into a BTI instruction (a HINT with the immediate returned by
+ // getBTIHintNum for the requested \p CallTarget / \p JumpTarget variant).
+ void createBTI(MCInst &Inst, bool CallTarget,
+                bool JumpTarget) const override {
+   Inst.setOpcode(AArch64::HINT);
+   // Drop any operands left over from a previous use of Inst so the result
+   // carries exactly one immediate operand, consistent with createNoop and
+   // updateBTIVariant.
+   Inst.clear();
+   unsigned HintNum = getBTIHintNum(CallTarget, JumpTarget);
+   Inst.addOperand(MCOperand::createImm(HintNum));
+ }
+
+ // Returns true when \p Inst acts as a BTI landing pad of the variant selected
+ // by \p CallTarget / \p JumpTarget, either explicitly (a HINT carrying the
+ // matching immediate) or implicitly (see isImplicitBTIC).
+ bool isBTILandingPad(MCInst &Inst, bool CallTarget,
+                      bool JumpTarget) const override {
+   const unsigned ExpectedHint = getBTIHintNum(CallTarget, JumpTarget);
+
+   // Explicit form: HINT with exactly one immediate operand equal to the
+   // expected hint number.
+   if (Inst.getOpcode() == AArch64::HINT && Inst.getNumOperands() == 1) {
+     const MCOperand &HintOp = Inst.getOperand(0);
+     if (HintOp.isImm() && HintOp.getImm() == ExpectedHint)
+       return true;
+   }
+
+   // Implicit form is only accepted for hint number 34 (presumably the BTI C
+   // encoding, given it is paired with isImplicitBTIC).
+   return ExpectedHint == 34 && isImplicitBTIC(Inst);
+ }
+
+ // Returns true for instructions that behave as an implicit BTI C.
+ bool isImplicitBTIC(MCInst &Inst) const override {
+   switch (Inst.getOpcode()) {
+   // PACI[AB]SP are always implicitly BTI C, independently of
+   // SCTLR_EL1.BT[01].
+   case AArch64::PACIASP:
+   case AArch64::PACIBSP:
+     return true;
+   default:
+     return false;
+   }
+ }
+
+ // Rewrite the existing BTI (HINT) instruction \p Inst so that it encodes the
+ // variant selected by \p CallTarget / \p JumpTarget. The opcode is kept.
+ void updateBTIVariant(MCInst &Inst, bool CallTarget,
+                       bool JumpTarget) const override {
+   assert(Inst.getOpcode() == AArch64::HINT && "Not a BTI instruction.");
+   // Replace all current operands with the immediate of the new variant.
+   Inst.clear();
+   Inst.addOperand(
+       MCOperand::createImm(getBTIHintNum(CallTarget, JumpTarget)));
+ }
+
InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx,
MCPhysReg RegName,
int64_t Addend = 0) const override {
@@ -2762,7 +2905,7 @@ public:
BitVector WrittenRegs(RegInfo->getNumRegs());
const BitVector &SizeRegAliases = getAliases(SizeReg);
- for (auto InstIt = BB.begin(); InstIt != CallInst; ++InstIt) {
+ for (auto InstIt = CallInst; InstIt != BB.begin(); --InstIt) {
const MCInst &Inst = *InstIt;
WrittenRegs.reset();
getWrittenRegs(Inst, WrittenRegs);
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 5fca5e8..7c24c2c 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -219,6 +219,12 @@ public:
return getPopSize(Inst) == 0 ? false : true;
}
+ // A basic block is treated as an epilogue as soon as it contains a LEAVE or
+ // a pop instruction.
+ bool isEpilogue(const BinaryBasicBlock &BB) const override {
+   for (const MCInst &Instr : BB)
+     if (isLeave(Instr) || isPop(Instr))
+       return true;
+   return false;
+ }
+
bool isTerminateBranch(const MCInst &Inst) const override {
return Inst.getOpcode() == X86::ENDBR32 || Inst.getOpcode() == X86::ENDBR64;
}
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 5be04d2..b7eb209 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -245,6 +245,16 @@ cl::opt<bool> PrintCacheMetrics(
cl::desc("calculate and print various metrics for instruction cache"),
cl::cat(BoltOptCategory));
+// Hidden option --print-only: comma-separated list of the functions to print.
+cl::list<std::string> PrintOnly("print-only", cl::CommaSeparated,
+ cl::desc("list of functions to print"),
+ cl::value_desc("func1,func2,func3,..."),
+ cl::Hidden, cl::cat(BoltCategory));
+
+// Hidden option --print-only-file: name of a file holding the list of
+// functions to print.
+cl::opt<std::string>
+ PrintOnlyFile("print-only-file",
+ cl::desc("file with list of functions to print"), cl::Hidden,
+ cl::cat(BoltCategory));
+
cl::opt<bool> PrintSections("print-sections",
cl::desc("print all registered sections"),
cl::Hidden, cl::cat(BoltCategory));
diff --git a/bolt/test/AArch64/constant-island-alignment.s b/bolt/test/AArch64/constant-island-alignment.s
index 99fe733..5462dcb 100644
--- a/bolt/test/AArch64/constant-island-alignment.s
+++ b/bolt/test/AArch64/constant-island-alignment.s
@@ -53,6 +53,7 @@ _start:
blr x0
mov x0, #1
ret
+.size _start,.-_start
nop
# CHECK: {{0|8}} <$d>:
.Lci:
diff --git a/bolt/test/AArch64/epilogue-determination.s b/bolt/test/AArch64/epilogue-determination.s
new file mode 100644
index 0000000..437d814
--- /dev/null
+++ b/bolt/test/AArch64/epilogue-determination.s
@@ -0,0 +1,48 @@
+# Test that we will not incorrectly take the first basic block in function
+# `_foo` as epilogue due to the first load from stack instruction.
+
+# RUN: %clang %cflags %s -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt --print-cfg | FileCheck %s
+
+ .text
+ .global _foo
+ .type _foo, %function
+_foo:
+ ldr w8, [sp]
+ adr x10, _jmptbl
+ ldrsw x9, [x10, x9, lsl #2]
+ add x10, x10, x9
+ br x10
+# CHECK-NOT: x10 # TAILCALL
+# CHECK: x10 # UNKNOWN CONTROL FLOW
+ mov x0, 0
+ ret
+ mov x0, 1
+ ret
+
+ .balign 4
+_jmptbl:
+ .long -16
+ .long -8
+
+ .global _bar
+ .type _bar, %function
+_bar:
+ stp x29, x30, [sp, #-0x10]!
+ mov x29, sp
+ sub sp, sp, #0x10
+ ldr x8, [x29, #0x30]
+ blr x8
+ add sp, sp, #0x10
+ ldp x29, x30, [sp], #0x10
+ br x2
+# CHECK-NOT: x2 # UNKNOWN CONTROL FLOW
+# CHECK: x2 # TAILCALL
+
+ .global _start
+ .type _start, %function
+_start:
+ ret
+
+ # Dummy relocation to force relocation mode
+ .reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/hook-fini.s b/bolt/test/AArch64/hook-fini.s
index 4f321d4..3bb95f9 100644
--- a/bolt/test/AArch64/hook-fini.s
+++ b/bolt/test/AArch64/hook-fini.s
@@ -15,13 +15,13 @@
# RUN: %clang %cflags -pie %s -Wl,-q -o %t.exe
# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s
# RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
-# RUN: llvm-bolt %t.exe -o %t --instrument
+# RUN: llvm-bolt %t.exe -o %t --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-FINI %s
# RUN: llvm-readelf -drs %t | FileCheck --check-prefix=CHECK-FINI %s
# RUN: %clang %cflags -pie %s -Wl,-q,-fini=0 -o %t-no-fini.exe
# RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
# RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s
-# RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument
+# RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-FINI-ARRAY %s
# RUN: llvm-readelf -drs %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI %s
# RUN: llvm-readelf -ds -x .fini_array %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI-RELOC %s
@@ -29,7 +29,7 @@
# RUN: %clang %cflags %p/../Inputs/stub.c -fPIC -shared -o %t-stub.so
# RUN: %clang %cflags %s -no-pie -Wl,-q,-fini=0 %t-stub.so -o %t-no-pie-no-fini.exe
# RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
-# RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument
+# RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-FINI-ARRAY %s
# RUN: llvm-readelf -ds -x .fini_array %t-no-pie-no-fini | FileCheck --check-prefix=CHECK-NO-PIE-NO-FINI %s
## With fini: dynamic section should contain DT_FINI
@@ -46,6 +46,14 @@
## Without PIE: binary should not have relative relocations
# RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE
+## Check BOLT output for the finalization hook (DT_FINI)
+# CHECK-BOLT-RT-FINI: runtime library finalization was hooked via DT_FINI
+# CHECK-BOLT-RT-FINI-NOT: runtime library finalization was hooked via .fini_array entry
+
+## Check BOLT output for the finalization hook (.fini_array entry)
+# CHECK-BOLT-RT-FINI-ARRAY-NOT: runtime library finalization was hooked via DT_FINI
+# CHECK-BOLT-RT-FINI-ARRAY: runtime library finalization was hooked via .fini_array entry
+
## Check that DT_FINI is set to __bolt_runtime_fini
# CHECK-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-FINI-DAG: (FINI) 0x[[FINI:[[:xdigit:]]+]]
diff --git a/bolt/test/AArch64/hook-init.s b/bolt/test/AArch64/hook-init.s
new file mode 100644
index 0000000..a48328b
--- /dev/null
+++ b/bolt/test/AArch64/hook-init.s
@@ -0,0 +1,221 @@
+## Test the different ways of hooking the init function for instrumentation (via
+## entry point, DT_INIT and via DT_INIT_ARRAY). We test the latter for both PIE
+## and non-PIE binaries because of the different ways of handling relocations
+## (static or dynamic), executable and shared library.
+## All tests perform the following steps:
+## - Compile and link for the case to be tested
+## - Some sanity-checks on the dynamic section and relocations in the binary to
+## verify it has the shape we want for testing:
+## - INTERP in Program Headers
+## - DT_INIT or DT_INIT_ARRAY in dynamic section
+## - No relative relocations for non-PIE
+## - Instrument (with extra --runtime-lib-init-hook=init/init_array options
+## in some cases)
+## - Verify generated binary
+# REQUIRES: system-linux,bolt-runtime,target=aarch64{{.*}}
+
+# RUN: %clang %cflags -pie %s -Wl,-q -o %t.exe
+# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-INIT %s
+# RUN: llvm-readelf -l %t.exe | FileCheck --check-prefix=PH-INTERP %s
+# RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
+# RUN: llvm-bolt %t.exe -o %t --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-EP %s
+# RUN: llvm-readelf -hdrs %t | FileCheck --check-prefix=CHECK-INIT-EP %s
+# RUN: llvm-bolt %t.exe -o %t-no-ep --instrument --runtime-lib-init-hook=init | FileCheck --check-prefix=CHECK-BOLT-RT-INIT %s
+# RUN: llvm-readelf -hdrs %t-no-ep | FileCheck --check-prefix=CHECK-INIT-NO-EP %s
+# RUN: llvm-bolt %t.exe -o %t-no-ep --instrument --runtime-lib-init-hook=init_array | FileCheck --check-prefix=CHECK-BOLT-RT-INIT-ARRAY %s
+# RUN: llvm-readelf -hdrs %t-no-ep | FileCheck --check-prefix=CHECK-INIT-ARRAY-NO-EP %s
+
+# RUN: %clang -shared %cflags -pie %s -Wl,-q -o %t-shared.exe
+# RUN: llvm-readelf -d %t-shared.exe | FileCheck --check-prefix=DYN-INIT %s
+# RUN: llvm-readelf -l %t-shared.exe | FileCheck --check-prefix=PH-INTERP-SHARED %s
+# RUN: llvm-readelf -r %t-shared.exe | FileCheck --check-prefix=RELOC-SHARED-PIE %s
+# RUN: llvm-bolt %t-shared.exe -o %t-shared --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-INIT %s
+# RUN: llvm-readelf -hdrs %t-shared | FileCheck --check-prefix=CHECK-SHARED-INIT %s
+
+# RUN: %clang %cflags -pie %s -Wl,-q,-init=0 -o %t-no-init.exe
+# RUN: llvm-readelf -d %t-no-init.exe | FileCheck --check-prefix=DYN-NO-INIT %s
+# RUN: llvm-readelf -l %t-no-init.exe | FileCheck --check-prefix=PH-INTERP %s
+# RUN: llvm-readelf -r %t-no-init.exe | FileCheck --check-prefix=RELOC-PIE %s
+# RUN: llvm-bolt %t-no-init.exe -o %t-no-init --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-EP %s
+# RUN: llvm-readelf -hdrs %t-no-init | FileCheck --check-prefix=CHECK-NO-INIT-EP %s
+# RUN: llvm-bolt %t-no-init.exe -o %t-no-init-no-ep --instrument --runtime-lib-init-hook=init | FileCheck --check-prefix=CHECK-BOLT-RT-INIT-ARRAY %s
+# RUN: llvm-readelf -hdrs %t-no-init-no-ep | FileCheck --check-prefix=CHECK-NO-INIT-NO-EP %s
+
+# RUN: %clang -shared %cflags -pie %s -Wl,-q,-init=0 -o %t-shared-no-init.exe
+# RUN: llvm-readelf -d %t-shared-no-init.exe | FileCheck --check-prefix=DYN-NO-INIT %s
+# RUN: llvm-readelf -l %t-shared-no-init.exe | FileCheck --check-prefix=PH-INTERP-SHARED %s
+# RUN: llvm-readelf -r %t-shared-no-init.exe | FileCheck --check-prefix=RELOC-SHARED-PIE %s
+# RUN: llvm-bolt %t-shared-no-init.exe -o %t-shared-no-init --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-INIT-ARRAY %s
+# RUN: llvm-readelf -drs %t-shared-no-init | FileCheck --check-prefix=CHECK-SHARED-NO-INIT %s
+
+## Create a dummy shared library to link against to force creation of the dynamic section.
+# RUN: %clang %cflags %p/../Inputs/stub.c -fPIC -shared -o %t-stub.so
+# RUN: %clang %cflags %s -no-pie -Wl,-q,-init=0 %t-stub.so -o %t-no-pie-no-init.exe
+# RUN: llvm-readelf -r %t-no-pie-no-init.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
+# RUN: llvm-bolt %t-no-pie-no-init.exe -o %t-no-pie-no-init --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-EP %s
+# RUN: llvm-readelf -hds %t-no-pie-no-init | FileCheck --check-prefix=CHECK-NO-PIE-NO-INIT-EP %s
+
+## With init: dynamic section should contain DT_INIT
+# DYN-INIT: (INIT)
+
+## Without init: dynamic section should only contain DT_INIT_ARRAY
+# DYN-NO-INIT-NOT: (INIT)
+# DYN-NO-INIT: (INIT_ARRAY)
+# DYN-NO-INIT: (INIT_ARRAYSZ)
+
+## With interp program header (executable)
+# PH-INTERP: Program Headers:
+# PH-INTERP: INTERP
+
+## Without interp program header (shared library)
+# PH-INTERP-SHARED: Program Headers:
+# PH-INTERP-SHARED-NOT: INTERP
+
+## With PIE: binary should have relative relocations
+# RELOC-PIE: R_AARCH64_RELATIVE
+
+## Shared library: binary should have absolute (R_AARCH64_ABS64) relocations
+# RELOC-SHARED-PIE: R_AARCH64_ABS64
+
+## Without PIE: binary should not have relative relocations
+# RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE
+
+## Check BOLT output for the initialization hook (ELF Header Entry Point)
+# CHECK-BOLT-RT-EP: runtime library initialization was hooked via ELF Header Entry Point
+# CHECK-BOLT-RT-EP-NOT: runtime library initialization was hooked via DT_INIT
+# CHECK-BOLT-RT-EP-NOT: runtime library initialization was hooked via .init_array entry
+
+## Check BOLT output for the initialization hook (DT_INIT)
+# CHECK-BOLT-RT-INIT-NOT: runtime library initialization was hooked via ELF Header Entry Point
+# CHECK-BOLT-RT-INIT: runtime library initialization was hooked via DT_INIT
+# CHECK-BOLT-RT-INIT-NOT: runtime library initialization was hooked via .init_array entry
+
+## Check BOLT output for the initialization hook (.init_array entry)
+# CHECK-BOLT-RT-INIT-ARRAY-NOT: runtime library initialization was hooked via ELF Header Entry Point
+# CHECK-BOLT-RT-INIT-ARRAY-NOT: runtime library initialization was hooked via DT_INIT
+# CHECK-BOLT-RT-INIT-ARRAY: runtime library initialization was hooked via .init_array entry
+
+## Check that entry point address is set to __bolt_runtime_start for PIE executable with DT_INIT
+# CHECK-INIT-EP: ELF Header:
+# CHECK-INIT-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Check that the dynamic relocation at .init and .init_array were not patched
+# CHECK-INIT-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-INIT-EP-NOT: (INIT) 0x[[#%x, EP_ADDR]]
+# CHECK-INIT-EP-NOT: (INIT_ARRAY) 0x[[#%x, EP_ADDR]]
+## Check that the new entry point address points to __bolt_runtime_start
+# CHECK-INIT-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-INIT-EP: {{0+}}[[#%x, EP_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that DT_INIT address is set to __bolt_runtime_start for PIE executable with DT_INIT
+# CHECK-INIT-NO-EP: ELF Header:
+# CHECK-INIT-NO-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Read Dynamic section DT_INIT and DT_INIT_ARRAY entries
+# CHECK-INIT-NO-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-INIT-NO-EP-DAG: (INIT) 0x[[#%x,INIT:]]
+# CHECK-INIT-NO-EP-DAG: (INIT_ARRAY) 0x[[#%x,INIT_ARRAY:]]
+## Check if ELF entry point address points to _start symbol and new DT_INIT entry points to __bolt_runtime_start
+# CHECK-INIT-NO-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-INIT-NO-EP-DAG: {{0+}}[[#%x, EP_ADDR]] {{.*}} _start
+# CHECK-INIT-NO-EP-DAG: {{0+}}[[#%x, INIT]] {{.*}} __bolt_runtime_start
+
+## Check that 1st entry of DT_INIT_ARRAY is set to __bolt_runtime_start and DT_INIT was not changed
+# CHECK-INIT-ARRAY-NO-EP: ELF Header:
+# CHECK-INIT-ARRAY-NO-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Read Dynamic section DT_INIT and DT_INIT_ARRAY entries
+# CHECK-INIT-ARRAY-NO-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-INIT-ARRAY-NO-EP-DAG: (INIT) 0x[[#%x,INIT:]]
+# CHECK-INIT-ARRAY-NO-EP-DAG: (INIT_ARRAY) 0x[[#%x,INIT_ARRAY:]]
+## Read the dynamic relocation from 1st entry of .init_array
+# CHECK-INIT-ARRAY-NO-EP: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
+# CHECK-INIT-ARRAY-NO-EP: {{0+}}[[#%x,INIT_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[#%x,INIT_ADDR:]]
+# CHECK-INIT-ARRAY-NO-EP-NOT: {{0+}}[[#%x,INIT_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[#%x,INIT]]
+## Check that 1st entry of .init_array points to __bolt_runtime_start
+# CHECK-INIT-ARRAY-NO-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-INIT-ARRAY-NO-EP-DAG: {{0+}}[[#%x, EP_ADDR]] {{.*}} _start
+# CHECK-INIT-ARRAY-NO-EP-DAG: {{[0-9]]*}}: {{0+}}[[#%x, INIT_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that entry point address is set to __bolt_runtime_start for PIE executable without DT_INIT
+# CHECK-NO-INIT-EP: ELF Header:
+# CHECK-NO-INIT-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Check that the dynamic relocation at .init and .init_array were not patched
+# CHECK-NO-INIT-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-NO-INIT-EP-NOT: (INIT) 0x[[#%x, EP_ADDR]]
+# CHECK-NO-INIT-EP-NOT: (INIT_ARRAY) 0x[[#%x, EP_ADDR]]
+## Check that the new entry point address points to __bolt_runtime_start
+# CHECK-NO-INIT-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-NO-INIT-EP: {{0+}}[[#%x, EP_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that DT_INIT is set to __bolt_runtime_start for shared library with DT_INIT
+# CHECK-SHARED-INIT: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-SHARED-INIT-DAG: (INIT) 0x[[#%x, INIT:]]
+# CHECK-SHARED-INIT-DAG: (INIT_ARRAY) 0x[[#%x, INIT_ARRAY:]]
+## Check that the dynamic relocation at .init_array was not patched
+# CHECK-SHARED-INIT: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
+# CHECK-SHARED-INIT-NOT: {{0+}}[[#%x, INIT_ARRAY]] {{.*}} R_AARCH64_ABS64 {{0+}}[[#%x, INIT]]
+## Check that dynamic section DT_INIT points to __bolt_runtime_start
+# CHECK-SHARED-INIT: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-SHARED-INIT: {{0+}}[[#%x, INIT]] {{.*}} __bolt_runtime_start
+
+## Check that 1st entry of DT_INIT_ARRAY is set to __bolt_runtime_start for PIE executable without DT_INIT
+# CHECK-NO-INIT-NO-EP: ELF Header:
+# CHECK-NO-INIT-NO-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+# CHECK-NO-INIT-NO-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-NO-INIT-NO-EP-NOT: (INIT)
+# CHECK-NO-INIT-NO-EP: (INIT_ARRAY) 0x[[#%x,INIT_ARRAY:]]
+## Read the dynamic relocation from 1st entry of .init_array
+# CHECK-NO-INIT-NO-EP: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
+# CHECK-NO-INIT-NO-EP: {{0+}}[[#%x,INIT_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[#%x,INIT_ADDR:]]
+## Check that 1st entry of .init_array points to __bolt_runtime_start
+# CHECK-NO-INIT-NO-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-NO-INIT-NO-EP-DAG: {{0+}}[[#%x, EP_ADDR]] {{.*}} _start
+# CHECK-NO-INIT-NO-EP-DAG: {{[0-9]]*}}: {{0+}}[[#%x, INIT_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that 1st entry of DT_INIT_ARRAY is set to __bolt_runtime_start for shared library without DT_INIT
+# CHECK-SHARED-NO-INIT: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-SHARED-NO-INIT-NOT: (INIT)
+# CHECK-SHARED-NO-INIT: (INIT_ARRAY) 0x[[#%x,INIT_ARRAY:]]
+## Read the dynamic relocation from 1st entry of .init_array
+# CHECK-SHARED-NO-INIT: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
+# CHECK-SHARED-NO-INIT: {{0+}}[[#%x, INIT_ARRAY]] {{.*}} R_AARCH64_ABS64 [[#%x,INIT_ADDR:]]
+## Check that 1st entry of .init_array points to __bolt_runtime_start
+# CHECK-SHARED-NO-INIT: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-SHARED-NO-INIT: {{[0-9]]*}}: {{0+}}[[#%x, INIT_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that entry point address is set to __bolt_runtime_start for non-PIE executable without DT_INIT
+# CHECK-NO-PIE-NO-INIT-EP: ELF Header:
+# CHECK-NO-PIE-NO-INIT-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Check that the dynamic relocation at .init and .init_array were not patched
+# CHECK-NO-PIE-NO-INIT-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-NO-PIE-NO-INIT-EP-NOT: (INIT) 0x[[#%x, EP_ADDR]]
+# CHECK-NO-PIE-NO-INIT-EP-NOT: (INIT_ARRAY) 0x[[#%x, EP_ADDR]]
+## Check that the new entry point address points to __bolt_runtime_start
+# CHECK-NO-PIE-NO-INIT-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-NO-PIE-NO-INIT-EP: {{0+}}[[#%x, EP_ADDR]] {{.*}} __bolt_runtime_start
+
+ .globl _start
+ .type _start, %function
+_start:
+ # Dummy relocation to force relocation mode.
+ .reloc 0, R_AARCH64_NONE
+ ret
+.size _start, .-_start
+
+ .globl _init
+ .type _init, %function
+_init:
+ ret
+ .size _init, .-_init
+
+ .globl _fini
+ .type _fini, %function
+_fini:
+ ret
+ .size _fini, .-_fini
+
+ .section .init_array,"aw"
+ .align 3
+ .dword _init
+
+ .section .fini_array,"aw"
+ .align 3
+ .dword _fini
diff --git a/bolt/test/AArch64/inline-armv8.3-returns.s b/bolt/test/AArch64/inline-armv8.3-returns.s
new file mode 100644
index 0000000..055b589
--- /dev/null
+++ b/bolt/test/AArch64/inline-armv8.3-returns.s
@@ -0,0 +1,45 @@
+# This test checks that inlining functions with fused pointer-auth-and-return
+# instructions is properly handled by BOLT.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
+# RUN: %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 8 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+# CHECK-NEXT: autiasp
+
+ .text
+ .globl _Z3fooP1A
+ .type _Z3fooP1A,@function
+_Z3fooP1A:
+ paciasp
+ ldr x8, [x0]
+ ldr w0, [x8]
+ retaa
+ .size _Z3fooP1A, .-_Z3fooP1A
+
+ .globl _Z3barP1A
+ .type _Z3barP1A,@function
+_Z3barP1A:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl _Z3fooP1A
+ mul w0, w0, w0
+ ldp x29, x30, [sp], #16
+ ret
+ .size _Z3barP1A, .-_Z3barP1A
+
+ .globl main
+ .p2align 2
+ .type main,@function
+main:
+ mov w0, wzr
+ ret
+ .size main, .-main
diff --git a/bolt/test/AArch64/inline-armv8.3-tailcall.s b/bolt/test/AArch64/inline-armv8.3-tailcall.s
new file mode 100644
index 0000000..78e7285
--- /dev/null
+++ b/bolt/test/AArch64/inline-armv8.3-tailcall.s
@@ -0,0 +1,46 @@
+# This test checks that inlining functions with fused pointer-auth-and-return
+# instructions into a location with a tailcall is properly handled by BOLT.
+# Because _Z3barP1A ends in a tailcall, we don't remove the return instruction
+# from the inlined block. Therefore, we should see a retaa, and not an autiasp.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
+# RUN: %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 12 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: mov x29, sp
+# CHECK-NEXT: paciasp
+# CHECK-NEXT: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+# CHECK-NEXT: retaa
+
+ .text
+ .globl _Z3fooP1A
+ .type _Z3fooP1A,@function
+_Z3fooP1A:
+ paciasp
+ ldr x8, [x0]
+ ldr w0, [x8]
+ retaa
+ .size _Z3fooP1A, .-_Z3fooP1A
+
+ .globl _Z3barP1A
+ .type _Z3barP1A,@function
+_Z3barP1A:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ b _Z3fooP1A // tailcall
+ .size _Z3barP1A, .-_Z3barP1A
+
+ .globl main
+ .p2align 2
+ .type main,@function
+main:
+ mov w0, wzr
+ ret
+ .size main, .-main
diff --git a/bolt/test/AArch64/inline-bti-dbg.s b/bolt/test/AArch64/inline-bti-dbg.s
new file mode 100644
index 0000000..a0db458
--- /dev/null
+++ b/bolt/test/AArch64/inline-bti-dbg.s
@@ -0,0 +1,40 @@
+# This test checks that for AArch64 binaries with BTI, we do not inline blocks with indirect tailcalls.
+# Same as inline-bti.s, but checks the debug output, and therefore requires assertions.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
+# RUN: llvm-bolt --inline-all %t.exe -o %t.bolt --debug 2>&1 | FileCheck %s
+
+# For BTI, we should not inline foo.
+# CHECK: BOLT-DEBUG: Skipping inlining block with tailcall in _Z3barP1A : .LBB01 to keep BTIs consistent.
+# CHECK-NOT: BOLT-INFO: inlined {{[0-9]+}} calls at {{[0-9]+}} call sites in {{[0-9]+}} iteration(s). Change in binary size: {{[0-9]+}} bytes.
+
+ .text
+ .globl _Z3fooP1A
+ .type _Z3fooP1A,@function
+_Z3fooP1A:
+ ldr x8, [x0]
+ ldr w0, [x8]
+ br x30
+ .size _Z3fooP1A, .-_Z3fooP1A
+
+ .globl _Z3barP1A
+ .type _Z3barP1A,@function
+_Z3barP1A:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl _Z3fooP1A
+ mul w0, w0, w0
+ ldp x29, x30, [sp], #16
+ ret
+ .size _Z3barP1A, .-_Z3barP1A
+
+ .globl main
+ .p2align 2
+ .type main,@function
+main:
+ mov w0, wzr
+ ret
+ .size main, .-main
diff --git a/bolt/test/AArch64/inline-bti.s b/bolt/test/AArch64/inline-bti.s
new file mode 100644
index 0000000..62f6ea6
--- /dev/null
+++ b/bolt/test/AArch64/inline-bti.s
@@ -0,0 +1,38 @@
+## This test checks that for AArch64 binaries with BTI, we do not inline blocks with indirect tailcalls.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
+# RUN: llvm-bolt --inline-all %t.exe -o %t.bolt | FileCheck %s
+
+# For BTI, we should not inline foo.
+# CHECK-NOT: BOLT-INFO: inlined {{[0-9]+}} calls at {{[0-9]+}} call sites in {{[0-9]+}} iteration(s). Change in binary size: {{[0-9]+}} bytes.
+
+ .text
+ .globl _Z3fooP1A
+ .type _Z3fooP1A,@function
+_Z3fooP1A:
+ ldr x8, [x0]
+ ldr w0, [x8]
+ br x30
+ .size _Z3fooP1A, .-_Z3fooP1A
+
+ .globl _Z3barP1A
+ .type _Z3barP1A,@function
+_Z3barP1A:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl _Z3fooP1A
+ mul w0, w0, w0
+ ldp x29, x30, [sp], #16
+ ret
+ .size _Z3barP1A, .-_Z3barP1A
+
+ .globl main
+ .p2align 2
+ .type main,@function
+main:
+ mov w0, wzr
+ ret
+ .size main, .-main
diff --git a/bolt/test/AArch64/inline-pauth-lr.s b/bolt/test/AArch64/inline-pauth-lr.s
new file mode 100644
index 0000000..34f0572
--- /dev/null
+++ b/bolt/test/AArch64/inline-pauth-lr.s
@@ -0,0 +1,61 @@
+# This test checks that inlining functions with the pauth-lr variants of
+# fused pointer-auth-and-return instructions is properly handled by BOLT.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -march=armv9.5-a+pauth-lr -O0 %s -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
+# RUN: %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 2 call sites in 2 iteration(s). Change in binary size: 16 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: paciasppc
+# CHECK-NEXT: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+# CHECK-NEXT: autiasppcr x28
+# CHECK-NEXT: paciasppc
+# CHECK-NEXT: ldr x7, [x0]
+# CHECK-NEXT: ldr w0, [x7]
+# CHECK-NEXT: autiasppc _Z3bazP1A
+
+ .text
+ .globl _Z3fooP1A
+ .type _Z3fooP1A,@function
+_Z3fooP1A:
+ paciasppc
+ ldr x8, [x0]
+ ldr w0, [x8]
+ retaasppcr x28
+ .size _Z3fooP1A, .-_Z3fooP1A
+
+ .text
+ .globl _Z3bazP1A
+ .type _Z3bazP1A,@function
+_Z3bazP1A:
+0:
+ paciasppc
+ ldr x7, [x0]
+ ldr w0, [x7]
+ retaasppc 0b
+ .size _Z3bazP1A, .-_Z3bazP1A
+
+ .globl _Z3barP1A
+ .type _Z3barP1A,@function
+_Z3barP1A:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl _Z3fooP1A
+ bl _Z3bazP1A
+ mul w0, w0, w0
+ ldp x29, x30, [sp], #16
+ ret
+ .size _Z3barP1A, .-_Z3barP1A
+
+ .globl main
+ .p2align 2
+ .type main,@function
+main:
+ mov w0, wzr
+ ret
+ .size main, .-main
diff --git a/bolt/test/AArch64/instrument-no-fini.s b/bolt/test/AArch64/instrument-no-fini.s
new file mode 100644
index 0000000..526ce11
--- /dev/null
+++ b/bolt/test/AArch64/instrument-no-fini.s
@@ -0,0 +1,34 @@
+# Test that BOLT will produce an error by default and pass with the --instrumentation-sleep-time option
+
+# REQUIRES: system-linux,bolt-runtime,target=aarch64{{.*}}
+
+# RUN: llvm-mc -triple aarch64 -filetype=obj %s -o %t.o
+# RUN: ld.lld -q -pie -o %t.exe %t.o
+# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=CHECK-NO-FINI %s
+# RUN: not llvm-bolt --instrument -o %t.out %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT-FAIL
+# RUN: llvm-bolt --instrument --instrumentation-sleep-time=1 -o %t.out %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT-PASS
+
+# CHECK-NO-FINI: INIT
+# CHECK-NO-FINI-NOT: FINI
+# CHECK-NO-FINI-NOT: FINI_ARRAY
+
+# CHECK-BOLT-FAIL: Instrumentation needs either DT_FINI or DT_FINI_ARRAY
+
+# CHECK-BOLT-PASS-NOT: Instrumentation needs either DT_FINI or DT_FINI_ARRAY
+# CHECK-BOLT-PASS: runtime library initialization was hooked via DT_INIT
+
+ .text
+ .globl _start
+ .type _start, %function
+_start:
+ # BOLT errs when instrumenting without relocations; create a dummy one.
+ .reloc 0, R_AARCH64_NONE
+ ret
+ .size _start, .-_start
+
+ .globl _init
+ .type _init, %function
+ # Force DT_INIT to be created (needed for instrumentation).
+_init:
+ ret
+ .size _init, .-_init
diff --git a/bolt/test/AArch64/ldr-relaxation.s b/bolt/test/AArch64/ldr-relaxation.s
new file mode 100644
index 0000000..7632504
--- /dev/null
+++ b/bolt/test/AArch64/ldr-relaxation.s
@@ -0,0 +1,122 @@
+## Check that LDR relaxation will fail since LDR is inside a non-simple
+## function and there is no NOP next to it.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym FAIL=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: not llvm-bolt %t.so -o %t.bolt 2>&1 | FileCheck %s --check-prefix=FAIL
+
+# FAIL: BOLT-ERROR: cannot relax LDR in non-simple function _start
+
+.ifdef FAIL
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ br x2
+ ldr x0, _foo
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+## Check that LDR relaxation is not needed since the reference is not far away.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym NOT_NEEDED=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=NOT_NEEDED
+
+# NOT_NEEDED: <_start>
+# NOT_NEEDED-NEXT: ldr
+
+.ifdef NOT_NEEDED
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ ldr x0, _start
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+## Check that LDR relaxation is done in a simple function, where NOP will
+## be inserted as needed.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym RELAX_SIMPLE=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELAX
+
+# RELAX: adrp
+# RELAX-NEXT: ldr
+
+.ifdef RELAX_SIMPLE
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ ldr x0, _foo
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+## Check that LDR relaxation is done in a non-simple function, where NOP
+## exists next to LDR.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym RELAX_NON_SIMPLE=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELAX
+
+.ifdef RELAX_NON_SIMPLE
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ br x2
+ ldr x0, _foo
+ nop
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+## Check LDR relaxation works on loading W (low 32-bit of X) registers.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN: --defsym RELAX_SIMPLE_WREG=1 %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.so -Wl,-q
+# RUN: llvm-bolt %t.so -o %t.bolt
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=RELAXW
+
+# RELAXW: adrp x0
+# RELAXW-NEXT: ldr w0
+
+.ifdef RELAX_SIMPLE_WREG
+ .text
+ .global _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ ldr w0, _foo
+ ret
+ .cfi_endproc
+.size _start, .-_start
+.endif
+
+ .section .text_cold
+ .global _foo
+ .align 3
+_foo:
+ .long 0x12345678
+.size _foo, .-_foo
diff --git a/bolt/test/AArch64/negate-ra-state-disallow.s b/bolt/test/AArch64/pacret-cfi-disallow.s
index 95adb71..95adb71 100644
--- a/bolt/test/AArch64/negate-ra-state-disallow.s
+++ b/bolt/test/AArch64/pacret-cfi-disallow.s
diff --git a/bolt/test/AArch64/negate-ra-state-incorrect.s b/bolt/test/AArch64/pacret-cfi-incorrect.s
index 14d2c38..68a6fc0 100644
--- a/bolt/test/AArch64/negate-ra-state-incorrect.s
+++ b/bolt/test/AArch64/pacret-cfi-incorrect.s
@@ -1,4 +1,4 @@
-# This test checks that MarkRAStates pass ignores functions with
+# This test checks that PointerAuthCFIAnalyzer pass ignores functions with
# malformed .cfi_negate_ra_state sequences in the input binary.
# The cases checked are:
@@ -8,7 +8,7 @@
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
-# RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads | FileCheck %s --check-prefix=CHECK-BOLT
+# RUN: llvm-bolt %t.exe -o %t.exe.bolt -v=1 --no-threads | FileCheck %s --check-prefix=CHECK-BOLT
# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function foo: ptr authenticating inst encountered in Unsigned RA state
# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function bar: ptr signing inst encountered in Signed RA state
diff --git a/bolt/test/AArch64/negate-ra-state-reorder.s b/bolt/test/AArch64/pacret-cfi-reorder.s
index 2659f75..2659f75 100644
--- a/bolt/test/AArch64/negate-ra-state-reorder.s
+++ b/bolt/test/AArch64/pacret-cfi-reorder.s
diff --git a/bolt/test/AArch64/negate-ra-state.s b/bolt/test/AArch64/pacret-cfi.s
index 30786d4..d320629 100644
--- a/bolt/test/AArch64/negate-ra-state.s
+++ b/bolt/test/AArch64/pacret-cfi.s
@@ -9,13 +9,13 @@
# RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads --print-all | FileCheck %s --check-prefix=CHECK-BOLT
# Check that the negate-ra-state at the start of bar is not discarded.
-# If it was discarded, MarkRAState would report bar as having inconsistent RAStates.
+# If it was discarded, PointerAuthCFIAnalyzer would report bar as having inconsistent RAStates.
# This is testing the handling of initialRAState on the BinaryFunction.
# CHECK-BOLT-NOT: BOLT-INFO: inconsistent RAStates in function foo
# CHECK-BOLT-NOT: BOLT-INFO: inconsistent RAStates in function bar
# Check that OpNegateRAState CFIs are generated correctly.
-# CHECK-BOLT: Binary Function "foo" after insert-negate-ra-state-pass {
+# CHECK-BOLT: Binary Function "foo" after pointer-auth-cfi-fixup {
# CHECK-BOLT: paciasp
# CHECK-BOLT-NEXT: OpNegateRAState
@@ -23,7 +23,7 @@
# CHECK-BOLT-NEXT: 0: OpNegateRAState
# CHECK-BOLT-NEXT: End of Function "foo"
-# CHECK-BOLT: Binary Function "bar" after insert-negate-ra-state-pass {
+# CHECK-BOLT: Binary Function "bar" after pointer-auth-cfi-fixup {
# CHECK-BOLT: OpNegateRAState
# CHECK-BOLT-NEXT: mov x1, #0x0
# CHECK-BOLT-NEXT: mov x1, #0x1
@@ -37,7 +37,7 @@
# CHECK-BOLT-NEXT: End of Function "bar"
# End of negate-ra-state insertion logs for foo and bar.
-# CHECK: Binary Function "_start" after insert-negate-ra-state-pass {
+# CHECK: Binary Function "_start" after pointer-auth-cfi-fixup {
# Check that the functions are in the new .text section
# RUN: llvm-objdump %t.exe.bolt -d -j .text | FileCheck %s --check-prefix=CHECK-OBJDUMP
diff --git a/bolt/test/AArch64/pacret-split-funcs.s b/bolt/test/AArch64/pacret-split-funcs.s
index 27b34710..45eeb9a 100644
--- a/bolt/test/AArch64/pacret-split-funcs.s
+++ b/bolt/test/AArch64/pacret-split-funcs.s
@@ -11,7 +11,7 @@
# Checking that we don't see any OpNegateRAState CFIs before the insertion pass.
# CHECK-NOT: OpNegateRAState
-# CHECK: Binary Function "foo" after insert-negate-ra-state-pass
+# CHECK: Binary Function "foo" after pointer-auth-cfi-fixup
# CHECK: paciasp
# CHECK-NEXT: OpNegateRAState
@@ -24,7 +24,7 @@
# CHECK-NEXT: OpNegateRAState
# CHECK-NEXT: ret
-# End of the insert-negate-ra-state-pass logs
+# End of the pointer-auth-cfi-fixup logs
# CHECK: Binary Function "foo" after finalize-functions
.text
diff --git a/bolt/test/AArch64/relocation-type-print.s b/bolt/test/AArch64/relocation-type-print.s
new file mode 100644
index 0000000..111cbbb
--- /dev/null
+++ b/bolt/test/AArch64/relocation-type-print.s
@@ -0,0 +1,24 @@
+## Verify that llvm-bolt correctly prints relocation types.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -nostartfiles %s -o %t.exe -Wl,-q,--no-relax
+# RUN: llvm-bolt %t.exe --print-cfg --print-relocations -o %t.bolt \
+# RUN: | FileCheck %s
+
+ .section .text
+ .align 4
+ .globl _start
+ .type _start, %function
+_start:
+
+ adrp x0, _start
+# CHECK: adrp
+# CHECK-SAME: R_AARCH64_ADR_PREL_PG_HI21
+
+ add x0, x0, :lo12:_start
+# CHECK-NEXT: add
+# CHECK-SAME: R_AARCH64_ADD_ABS_LO12_NC
+
+ ret
+ .size _start, .-_start
diff --git a/bolt/test/AArch64/safe-icf.s b/bolt/test/AArch64/safe-icf.s
new file mode 100644
index 0000000..bca28df
--- /dev/null
+++ b/bolt/test/AArch64/safe-icf.s
@@ -0,0 +1,73 @@
+# Test BOLT can do safe ICF for AArch64.
+
+# REQUIRES: system-linux,asserts
+
+# RUN: %clang %cflags -x assembler-with-cpp %s -o %t1.so -Wl,-q \
+# RUN: -Wl,-z,notext -DREF_BY_NON_CONTROL_FLOW_INSTR
+# RUN: %clang %cflags -x assembler-with-cpp %s -o %t2.so -Wl,-q \
+# RUN: -Wl,-z,notext -DREF_IN_RW_DATA_SEC
+# RUN: %clang %cflags -x assembler-with-cpp %s -o %t3.so -Wl,-q \
+# RUN: -Wl,-z,notext -DNO_DUMMY_TEXT_RELOC
+
+# RUN: llvm-bolt %t1.so -o %t.bolt --no-threads --debug-only=bolt-icf \
+# RUN: --icf=all 2>&1 | FileCheck %s --check-prefix=ICF-ALL
+# RUN: llvm-bolt %t2.so -o %t.bolt --no-threads --debug-only=bolt-icf \
+# RUN: --icf=all 2>&1 | FileCheck %s --check-prefix=ICF-ALL
+# RUN: llvm-bolt %t3.so -o %t.bolt --no-threads --debug-only=bolt-icf \
+# RUN: --icf=all 2>&1 | FileCheck %s --check-prefix=ICF-ALL
+
+# RUN: llvm-bolt %t1.so -o %t.bolt --no-threads --debug-only=bolt-icf \
+# RUN: --icf=safe 2>&1 | FileCheck %s --check-prefix=ICF-SAFE
+# RUN: llvm-bolt %t2.so -o %t.bolt --no-threads --debug-only=bolt-icf \
+# RUN: --icf=safe 2>&1 | FileCheck %s --check-prefix=ICF-SAFE
+
+# RUN: not llvm-bolt %t3.so -o %t.bolt --icf=safe 2>&1 | FileCheck %s \
+# RUN: --check-prefix=ERROR
+
+# ICF-ALL: folding bar into foo
+# ICF-SAFE: skipping function with reference taken foo
+# ERROR: binary built without relocations. Safe ICF is not supported
+
+ .text
+
+ .global foo
+ .type foo, %function
+foo:
+ mov x0, #0x10
+ ret
+
+ .global bar
+ .type bar, %function
+bar:
+ mov x0, #0x10
+ ret
+
+#if defined(REF_IN_RW_DATA_SEC) && !defined(NO_DUMMY_TEXT_RELOC)
+ # Dummy relocation to force relocation mode
+ .reloc 0, R_AARCH64_NONE
+#endif
+
+#if defined(REF_BY_NON_CONTROL_FLOW_INSTR)
+ .global random
+ .type random, %function
+random:
+ adrp x8, foo
+ add x8, x8, :lo12:foo
+ br x8
+#endif
+
+#if defined(REF_IN_RW_DATA_SEC)
+ .data
+ .balign 8
+ .global funcptr
+funcptr:
+ .xword foo
+#endif
+
+ .section .rodata
+ .global _ZTVxx
+ .balign 8
+_ZTVxx:
+ .xword foo
+ .xword bar
+ .size _ZTVxx, .-_ZTVxx
diff --git a/bolt/test/AArch64/validate-branch-target.s b/bolt/test/AArch64/validate-branch-target.s
new file mode 100644
index 0000000..b187fe7
--- /dev/null
+++ b/bolt/test/AArch64/validate-branch-target.s
@@ -0,0 +1,36 @@
+## Test that BOLT warns when it detects that the target
+## of a direct call/branch is an invalid instruction
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-linux %s -o main.o
+# RUN: %clang %cflags %t/main.o -o main.exe -Wl,-q
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrupt: an external branch/call targets an invalid instruction in function external_func at address 0x{{[0-9a-f]+}}; ignoring both functions
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrupt: an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}; ignoring this function
+
+
+.globl internal_corrupt
+.type internal_corrupt,@function
+internal_corrupt:
+ b constant_island_0 // targeting the data in code
+constant_island_0:
+ .word 0xffffffff
+.size internal_corrupt,.-internal_corrupt
+
+
+.globl external_corrupt
+.type external_corrupt,@function
+external_corrupt:
+ b constant_island_1 // targeting the data in code externally
+.size external_corrupt,.-external_corrupt
+
+.globl external_func
+.type external_func,@function
+external_func:
+ add x0, x0, x1
+constant_island_1:
+ .word 0xffffffff // data in code
+ ret
+.size external_func,.-external_func
diff --git a/bolt/test/X86/Inputs/dwarf4-str-split-dwarf.s b/bolt/test/X86/Inputs/dwarf4-str-split-dwarf.s
new file mode 100644
index 0000000..cc951b6
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf4-str-split-dwarf.s
@@ -0,0 +1,330 @@
+#--- main.s
+# clang++ -g2 -gdwarf-4 -gsplit-dwarf=split -gno-pubnames -S main.cpp
+# extern int getReturn();
+# int main() {
+# return getReturn();
+# }
+ .file "main.cpp"
+ .globl main # -- Begin function main
+ .type main,@function
+main: # @main
+.Lfunc_begin0:
+ .file 1 "." "main.cpp"
+ .loc 1 2 0 # main.cpp:2:0
+ .loc 1 3 10 prologue_end # main.cpp:3:10
+ .loc 1 3 3 epilogue_begin is_stmt 0 # main.cpp:3:3
+ retq
+.Lfunc_end0:
+ .size main, .Lfunc_end0-main
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .byte 14 # DW_FORM_strp
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .ascii "\263B" # DW_AT_GNU_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x25 DW_TAG_compile_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lskel_string0 # DW_AT_comp_dir
+ .long .Lskel_string1 # DW_AT_GNU_dwo_name
+ .quad -9094791692727444213 # DW_AT_GNU_dwo_id
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_GNU_addr_base
+.Ldebug_info_end0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "main.dwo" # string offset=2
+ .section .debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+ .asciz "main" # string offset=0
+.Linfo_string1:
+ .asciz "int" # string offset=5
+.Linfo_string2:
+ .asciz "clang version 22.0.0" # string offset=9
+.Linfo_string3:
+ .asciz "main.cpp" # string offset=30
+.Linfo_string4:
+ .asciz "main.dwo" # string offset=39
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 0
+ .long 5
+ .long 9
+ .long 30
+ .long 39
+ .section .debug_info.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+ .short 4 # DWARF version number
+ .long 0 # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x22 DW_TAG_compile_unit
+ .byte 2 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 3 # DW_AT_name
+ .byte 4 # DW_AT_GNU_dwo_name
+ .quad -9094791692727444213 # DW_AT_GNU_dwo_id
+ .byte 2 # Abbrev [2] 0x19:0xf DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 0 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 2 # DW_AT_decl_line
+ .long 40 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x28:0x4 DW_TAG_base_type
+ .byte 1 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end0:
+ .section .debug_abbrev.dwo,"e",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .ascii "\201>" # DW_FORM_GNU_addr_index
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_addr,"",@progbits
+.Laddr_table_base0:
+ .quad .Lfunc_begin0
+ .ident "clang version 22.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .addrsig_sym _Z9getReturnv
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
+#--- helper.s
+# clang++ -g2 -gdwarf-4 -gsplit-dwarf=split -gno-pubnames -S helper.cpp
+# int getReturn() {
+# return 0;
+# }
+ .file "helper.cpp"
+ .globl _Z9getReturnv # -- Begin function _Z9getReturnv
+ .type _Z9getReturnv,@function
+_Z9getReturnv: # @_Z9getReturnv
+.Lfunc_begin0:
+ .file 1 "." "helper.cpp"
+ .loc 1 1 0 # helper.cpp:1:0
+ .loc 1 2 3 prologue_end # helper.cpp:2:3
+ .loc 1 2 3 epilogue_begin is_stmt 0 # helper.cpp:2:3
+ retq
+.Lfunc_end0:
+ .size _Z9getReturnv, .Lfunc_end0-_Z9getReturnv
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .byte 14 # DW_FORM_strp
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .ascii "\263B" # DW_AT_GNU_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x25 DW_TAG_compile_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lskel_string0 # DW_AT_comp_dir
+ .long .Lskel_string1 # DW_AT_GNU_dwo_name
+ .quad 5976014880088676049 # DW_AT_GNU_dwo_id
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_GNU_addr_base
+.Ldebug_info_end0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "helper.dwo" # string offset=2
+ .section .debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+ .asciz "_Z9getReturnv" # string offset=0
+.Linfo_string1:
+ .asciz "getReturn" # string offset=14
+.Linfo_string2:
+ .asciz "int" # string offset=24
+.Linfo_string3:
+ .asciz "clang version 22.0.0" # string offset=28
+.Linfo_string4:
+ .asciz "helper.cpp" # string offset=49
+.Linfo_string5:
+ .asciz "helper.dwo" # string offset=60
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 0
+ .long 14
+ .long 24
+ .long 28
+ .long 49
+ .long 60
+ .section .debug_info.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+ .short 4 # DWARF version number
+ .long 0 # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x23 DW_TAG_compile_unit
+ .byte 3 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 4 # DW_AT_name
+ .byte 5 # DW_AT_GNU_dwo_name
+ .quad 5976014880088676049 # DW_AT_GNU_dwo_id
+ .byte 2 # Abbrev [2] 0x19:0x10 DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 0 # DW_AT_linkage_name
+ .byte 1 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 41 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x29:0x4 DW_TAG_base_type
+ .byte 2 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end0:
+ .section .debug_abbrev.dwo,"e",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .ascii "\201>" # DW_FORM_GNU_addr_index
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 110 # DW_AT_linkage_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_addr,"",@progbits
+.Laddr_table_base0:
+ .quad .Lfunc_begin0
+ .ident "clang version 22.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/Inputs/dwarf5-str-split-dwarf.s b/bolt/test/X86/Inputs/dwarf5-str-split-dwarf.s
new file mode 100644
index 0000000..5e938ea
--- /dev/null
+++ b/bolt/test/X86/Inputs/dwarf5-str-split-dwarf.s
@@ -0,0 +1,368 @@
+#--- main.s
+# clang++ -g2 -gdwarf-5 -gsplit-dwarf=split -gno-pubnames -S main.cpp
+# extern int getReturn();
+# int main() {
+# return getReturn();
+# }
+ .file "main.cpp"
+ .globl main # -- Begin function main
+ .type main,@function
+main: # @main
+.Lfunc_begin0:
+ .file 0 "." "main.cpp" md5 0x9cdef858e26cf684ed9ef3b60e05bdad
+ .loc 0 2 0 # main.cpp:2:0
+ .loc 0 3 10 prologue_end # main.cpp:3:10
+ .loc 0 3 3 epilogue_begin is_stmt 0 # main.cpp:3:3
+ retq
+.Lfunc_end0:
+ .size main, .Lfunc_end0-main
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 74 # DW_TAG_skeleton_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 118 # DW_AT_dwo_name
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 4 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad -9094791692727444213
+ .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .byte 0 # DW_AT_comp_dir
+ .byte 1 # DW_AT_dwo_name
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_addr_base
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 12 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "main.dwo" # string offset=2
+ .section .debug_str_offsets,"",@progbits
+ .long .Lskel_string0
+ .long .Lskel_string1
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 24 # Length of String Offsets Set
+ .short 5
+ .short 0
+ .section .debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+ .asciz "main" # string offset=0
+.Linfo_string1:
+ .asciz "int" # string offset=5
+.Linfo_string2:
+ .asciz "clang version 22.0.0" # string offset=9
+.Linfo_string3:
+ .asciz "main.cpp" # string offset=30
+.Linfo_string4:
+ .asciz "main.dwo" # string offset=39
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 0
+ .long 5
+ .long 9
+ .long 30
+ .long 39
+ .section .debug_info.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+ .short 5 # DWARF version number
+ .byte 5 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long 0 # Offset Into Abbrev. Section
+ .quad -9094791692727444213
+ .byte 1 # Abbrev [1] 0x14:0x1a DW_TAG_compile_unit
+ .byte 2 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 3 # DW_AT_name
+ .byte 4 # DW_AT_dwo_name
+ .byte 2 # Abbrev [2] 0x1a:0xf DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 0 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 2 # DW_AT_decl_line
+ .long 41 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x29:0x4 DW_TAG_base_type
+ .byte 1 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end0:
+ .section .debug_abbrev.dwo,"e",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 118 # DW_AT_dwo_name
+ .byte 37 # DW_FORM_strx1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad .Lfunc_begin0
+.Ldebug_addr_end0:
+ .ident "clang version 22.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .addrsig_sym _Z9getReturnv
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
+#--- helper.s
+# clang++ -g2 -gdwarf-5 -gsplit-dwarf=split -gno-pubnames -S helper.cpp
+# int getReturn() {
+# return 0;
+# }
+ .file "helper.cpp"
+ .globl _Z9getReturnv # -- Begin function _Z9getReturnv
+ .type _Z9getReturnv,@function
+_Z9getReturnv: # @_Z9getReturnv
+.Lfunc_begin0:
+ .file 0 "." "helper.cpp" md5 0xc7d7879297b54325c71b3e0cfbb65e2d
+ .loc 0 1 0 # helper.cpp:1:0
+ .loc 0 2 3 prologue_end # helper.cpp:2:3
+ .loc 0 2 3 epilogue_begin is_stmt 0 # helper.cpp:2:3
+ retq
+.Lfunc_end0:
+ .size _Z9getReturnv, .Lfunc_end0-_Z9getReturnv
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 74 # DW_TAG_skeleton_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 118 # DW_AT_dwo_name
+ .byte 37 # DW_FORM_strx1
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 4 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad 5976014880088676049
+ .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .byte 0 # DW_AT_comp_dir
+ .byte 1 # DW_AT_dwo_name
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .long .Laddr_table_base0 # DW_AT_addr_base
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 12 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "helper.dwo" # string offset=2
+ .section .debug_str_offsets,"",@progbits
+ .long .Lskel_string0
+ .long .Lskel_string1
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 28 # Length of String Offsets Set
+ .short 5
+ .short 0
+ .section .debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+ .asciz "_Z9getReturnv" # string offset=0
+.Linfo_string1:
+ .asciz "getReturn" # string offset=14
+.Linfo_string2:
+ .asciz "int" # string offset=24
+.Linfo_string3:
+ .asciz "clang version 22.0.0" # string offset=28
+.Linfo_string4:
+ .asciz "helper.cpp" # string offset=49
+.Linfo_string5:
+ .asciz "helper.dwo" # string offset=60
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 0
+ .long 14
+ .long 24
+ .long 28
+ .long 49
+ .long 60
+ .section .debug_info.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+ .short 5 # DWARF version number
+ .byte 5 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long 0 # Offset Into Abbrev. Section
+ .quad 5976014880088676049
+ .byte 1 # Abbrev [1] 0x14:0x1b DW_TAG_compile_unit
+ .byte 3 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 4 # DW_AT_name
+ .byte 5 # DW_AT_dwo_name
+ .byte 2 # Abbrev [2] 0x1a:0x10 DW_TAG_subprogram
+ .byte 0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .byte 0 # DW_AT_linkage_name
+ .byte 1 # DW_AT_name
+ .byte 0 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 42 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x2a:0x4 DW_TAG_base_type
+ .byte 2 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end0:
+ .section .debug_abbrev.dwo,"e",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 118 # DW_AT_dwo_name
+ .byte 37 # DW_FORM_strx1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .byte 27 # DW_FORM_addrx
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 110 # DW_AT_linkage_name
+ .byte 37 # DW_FORM_strx1
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad .Lfunc_begin0
+.Ldebug_addr_end0:
+ .ident "clang version 22.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
index 8c05491..ef0bb55 100644
--- a/bolt/test/X86/callcont-fallthru.s
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -15,6 +15,8 @@
# External return to a landing pad/entry point call continuation
# RUN: link_fdata %s %t %t.pa-eret PREAGG-ERET
# RUN-DISABLED: link_fdata %s %t %t.pa-plt PREAGG-PLT
+## Fall-through imputing test cases
+# RUN: link_fdata %s %t %t.pa-imp PREAGG-IMP
# RUN: llvm-strip --strip-unneeded %t -o %t.strip
# RUN: llvm-objcopy --remove-section=.eh_frame %t.strip %t.noeh
@@ -63,6 +65,11 @@
# RUN-DISABLED: --check-prefix=CHECK-PLT
# CHECK-PLT: traces mismatching disassembled function contents: 0
+## Check that --impute-trace-fall-through accepts duplicate branch-only traces
+# RUN: perf2bolt %t --pa -p %t.pa-imp -o %t.pa-imp.fdata --impute-trace-fall-through
+# RUN: FileCheck %s --check-prefix=CHECK-IMP --input-file %t.pa-imp.fdata
+# CHECK-IMP: 0 [unknown] 0 1 main {{.*}} 0 3
+
.globl foo
.type foo, %function
foo:
@@ -102,6 +109,8 @@ Ltmp1:
Ltmp4:
cmpl $0x0, -0x14(%rbp)
+# PREAGG-IMP: B X:0 #Ltmp4_br# 1 0
+# PREAGG-IMP: B X:0 #Ltmp4_br# 2 0
Ltmp4_br:
je Ltmp0
diff --git a/bolt/test/X86/dwarf4-ftypes-dwp-input-dwp-output.test b/bolt/test/X86/dwarf4-ftypes-dwp-input-dwp-output.test
index 673e86b..a08e352 100644
--- a/bolt/test/X86/dwarf4-ftypes-dwp-input-dwp-output.test
+++ b/bolt/test/X86/dwarf4-ftypes-dwp-input-dwp-output.test
@@ -1,4 +1,4 @@
-# UNSUPPORTED: true
+# REQUIRES: system-linux
; RUN: rm -rf %t
; RUN: mkdir %t
; RUN: cd %t
@@ -8,7 +8,8 @@
; RUN: llvm-dwp -e main.exe -o main.exe.dwp
; RUN: llvm-dwarfdump --show-form --verbose --debug-types main.exe.dwp | FileCheck -check-prefix=PRE-BOLT %s
; RUN: llvm-dwarfdump --show-form --verbose --debug-tu-index main.exe.dwp | FileCheck -check-prefix=PRE-BOLT-DWP-TU-INDEX %s
-; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections --write-dwp
+; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections
+; RUN: llvm-dwp -e main.exe.bolt -o main.exe.bolt.dwp
; RUN: llvm-dwarfdump --show-form --verbose --debug-types main.exe.bolt.dwp | FileCheck -check-prefix=BOLT %s
; RUN: llvm-dwarfdump --show-form --verbose --debug-tu-index main.exe.bolt.dwp | FileCheck -check-prefix=BOLT-DWP-TU-INDEX %s
diff --git a/bolt/test/X86/dwarf4-str-dwp-input-dwo-output.test b/bolt/test/X86/dwarf4-str-dwp-input-dwo-output.test
new file mode 100644
index 0000000..a0e8721
--- /dev/null
+++ b/bolt/test/X86/dwarf4-str-dwp-input-dwo-output.test
@@ -0,0 +1,76 @@
+; RUN: split-file %p/Inputs/dwarf4-str-split-dwarf.s %t
+; RUN: cd %t
+; RUN: llvm-mc --split-dwarf-file=main.dwo --triple=x86_64-unknown-linux-gnu \
+; RUN: --filetype=obj main.s -o=main.o
+; RUN: llvm-mc --split-dwarf-file=helper.dwo --triple=x86_64-unknown-linux-gnu \
+; RUN: --filetype=obj helper.s -o=helper.o
+; RUN: %clang %cflags -gdwarf-4 -gsplit-dwarf=split main.o helper.o -o main.exe
+; RUN: llvm-dwp -e main.exe -o main.exe.dwp
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str main.exe.dwp \
+; RUN: | FileCheck -check-prefix=PRE-BOLT-STR %s
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets main.exe.dwp \
+; RUN: | FileCheck -check-prefix=PRE-BOLT-STR-OFFSETS %s
+; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str main.dwo.dwo \
+; RUN: | FileCheck -check-prefix=BOLT-MAIN-STR %s
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets main.dwo.dwo \
+; RUN: | FileCheck -check-prefix=BOLT-MAIN-STR-OFFSETS %s
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str helper.dwo.dwo \
+; RUN: | FileCheck -check-prefix=BOLT-HELPER-STR %s
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets helper.dwo.dwo \
+; RUN: | FileCheck -check-prefix=BOLT-HELPER-STR-OFFSETS %s
+
+;; For DWARF4, this test checks that strings are split correctly from a combined
+;; section in DWP file, into appropriate .dwo files.
+
+; PRE-BOLT-STR: 0x00000000: "main"
+; PRE-BOLT-STR: 0x00000005: "int"
+; PRE-BOLT-STR: 0x00000009: "clang version 22.0.0"
+; PRE-BOLT-STR: 0x0000001e: "main.cpp"
+; PRE-BOLT-STR: 0x00000027: "main.dwo"
+; PRE-BOLT-STR: 0x00000030: "_Z9getReturnv"
+; PRE-BOLT-STR: 0x0000003e: "getReturn"
+; PRE-BOLT-STR: 0x00000048: "helper.cpp"
+; PRE-BOLT-STR: 0x00000053: "helper.dwo"
+
+; PRE-BOLT-STR-OFFSETS: 0x00000000: Contribution size = 20, Format = DWARF32, Version = 4
+; PRE-BOLT-STR-OFFSETS: 0x00000000: 00000000 "main"
+; PRE-BOLT-STR-OFFSETS: 0x00000004: 00000005 "int"
+; PRE-BOLT-STR-OFFSETS: 0x00000008: 00000009 "clang version 22.0.0"
+; PRE-BOLT-STR-OFFSETS: 0x0000000c: 0000001e "main.cpp"
+; PRE-BOLT-STR-OFFSETS: 0x00000010: 00000027 "main.dwo"
+; PRE-BOLT-STR-OFFSETS: 0x00000014: Contribution size = 24, Format = DWARF32, Version = 4
+; PRE-BOLT-STR-OFFSETS: 0x00000014: 00000030 "_Z9getReturnv"
+; PRE-BOLT-STR-OFFSETS: 0x00000018: 0000003e "getReturn"
+; PRE-BOLT-STR-OFFSETS: 0x0000001c: 00000005 "int"
+; PRE-BOLT-STR-OFFSETS: 0x00000020: 00000009 "clang version 22.0.0"
+; PRE-BOLT-STR-OFFSETS: 0x00000024: 00000048 "helper.cpp"
+; PRE-BOLT-STR-OFFSETS: 0x00000028: 00000053 "helper.dwo"
+
+; BOLT-MAIN-STR: 0x00000000: "main"
+; BOLT-MAIN-STR: 0x00000005: "int"
+; BOLT-MAIN-STR: 0x00000009: "clang version 22.0.0"
+; BOLT-MAIN-STR: 0x0000001e: "main.cpp"
+; BOLT-MAIN-STR: 0x00000027: "main.dwo"
+
+; BOLT-MAIN-STR-OFFSETS: 0x00000000: Contribution size = 20, Format = DWARF32, Version = 4
+; BOLT-MAIN-STR-OFFSETS: 0x00000000: 00000000 "main"
+; BOLT-MAIN-STR-OFFSETS: 0x00000004: 00000005 "int"
+; BOLT-MAIN-STR-OFFSETS: 0x00000008: 00000009 "clang version 22.0.0"
+; BOLT-MAIN-STR-OFFSETS: 0x0000000c: 0000001e "main.cpp"
+; BOLT-MAIN-STR-OFFSETS: 0x00000010: 00000027 "main.dwo"
+
+; BOLT-HELPER-STR: 0x00000000: "_Z9getReturnv"
+; BOLT-HELPER-STR: 0x0000000e: "getReturn"
+; BOLT-HELPER-STR: 0x00000018: "int"
+; BOLT-HELPER-STR: 0x0000001c: "clang version 22.0.0"
+; BOLT-HELPER-STR: 0x00000031: "helper.cpp"
+; BOLT-HELPER-STR: 0x0000003c: "helper.dwo"
+
+; BOLT-HELPER-STR-OFFSETS: 0x00000000: Contribution size = 24, Format = DWARF32, Version = 4
+; BOLT-HELPER-STR-OFFSETS: 0x00000000: 00000000 "_Z9getReturnv"
+; BOLT-HELPER-STR-OFFSETS: 0x00000004: 0000000e "getReturn"
+; BOLT-HELPER-STR-OFFSETS: 0x00000008: 00000018 "int"
+; BOLT-HELPER-STR-OFFSETS: 0x0000000c: 0000001c "clang version 22.0.0"
+; BOLT-HELPER-STR-OFFSETS: 0x00000010: 00000031 "helper.cpp"
+; BOLT-HELPER-STR-OFFSETS: 0x00000014: 0000003c "helper.dwo"
diff --git a/bolt/test/X86/dwarf5-str-dwp-input-dwo-output.test b/bolt/test/X86/dwarf5-str-dwp-input-dwo-output.test
new file mode 100644
index 0000000..2e72c6a
--- /dev/null
+++ b/bolt/test/X86/dwarf5-str-dwp-input-dwo-output.test
@@ -0,0 +1,76 @@
+; RUN: split-file %p/Inputs/dwarf5-str-split-dwarf.s %t
+; RUN: cd %t
+; RUN: llvm-mc --split-dwarf-file=main.dwo --triple=x86_64-unknown-linux-gnu \
+; RUN: --filetype=obj main.s -o=main.o
+; RUN: llvm-mc --split-dwarf-file=helper.dwo --triple=x86_64-unknown-linux-gnu \
+; RUN: --filetype=obj helper.s -o=helper.o
+; RUN: %clang %cflags -gdwarf-4 -gsplit-dwarf=split main.o helper.o -o main.exe
+; RUN: llvm-dwp -e main.exe -o main.exe.dwp
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str main.exe.dwp \
+; RUN: | FileCheck -check-prefix=PRE-BOLT-STR %s
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets main.exe.dwp \
+; RUN: | FileCheck -check-prefix=PRE-BOLT-STR-OFFSETS %s
+; RUN: llvm-bolt main.exe -o main.exe.bolt --update-debug-sections
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str main.dwo.dwo \
+; RUN: | FileCheck -check-prefix=BOLT-MAIN-STR %s
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets main.dwo.dwo \
+; RUN: | FileCheck -check-prefix=BOLT-MAIN-STR-OFFSETS %s
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str helper.dwo.dwo \
+; RUN: | FileCheck -check-prefix=BOLT-HELPER-STR %s
+; RUN: llvm-dwarfdump --show-form --verbose --debug-str-offsets helper.dwo.dwo \
+; RUN: | FileCheck -check-prefix=BOLT-HELPER-STR-OFFSETS %s
+
+;; For DWARF5, this test checks that strings are split correctly from a combined
+;; section in DWP file, into appropriate .dwo files.
+
+; PRE-BOLT-STR: 0x00000000: "main"
+; PRE-BOLT-STR: 0x00000005: "int"
+; PRE-BOLT-STR: 0x00000009: "clang version 22.0.0"
+; PRE-BOLT-STR: 0x0000001e: "main.cpp"
+; PRE-BOLT-STR: 0x00000027: "main.dwo"
+; PRE-BOLT-STR: 0x00000030: "_Z9getReturnv"
+; PRE-BOLT-STR: 0x0000003e: "getReturn"
+; PRE-BOLT-STR: 0x00000048: "helper.cpp"
+; PRE-BOLT-STR: 0x00000053: "helper.dwo"
+
+; PRE-BOLT-STR-OFFSETS: 0x00000000: Contribution size = 24, Format = DWARF32, Version = 5
+; PRE-BOLT-STR-OFFSETS: 0x00000008: 00000000 "main"
+; PRE-BOLT-STR-OFFSETS: 0x0000000c: 00000005 "int"
+; PRE-BOLT-STR-OFFSETS: 0x00000010: 00000009 "clang version 22.0.0"
+; PRE-BOLT-STR-OFFSETS: 0x00000014: 0000001e "main.cpp"
+; PRE-BOLT-STR-OFFSETS: 0x00000018: 00000027 "main.dwo"
+; PRE-BOLT-STR-OFFSETS: 0x0000001c: Contribution size = 28, Format = DWARF32, Version = 5
+; PRE-BOLT-STR-OFFSETS: 0x00000024: 00000030 "_Z9getReturnv"
+; PRE-BOLT-STR-OFFSETS: 0x00000028: 0000003e "getReturn"
+; PRE-BOLT-STR-OFFSETS: 0x0000002c: 00000005 "int"
+; PRE-BOLT-STR-OFFSETS: 0x00000030: 00000009 "clang version 22.0.0"
+; PRE-BOLT-STR-OFFSETS: 0x00000034: 00000048 "helper.cpp"
+; PRE-BOLT-STR-OFFSETS: 0x00000038: 00000053 "helper.dwo"
+
+; BOLT-MAIN-STR: 0x00000000: "main"
+; BOLT-MAIN-STR: 0x00000005: "int"
+; BOLT-MAIN-STR: 0x00000009: "clang version 22.0.0"
+; BOLT-MAIN-STR: 0x0000001e: "main.cpp"
+; BOLT-MAIN-STR: 0x00000027: "main.dwo"
+
+; BOLT-MAIN-STR-OFFSETS: 0x00000000: Contribution size = 24, Format = DWARF32, Version = 5
+; BOLT-MAIN-STR-OFFSETS: 0x00000008: 00000000 "main"
+; BOLT-MAIN-STR-OFFSETS: 0x0000000c: 00000005 "int"
+; BOLT-MAIN-STR-OFFSETS: 0x00000010: 00000009 "clang version 22.0.0"
+; BOLT-MAIN-STR-OFFSETS: 0x00000014: 0000001e "main.cpp"
+; BOLT-MAIN-STR-OFFSETS: 0x00000018: 00000027 "main.dwo"
+
+; BOLT-HELPER-STR: 0x00000000: "_Z9getReturnv"
+; BOLT-HELPER-STR: 0x0000000e: "getReturn"
+; BOLT-HELPER-STR: 0x00000018: "int"
+; BOLT-HELPER-STR: 0x0000001c: "clang version 22.0.0"
+; BOLT-HELPER-STR: 0x00000031: "helper.cpp"
+; BOLT-HELPER-STR: 0x0000003c: "helper.dwo"
+
+; BOLT-HELPER-STR-OFFSETS: 0x00000000: Contribution size = 28, Format = DWARF32, Version = 5
+; BOLT-HELPER-STR-OFFSETS: 0x00000008: 00000000 "_Z9getReturnv"
+; BOLT-HELPER-STR-OFFSETS: 0x0000000c: 0000000e "getReturn"
+; BOLT-HELPER-STR-OFFSETS: 0x00000010: 00000018 "int"
+; BOLT-HELPER-STR-OFFSETS: 0x00000014: 0000001c "clang version 22.0.0"
+; BOLT-HELPER-STR-OFFSETS: 0x00000018: 00000031 "helper.cpp"
+; BOLT-HELPER-STR-OFFSETS: 0x0000001c: 0000003c "helper.dwo"
diff --git a/bolt/test/X86/hook-init.s b/bolt/test/X86/hook-init.s
new file mode 100644
index 0000000..3184541
--- /dev/null
+++ b/bolt/test/X86/hook-init.s
@@ -0,0 +1,221 @@
+## Test the different ways of hooking the init function for instrumentation (via
+## entry point, DT_INIT and via DT_INIT_ARRAY). We test the latter for both PIE
+## and non-PIE binaries because of the different ways of handling relocations
+## (static or dynamic), executable and shared library.
+## All tests perform the following steps:
+## - Compile and link for the case to be tested
+## - Some sanity-checks on the dynamic section and relocations in the binary to
+## verify it has the shape we want for testing:
+## - INTERP in Program Headers
+## - DT_INIT or DT_INIT_ARRAY in dynamic section
+## - No relative relocations for non-PIE
+## - Instrument (with extra --runtime-lib-init-hook=init/init_array options
+## in some cases)
+## - Verify generated binary
+# REQUIRES: system-linux,bolt-runtime,target=x86_64-{{.*}}
+
+# RUN: %clang %cflags -pie %s -Wl,-q -o %t.exe
+# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-INIT %s
+# RUN: llvm-readelf -l %t.exe | FileCheck --check-prefix=PH-INTERP %s
+# RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
+# RUN: llvm-bolt %t.exe -o %t --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-EP %s
+# RUN: llvm-readelf -hdrs %t | FileCheck --check-prefix=CHECK-INIT-EP %s
+# RUN: llvm-bolt %t.exe -o %t-no-ep --instrument --runtime-lib-init-hook=init | FileCheck --check-prefix=CHECK-BOLT-RT-INIT %s
+# RUN: llvm-readelf -hdrs %t-no-ep | FileCheck --check-prefix=CHECK-INIT-NO-EP %s
+# RUN: llvm-bolt %t.exe -o %t-no-ep --instrument --runtime-lib-init-hook=init_array | FileCheck --check-prefix=CHECK-BOLT-RT-INIT-ARRAY %s
+# RUN: llvm-readelf -hdrs %t-no-ep | FileCheck --check-prefix=CHECK-INIT-ARRAY-NO-EP %s
+
+# RUN: %clang -shared %cflags -pie %s -Wl,-q -o %t-shared.exe
+# RUN: llvm-readelf -d %t-shared.exe | FileCheck --check-prefix=DYN-INIT %s
+# RUN: llvm-readelf -l %t-shared.exe | FileCheck --check-prefix=PH-INTERP-SHARED %s
+# RUN: llvm-readelf -r %t-shared.exe | FileCheck --check-prefix=RELOC-SHARED-PIE %s
+# RUN: llvm-bolt %t-shared.exe -o %t-shared --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-INIT %s
+# RUN: llvm-readelf -hdrs %t-shared | FileCheck --check-prefix=CHECK-SHARED-INIT %s
+
+# RUN: %clang %cflags -pie %s -Wl,-q,-init=0 -o %t-no-init.exe
+# RUN: llvm-readelf -d %t-no-init.exe | FileCheck --check-prefix=DYN-NO-INIT %s
+# RUN: llvm-readelf -l %t-no-init.exe | FileCheck --check-prefix=PH-INTERP %s
+# RUN: llvm-readelf -r %t-no-init.exe | FileCheck --check-prefix=RELOC-PIE %s
+# RUN: llvm-bolt %t-no-init.exe -o %t-no-init --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-EP %s
+# RUN: llvm-readelf -hdrs %t-no-init | FileCheck --check-prefix=CHECK-NO-INIT-EP %s
+# RUN: llvm-bolt %t-no-init.exe -o %t-no-init-no-ep --instrument --runtime-lib-init-hook=init | FileCheck --check-prefix=CHECK-BOLT-RT-INIT-ARRAY %s
+# RUN: llvm-readelf -hdrs %t-no-init-no-ep | FileCheck --check-prefix=CHECK-NO-INIT-NO-EP %s
+
+# RUN: %clang -shared %cflags -pie %s -Wl,-q,-init=0 -o %t-shared-no-init.exe
+# RUN: llvm-readelf -d %t-shared-no-init.exe | FileCheck --check-prefix=DYN-NO-INIT %s
+# RUN: llvm-readelf -l %t-shared-no-init.exe | FileCheck --check-prefix=PH-INTERP-SHARED %s
+# RUN: llvm-readelf -r %t-shared-no-init.exe | FileCheck --check-prefix=RELOC-SHARED-PIE %s
+# RUN: llvm-bolt %t-shared-no-init.exe -o %t-shared-no-init --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-INIT-ARRAY %s
+# RUN: llvm-readelf -drs %t-shared-no-init | FileCheck --check-prefix=CHECK-SHARED-NO-INIT %s
+
+## Create a dummy shared library to link against to force creation of the dynamic section.
+# RUN: %clang %cflags %p/../Inputs/stub.c -fPIC -shared -o %t-stub.so
+# RUN: %clang %cflags %s -no-pie -Wl,-q,-init=0 %t-stub.so -o %t-no-pie-no-init.exe
+# RUN: llvm-readelf -r %t-no-pie-no-init.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
+# RUN: llvm-bolt %t-no-pie-no-init.exe -o %t-no-pie-no-init --instrument | FileCheck --check-prefix=CHECK-BOLT-RT-EP %s
+# RUN: llvm-readelf -hds %t-no-pie-no-init | FileCheck --check-prefix=CHECK-NO-PIE-NO-INIT-EP %s
+
+## With init: dynamic section should contain DT_INIT
+# DYN-INIT: (INIT)
+
+## Without init: dynamic section should only contain DT_INIT_ARRAY
+# DYN-NO-INIT-NOT: (INIT)
+# DYN-NO-INIT: (INIT_ARRAY)
+# DYN-NO-INIT: (INIT_ARRAYSZ)
+
+## With interp program header (executable)
+# PH-INTERP: Program Headers:
+# PH-INTERP: INTERP
+
+## Without interp program header (shared library)
+# PH-INTERP-SHARED: Program Headers:
+# PH-INTERP-SHARED-NOT: INTERP
+
+## With PIE: binary should have relative relocations
+# RELOC-PIE: R_X86_64_RELATIVE
+
+## Shared library: binary should have R_X86_64_64 relocations
+# RELOC-SHARED-PIE: R_X86_64_64
+
+## Without PIE: binary should not have relative relocations
+# RELOC-NO-PIE-NOT: R_X86_64_RELATIVE
+
+## Check BOLT output for the initialization hook (ELF Header Entry Point)
+# CHECK-BOLT-RT-EP: runtime library initialization was hooked via ELF Header Entry Point
+# CHECK-BOLT-RT-EP-NOT: runtime library initialization was hooked via DT_INIT
+# CHECK-BOLT-RT-EP-NOT: runtime library initialization was hooked via .init_array entry
+
+## Check BOLT output for the initialization hook (DT_INIT)
+# CHECK-BOLT-RT-INIT-NOT: runtime library initialization was hooked via ELF Header Entry Point
+# CHECK-BOLT-RT-INIT: runtime library initialization was hooked via DT_INIT
+# CHECK-BOLT-RT-INIT-NOT: runtime library initialization was hooked via .init_array entry
+
+## Check BOLT output for the initialization hook (1st entry of .init_array)
+# CHECK-BOLT-RT-INIT-ARRAY-NOT: runtime library initialization was hooked via ELF Header Entry Point
+# CHECK-BOLT-RT-INIT-ARRAY-NOT: runtime library initialization was hooked via DT_INIT
+# CHECK-BOLT-RT-INIT-ARRAY: runtime library initialization was hooked via .init_array entry
+
+## Check that entry point address is set to __bolt_runtime_start for PIE executable with DT_INIT
+# CHECK-INIT-EP: ELF Header:
+# CHECK-INIT-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Check that the DT_INIT and DT_INIT_ARRAY entries in the dynamic section were not redirected to the new entry point
+# CHECK-INIT-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-INIT-EP-NOT: (INIT) 0x[[#%x, EP_ADDR]]
+# CHECK-INIT-EP-NOT: (INIT_ARRAY) 0x[[#%x, EP_ADDR]]
+## Check that the new entry point address points to __bolt_runtime_start
+# CHECK-INIT-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-INIT-EP: {{0+}}[[#%x, EP_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that DT_INIT address is set to __bolt_runtime_start for PIE executable with DT_INIT
+# CHECK-INIT-NO-EP: ELF Header:
+# CHECK-INIT-NO-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Read Dynamic section DT_INIT and DT_INIT_ARRAY entries
+# CHECK-INIT-NO-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-INIT-NO-EP-DAG: (INIT) 0x[[#%x,INIT:]]
+# CHECK-INIT-NO-EP-DAG: (INIT_ARRAY) 0x[[#%x,INIT_ARRAY:]]
+## Check if ELF entry point address points to _start symbol and new DT_INIT entry points to __bolt_runtime_start
+# CHECK-INIT-NO-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-INIT-NO-EP-DAG: {{0+}}[[#%x, EP_ADDR]] {{.*}} _start
+# CHECK-INIT-NO-EP-DAG: {{0+}}[[#%x, INIT]] {{.*}} __bolt_runtime_start
+
+## Check that 1st entry of DT_INIT_ARRAY is set to __bolt_runtime_start and DT_INIT was not changed
+# CHECK-INIT-ARRAY-NO-EP: ELF Header:
+# CHECK-INIT-ARRAY-NO-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Read Dynamic section DT_INIT and DT_INIT_ARRAY entries
+# CHECK-INIT-ARRAY-NO-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-INIT-ARRAY-NO-EP-DAG: (INIT) 0x[[#%x,INIT:]]
+# CHECK-INIT-ARRAY-NO-EP-DAG: (INIT_ARRAY) 0x[[#%x,INIT_ARRAY:]]
+## Read the dynamic relocation from 1st entry of .init_array
+# CHECK-INIT-ARRAY-NO-EP: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
+# CHECK-INIT-ARRAY-NO-EP: {{0+}}[[#%x,INIT_ARRAY]] {{.*}} R_X86_64_RELATIVE [[#%x,INIT_ADDR:]]
+# CHECK-INIT-ARRAY-NO-EP-NOT: {{0+}}[[#%x,INIT_ARRAY]] {{.*}} R_X86_64_RELATIVE [[#%x,INIT]]
+## Check that 1st entry of .init_array points to __bolt_runtime_start
+# CHECK-INIT-ARRAY-NO-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-INIT-ARRAY-NO-EP-DAG: {{0+}}[[#%x, EP_ADDR]] {{.*}} _start
+# CHECK-INIT-ARRAY-NO-EP-DAG: {{[0-9]]*}}: {{0+}}[[#%x, INIT_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that entry point address is set to __bolt_runtime_start for PIE executable without DT_INIT
+# CHECK-NO-INIT-EP: ELF Header:
+# CHECK-NO-INIT-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Check that the DT_INIT and DT_INIT_ARRAY entries in the dynamic section were not redirected to the new entry point
+# CHECK-NO-INIT-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-NO-INIT-EP-NOT: (INIT) 0x[[#%x, EP_ADDR]]
+# CHECK-NO-INIT-EP-NOT: (INIT_ARRAY) 0x[[#%x, EP_ADDR]]
+## Check that the new entry point address points to __bolt_runtime_start
+# CHECK-NO-INIT-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-NO-INIT-EP: {{0+}}[[#%x, EP_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that DT_INIT is set to __bolt_runtime_start for shared library with DT_INIT
+# CHECK-SHARED-INIT: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-SHARED-INIT-DAG: (INIT) 0x[[#%x, INIT:]]
+# CHECK-SHARED-INIT-DAG: (INIT_ARRAY) 0x[[#%x, INIT_ARRAY:]]
+## Check that the dynamic relocation at .init_array was not patched
+# CHECK-SHARED-INIT: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
+# CHECK-SHARED-INIT-NOT: {{0+}}[[#%x, INIT_ARRAY]] {{.*}} R_X86_64_64 {{0+}}[[#%x, INIT]]
+## Check that dynamic section DT_INIT points to __bolt_runtime_start
+# CHECK-SHARED-INIT: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-SHARED-INIT: {{0+}}[[#%x, INIT]] {{.*}} __bolt_runtime_start
+
+## Check that 1st entry of DT_INIT_ARRAY is set to __bolt_runtime_start for PIE executable without DT_INIT
+# CHECK-NO-INIT-NO-EP: ELF Header:
+# CHECK-NO-INIT-NO-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+# CHECK-NO-INIT-NO-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-NO-INIT-NO-EP-NOT: (INIT)
+# CHECK-NO-INIT-NO-EP: (INIT_ARRAY) 0x[[#%x,INIT_ARRAY:]]
+## Read the dynamic relocation from 1st entry of .init_array
+# CHECK-NO-INIT-NO-EP: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
+# CHECK-NO-INIT-NO-EP: {{0+}}[[#%x,INIT_ARRAY]] {{.*}} R_X86_64_RELATIVE [[#%x,INIT_ADDR:]]
+## Check that 1st entry of .init_array points to __bolt_runtime_start
+# CHECK-NO-INIT-NO-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-NO-INIT-NO-EP-DAG: {{0+}}[[#%x, EP_ADDR]] {{.*}} _start
+# CHECK-NO-INIT-NO-EP-DAG: {{[0-9]]*}}: {{0+}}[[#%x, INIT_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that 1st entry of DT_INIT_ARRAY is set to __bolt_runtime_start for shared library without DT_INIT
+# CHECK-SHARED-NO-INIT: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-SHARED-NO-INIT-NOT: (INIT)
+# CHECK-SHARED-NO-INIT: (INIT_ARRAY) 0x[[#%x,INIT_ARRAY:]]
+## Read the dynamic relocation from 1st entry of .init_array
+# CHECK-SHARED-NO-INIT: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
+# CHECK-SHARED-NO-INIT: {{0+}}[[#%x, INIT_ARRAY]] {{.*}} R_X86_64_64 [[#%x,INIT_ADDR:]]
+## Check that 1st entry of .init_array points to __bolt_runtime_start
+# CHECK-SHARED-NO-INIT: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-SHARED-NO-INIT: {{[0-9]]*}}: {{0+}}[[#%x, INIT_ADDR]] {{.*}} __bolt_runtime_start
+
+## Check that entry point address is set to __bolt_runtime_start for non-PIE executable without DT_INIT
+# CHECK-NO-PIE-NO-INIT-EP: ELF Header:
+# CHECK-NO-PIE-NO-INIT-EP: Entry point address: 0x[[#%x,EP_ADDR:]]
+## Check that the DT_INIT and DT_INIT_ARRAY entries in the dynamic section were not redirected to the new entry point
+# CHECK-NO-PIE-NO-INIT-EP: Dynamic section at offset {{.*}} contains {{.*}} entries:
+# CHECK-NO-PIE-NO-INIT-EP-NOT: (INIT) 0x[[#%x, EP_ADDR]]
+# CHECK-NO-PIE-NO-INIT-EP-NOT: (INIT_ARRAY) 0x[[#%x, EP_ADDR]]
+## Check that the new entry point address points to __bolt_runtime_start
+# CHECK-NO-PIE-NO-INIT-EP: Symbol table '.symtab' contains {{.*}} entries:
+# CHECK-NO-PIE-NO-INIT-EP: {{0+}}[[#%x, EP_ADDR]] {{.*}} __bolt_runtime_start
+
+ .globl _start
+ .type _start, %function
+_start:
+ # Dummy relocation to force relocation mode.
+ .reloc 0, R_X86_64_NONE
+ retq
+.size _start, .-_start
+
+ .globl _init
+ .type _init, %function
+_init:
+ retq
+ .size _init, .-_init
+
+ .globl _fini
+ .type _fini, %function
+_fini:
+ retq
+ .size _fini, .-_fini
+
+ .section .init_array,"aw"
+ .align 8
+ .quad _init
+
+ .section .fini_array,"aw"
+ .align 8
+ .quad _fini
diff --git a/bolt/test/X86/instrument-no-fini.s b/bolt/test/X86/instrument-no-fini.s
new file mode 100644
index 0000000..fff2376
--- /dev/null
+++ b/bolt/test/X86/instrument-no-fini.s
@@ -0,0 +1,34 @@
+# Test that BOLT produces an error by default and passes with the --instrumentation-sleep-time option when neither DT_FINI nor DT_FINI_ARRAY is present.
+
+# REQUIRES: system-linux,bolt-runtime,target=x86_64-{{.*}}
+
+# RUN: llvm-mc -triple x86_64 -filetype=obj %s -o %t.o
+# RUN: ld.lld -q -pie -o %t.exe %t.o
+# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=CHECK-NO-FINI %s
+# RUN: not llvm-bolt --instrument -o %t.out %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT-FAIL
+# RUN: llvm-bolt --instrument --instrumentation-sleep-time=1 -o %t.out %t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT-PASS
+
+# CHECK-NO-FINI: INIT
+# CHECK-NO-FINI-NOT: FINI
+# CHECK-NO-FINI-NOT: FINI_ARRAY
+
+# CHECK-BOLT-FAIL: Instrumentation needs either DT_FINI or DT_FINI_ARRAY
+
+# CHECK-BOLT-PASS-NOT: Instrumentation needs either DT_FINI or DT_FINI_ARRAY
+# CHECK-BOLT-PASS: runtime library initialization was hooked via DT_INIT
+
+ .text
+ .globl _start
+ .type _start, %function
+_start:
+ # BOLT errs when instrumenting without relocations; create a dummy one.
+ .reloc 0, R_X86_64_NONE
+ retq
+ .size _start, .-_start
+
+ .globl _init
+ .type _init, %function
+ # Force DT_INIT to be created (needed for instrumentation).
+_init:
+ retq
+ .size _init, .-_init
diff --git a/bolt/test/X86/internal-call-instrument-so.s b/bolt/test/X86/internal-call-instrument-so.s
index 99e5b29..fe23bc6 100644
--- a/bolt/test/X86/internal-call-instrument-so.s
+++ b/bolt/test/X86/internal-call-instrument-so.s
@@ -5,7 +5,7 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# Delete our BB symbols so BOLT doesn't mark them as entry points
# RUN: llvm-strip --strip-unneeded %t.o
-# RUN: ld.lld %t.o -o %t.exe -q -shared -fini=_fini
+# RUN: ld.lld %t.o -o %t.exe -q -shared -fini=_fini -init=_init
# RUN: llvm-bolt --instrument %t.exe --relocs -o %t.out
.text
@@ -48,6 +48,13 @@ _fini:
hlt
.size _fini, .-_fini
+ .globl _init
+ .type _init, %function
+ .p2align 4
+_init:
+ retq
+ .size _init, .-_init
+
.data
.globl var
var:
diff --git a/bolt/test/X86/lit.local.cfg b/bolt/test/X86/lit.local.cfg
index ea9928d..680ebdd 100644
--- a/bolt/test/X86/lit.local.cfg
+++ b/bolt/test/X86/lit.local.cfg
@@ -1,7 +1,7 @@
if not "X86" in config.root.targets:
config.unsupported = True
-flags = "--target=x86_64-unknown-linux-gnu -nostdlib"
+flags = "--target=x86_64-unknown-linux-gnu -nostdlib -mllvm -x86-asm-syntax=att"
config.substitutions.insert(0, ("%cflags", f"%cflags {flags}"))
config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}"))
diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test
index accb474..9224cf1 100644
--- a/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test
+++ b/bolt/test/X86/match-blocks-with-pseudo-probes-inline.test
@@ -30,7 +30,7 @@ functions:
insns: 11
hash: 0x1
exec: 1
- probes: [ { blx: 9 } ]
+ probes: [ { blk: [ 1, 4 ] } ]
inline_tree: [ { } ]
- name: foo
fid: 10
@@ -43,7 +43,7 @@ functions:
hash: 0x2
exec: 1
succ: [ { bid: 3, cnt: 0 } ]
- probes: [ { blx: 3 } ]
+ probes: [ { blk: [ 1, 2 ] } ]
inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
- name: main
fid: 11
@@ -56,7 +56,7 @@ functions:
hash: 0x3
exec: 1
succ: [ { bid: 3, cnt: 0 } ]
- probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
+ probes: [ { blk: [ 1, 2 ], ids: [ 1 ] }, { blk: [ 1 ] } ]
inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
pseudo_probe_desc:
gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ]
diff --git a/bolt/test/X86/match-blocks-with-pseudo-probes.test b/bolt/test/X86/match-blocks-with-pseudo-probes.test
index 40cb64ee..7be327d6 100644
--- a/bolt/test/X86/match-blocks-with-pseudo-probes.test
+++ b/bolt/test/X86/match-blocks-with-pseudo-probes.test
@@ -55,7 +55,7 @@ functions:
hash: 0xFFFFFFFFFFFFFFF1
insns: 1
succ: [ { bid: 3, cnt: 1} ]
- probes: [ { blx: 1 } ]
+ probes: [ { blk: [ 1 ] } ]
inline_tree: [ { g: 0 } ]
pseudo_probe_desc:
gs: [ 0xDB956436E78DD5FA ]
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index e5e8aad..9748fc1 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -14,17 +14,17 @@
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
-# CHECK-YAML: probes: [ { blx: 9 } ]
+# CHECK-YAML: probes: [ { blk: [ 1, 4 ] } ]
# CHECK-YAML: inline_tree: [ { } ]
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
-# CHECK-YAML: probes: [ { blx: 3 } ]
+# CHECK-YAML: probes: [ { blk: [ 1, 2 ] } ]
# CHECK-YAML: inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
-# CHECK-YAML: probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
+# CHECK-YAML: probes: [ { blk: [ 1, 2 ], ids: [ 1 ] }, { } ]
# CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
#
# CHECK-YAML: pseudo_probe_desc:
diff --git a/bolt/test/X86/pseudoprobe-decoding-noinline.test b/bolt/test/X86/pseudoprobe-decoding-noinline.test
index 36a2fab..4ba51cd 100644
--- a/bolt/test/X86/pseudoprobe-decoding-noinline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-noinline.test
@@ -15,17 +15,18 @@
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
-# CHECK-YAML: probes: [ { blx: 9 } ]
+# CHECK-YAML: probes: [ { blk: [ 1, 4 ] } ]
# CHECK-YAML: inline_tree: [ { } ]
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
-# CHECK-YAML: probes: [ { blx: 3 } ]
+# CHECK-YAML: probes: [ { blk: [ 1, 2 ] } ]
# CHECK-YAML: inline_tree: [ { g: 2 } ]
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
-# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ]
+# CHECK-YAML: calls: [ { off: 0x4, fid: 0, cnt: 0, pp: 2 } ]
+# CHECK-YAML: probes: [ { } ]
# CHECK-YAML: inline_tree: [ { g: 1 } ]
#
# CHECK-YAML: pseudo_probe_desc:
diff --git a/bolt/test/X86/rseq.s b/bolt/test/X86/rseq.s
new file mode 100644
index 0000000..ef81bca
--- /dev/null
+++ b/bolt/test/X86/rseq.s
@@ -0,0 +1,38 @@
+## Check that llvm-bolt avoids optimization of functions referenced from
+## __rseq_cs section, i.e. containing critical sections and abort handlers used
+## by restartable sequences in tcmalloc.
+
+# RUN: %clang %cflags %s -o %t -nostdlib -no-pie -Wl,-q
+# RUN: llvm-bolt %t -o %t.bolt --print-cfg 2>&1 | FileCheck %s
+# RUN: %clang %cflags %s -o %t.pie -nostdlib -pie -Wl,-q
+# RUN: llvm-bolt %t.pie -o %t.pie.bolt 2>&1 | FileCheck %s
+
+# CHECK: restartable sequence reference detected in _start
+# CHECK: restartable sequence reference detected in __rseq_abort
+
+## Force relocations against .text
+ .text
+.reloc 0, R_X86_64_NONE
+
+ .global _start
+ .type _start, %function
+_start:
+ pushq %rbp
+ mov %rsp, %rbp
+.L1:
+ pop %rbp
+.L2:
+ retq
+ .size _start, .-_start
+
+ .section __rseq_abort, "ax"
+## Signature for rseq abort IP. Unmarked in the symbol table.
+ .byte 0x0f, 0x1f, 0x05
+ .long 0x42424242
+.L3:
+ jmp .L2
+
+.section __rseq_cs, "aw"
+.balign 32
+ .quad .L1
+ .quad .L3
diff --git a/bolt/test/X86/unclaimed-jt-entries.s b/bolt/test/X86/unclaimed-jt-entries.s
index 31b72c4..6cb0b08 100644
--- a/bolt/test/X86/unclaimed-jt-entries.s
+++ b/bolt/test/X86/unclaimed-jt-entries.s
@@ -32,12 +32,7 @@
# RUN: llvm-bolt %t.exe -v=1 -o %t.out 2>&1 | FileCheck %s
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in main
-# CHECK: BOLT-WARNING: failed to post-process indirect branches for main
+# CHECK: BOLT-WARNING: 11 unclaimed data relocations remain against function main
.text
.globl main
diff --git a/bolt/test/X86/unclaimed-pc-rel.s b/bolt/test/X86/unclaimed-pc-rel.s
new file mode 100644
index 0000000..5292ccc
--- /dev/null
+++ b/bolt/test/X86/unclaimed-pc-rel.s
@@ -0,0 +1,24 @@
+## Check that unclaimed PC-relative relocation from data to code is detected
+## and reported to the user.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -no-pie %s -o %t.exe -Wl,-q -nostartfiles
+# RUN: not llvm-bolt %t.exe -o %t.bolt --strict 2>&1 | FileCheck %s
+
+# CHECK: BOLT-ERROR: 1 unclaimed PC-relative relocation(s) left in data
+
+ .text
+ .globl _start
+ .type _start, %function
+_start:
+ movl $42, %eax
+.L0:
+ ret
+ .size _start, .-_start
+
+## Force relocation mode.
+ .reloc 0, R_X86_64_NONE
+
+ .section .rodata
+ .long .L0-.
diff --git a/bolt/test/X86/validate-branch-target.s b/bolt/test/X86/validate-branch-target.s
new file mode 100644
index 0000000..3cc5b91
--- /dev/null
+++ b/bolt/test/X86/validate-branch-target.s
@@ -0,0 +1,33 @@
+## Test that BOLT warns when it detects that the target
+## of a direct call/branch is an invalid instruction
+
+# REQUIRES: system-linux
+# RUN: rm -rf %t && mkdir -p %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o main.o
+# RUN: %clang %cflags -pie -Wl,-q %t/main.o -o main.exe
+# RUN: llvm-bolt %t/main.exe -o %t/main.exe.bolt -lite=0 2>&1 | FileCheck %s --check-prefix=CHECK-TARGETS
+
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function external_corrupt: an external branch/call targets an invalid instruction in function external_func at address 0x{{[0-9a-f]+}}; ignoring both functions
+# CHECK-TARGETS: BOLT-WARNING: corrupted control flow detected in function internal_corrupt: an internal branch/call targets an invalid instruction at address 0x{{[0-9a-f]+}}; ignoring this function
+
+
+.globl internal_corrupt
+.type internal_corrupt,@function
+internal_corrupt:
+ jb data_in_code + 1 # targeting the data in code, and jump into the middle of 'xorb' instruction
+data_in_code:
+ .byte 0x34, 0x01 # data in code, will be disassembled as 'xorb 0x1, %al'
+.size internal_corrupt,.-internal_corrupt
+
+
+.globl external_corrupt
+.type external_corrupt,@function
+external_corrupt:
+ jb external_func + 1 # targeting the middle of normal instruction externally
+.size external_corrupt,.-external_corrupt
+
+.globl external_func
+.type external_func,@function
+external_func:
+ addq $1, %rax # normal instruction
+.size external_func,.-external_func
diff --git a/bolt/test/print-only.test b/bolt/test/print-only.test
new file mode 100644
index 0000000..5e8bcd0
--- /dev/null
+++ b/bolt/test/print-only.test
@@ -0,0 +1,25 @@
+# Verify that `--print-only` and `--print-only-file` work as expected.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -x c %p/Inputs/bolt_icf.cpp -o %t -Wl,-q
+# RUN: llvm-bolt %t -o %t.bolt --icf=none --print-cfg \
+# RUN: --print-only=foo.*,bar.*,main.* 2>&1 | FileCheck %s
+
+# RUN: echo "bar.*" > %t.pof
+# RUN: echo "main.*" >> %t.pof
+# RUN: llvm-bolt %t -o %t.bolt --icf=none --print-cfg \
+# RUN: --print-only=foo.* --print-only-file=%t.pof \
+# RUN: 2>&1 | FileCheck %s
+
+# RUN: echo "foo.*" >> %t.pof
+# RUN: llvm-bolt %t -o %t.bolt --icf=none --print-cfg \
+# RUN: --print-only-file=%t.pof 2>&1 | FileCheck %s
+
+# CHECK-NOT: Binary Function "fiz" after building cfg
+# CHECK-NOT: Binary Function "faz" after building cfg
+# CHECK-NOT: Binary Function "zip" after building cfg
+# CHECK-NOT: Binary Function "zap" after building cfg
+# CHECK: Binary Function "foo" after building cfg
+# CHECK: Binary Function "bar" after building cfg
+# CHECK: Binary Function "main" after building cfg
diff --git a/bolt/test/runtime/AArch64/inline-memcpy.s b/bolt/test/runtime/AArch64/inline-memcpy.s
index dc59a08..75066c8 100644
--- a/bolt/test/runtime/AArch64/inline-memcpy.s
+++ b/bolt/test/runtime/AArch64/inline-memcpy.s
@@ -7,7 +7,7 @@
# RUN: llvm-bolt %t.exe --inline-memcpy -o %t.bolt 2>&1 | FileCheck %s --check-prefix=CHECK-INLINE
# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=CHECK-ASM
-# Verify BOLT reports that it inlined memcpy calls (11 successful inlines out of 16 total calls)
+# Verify BOLT reports that it inlined memcpy calls (11 successful inlines out of 17 total calls)
# CHECK-INLINE: BOLT-INFO: inlined 11 memcpy() calls
# Each function should use optimal size-specific instructions and NO memcpy calls
@@ -81,11 +81,14 @@
# CHECK-ASM: bl{{.*}}<memcpy
# Register move should NOT be inlined (size unknown at compile time)
-# CHECK-ASM-LABEL: <test_register_move_negative>:
+# CHECK-ASM-LABEL: <test_register_move_unknown>:
+# CHECK-ASM: bl{{.*}}<memcpy
+
+# CHECK-ASM-LABEL: <test_x2_rewrite_unknown>:
# CHECK-ASM: bl{{.*}}<memcpy
# Live-in parameter should NOT be inlined (size unknown at compile time)
-# CHECK-ASM-LABEL: <test_live_in_negative>:
+# CHECK-ASM-LABEL: <test_live_in_unknown>:
# CHECK-ASM: bl{{.*}}<memcpy
# _memcpy8 should be inlined with end-pointer return (dest+size)
@@ -259,9 +262,9 @@ test_4_byte_add_immediate:
ret
.size test_4_byte_add_immediate, .-test_4_byte_add_immediate
- .globl test_register_move_negative
- .type test_register_move_negative,@function
-test_register_move_negative:
+ .globl test_register_move_unknown
+ .type test_register_move_unknown,@function
+test_register_move_unknown:
stp x29, x30, [sp, #-32]!
mov x29, sp
add x1, sp, #16
@@ -271,11 +274,20 @@ test_register_move_negative:
bl memcpy
ldp x29, x30, [sp], #32
ret
- .size test_register_move_negative, .-test_register_move_negative
+ .size test_register_move_unknown, .-test_register_move_unknown
+
+ .globl test_x2_rewrite_unknown
+ .type test_x2_rewrite_unknown,@function
+test_x2_rewrite_unknown:
+ mov x2, #8
+ ldr x2, [sp, #24]
+ bl memcpy
+ ret
+ .size test_x2_rewrite_unknown, .-test_x2_rewrite_unknown
- .globl test_live_in_negative
- .type test_live_in_negative,@function
-test_live_in_negative:
+ .globl test_live_in_unknown
+ .type test_live_in_unknown,@function
+test_live_in_unknown:
# x2 comes in as parameter, no instruction sets it (should NOT inline)
stp x29, x30, [sp, #-32]!
mov x29, sp
@@ -285,7 +297,7 @@ test_live_in_negative:
bl memcpy
ldp x29, x30, [sp], #32
ret
- .size test_live_in_negative, .-test_live_in_negative
+ .size test_live_in_unknown, .-test_live_in_unknown
.globl test_memcpy8_4_byte
.type test_memcpy8_4_byte,@function
diff --git a/bolt/test/runtime/AArch64/pacret-function-split.cpp b/bolt/test/runtime/AArch64/pacret-eh-function-split.cpp
index 208fc5c..208fc5c 100644
--- a/bolt/test/runtime/AArch64/pacret-function-split.cpp
+++ b/bolt/test/runtime/AArch64/pacret-eh-function-split.cpp
diff --git a/bolt/test/runtime/AArch64/negate-ra-state.cpp b/bolt/test/runtime/AArch64/pacret-eh.cpp
index 60b0b08..60b0b08 100644
--- a/bolt/test/runtime/AArch64/negate-ra-state.cpp
+++ b/bolt/test/runtime/AArch64/pacret-eh.cpp
diff --git a/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
new file mode 100644
index 0000000..0f5e9a3
--- /dev/null
+++ b/bolt/test/runtime/AArch64/pacret-synchronous-unwind.cpp
@@ -0,0 +1,36 @@
+// Test to demonstrate that functions compiled with synchronous unwind tables
+// are ignored by the PointerAuthCFIAnalyzer.
+// Exception handling is needed to have _any_ unwind tables, otherwise the
+// PointerAuthCFIAnalyzer does not run on these functions, so it does not ignore
+// any function.
+//
+// REQUIRES: system-linux,bolt-runtime
+//
+// RUN: %clangxx --target=aarch64-unknown-linux-gnu \
+// RUN: -mbranch-protection=pac-ret \
+// RUN: -fno-asynchronous-unwind-tables \
+// RUN: %s -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt | FileCheck %s --check-prefix=CHECK
+
+// Number of functions with .cfi-negate-ra-state in the binary is
+// platform-dependent.
+// CHECK: BOLT-INFO: PointerAuthCFIAnalyzer ran on {{[0-9]+}} functions.
+// CHECK-SAME: Ignored {{[0-9]+}} functions ({{[0-9.]+}}%) because of CFI
+// CHECK-SAME: inconsistencies
+// CHECK-NEXT: BOLT-WARNING: PointerAuthCFIAnalyzer only supports
+// CHECK-SAME: asynchronous unwind tables. For C compilers, see
+// CHECK-SAME: -fasynchronous-unwind-tables.
+
+#include <cstdio>
+#include <stdexcept>
+
+void foo() { throw std::runtime_error("Exception from foo()."); }
+
+int main() {
+ try {
+ foo();
+ } catch (const std::exception &e) {
+ printf("Exception caught: %s\n", e.what());
+ }
+ return 0;
+}
diff --git a/bolt/test/runtime/X86/instrument-wrong-target.s b/bolt/test/runtime/X86/instrument-wrong-target.s
index 343d93a..fa40d43 100644
--- a/bolt/test/runtime/X86/instrument-wrong-target.s
+++ b/bolt/test/runtime/X86/instrument-wrong-target.s
@@ -19,6 +19,13 @@ _start:
ret
.size _start, .-_start
+ .globl _init
+ .type _init, %function
+ # Force DT_INIT to be created (needed for instrumentation).
+_init:
+ ret
+ .size _init, .-_init
+
.globl _fini
.type _fini, %function
# Force DT_FINI to be created (needed for instrumentation).
diff --git a/bolt/test/runtime/X86/unclaimed-jt-entries.s b/bolt/test/runtime/X86/unclaimed-jt-entries.s
index 1725fb8..943b7e2 100644
--- a/bolt/test/runtime/X86/unclaimed-jt-entries.s
+++ b/bolt/test/runtime/X86/unclaimed-jt-entries.s
@@ -18,14 +18,9 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags %S/Inputs/unclaimed-jt-entries.c -no-pie %t.o -o %t.exe -Wl,-q
-# RUN: llvm-bolt %t.exe -v=1 -o %t.out --sequential-disassembly 2>&1 | FileCheck %s
+# RUN: llvm-bolt %t.exe -o %t.out 2>&1 | FileCheck %s
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: unclaimed data to code reference (possibly an unrecognized jump table entry) to .Ltmp[[#]] in func
-# CHECK: BOLT-WARNING: failed to post-process indirect branches for func
+# CHECK: BOLT-WARNING: 11 unclaimed data relocations remain against function func
# Run the optimized binary
# RUN: %t.out 3 | FileCheck %s --check-prefix=CHECK3
diff --git a/bolt/test/safe-icf-relative-vtable.cpp b/bolt/test/safe-icf-relative-vtable.cpp
new file mode 100644
index 0000000..59ddcc6
--- /dev/null
+++ b/bolt/test/safe-icf-relative-vtable.cpp
@@ -0,0 +1,26 @@
+// Test that safe ICF works with binaries that contain relative vtables.
+
+// REQUIRES: system-linux,asserts
+
+// RUN: %clang %cxxflags -o %t.so %s -Wl,-q -fno-rtti
+// RUN: llvm-bolt %t.so -o %t.bolt --no-threads --icf=safe \
+// RUN: --debug-only=bolt-icf 2>&1 | FileCheck %s
+
+// RUN: %clang %cxxflags -o %t.so %s -Wl,-q -fno-rtti \
+// RUN: -fexperimental-relative-c++-abi-vtables
+// RUN: llvm-bolt %t.so -o %t.bolt --no-threads --icf=safe \
+// RUN: --debug-only=bolt-icf 2>&1 | FileCheck %s
+
+// CHECK: folding {{.*bar.*}} into {{.*foo.*}}
+// CHECK-NOT: skipping function with reference taken {{.*bar.*}}
+
+class TT {
+public:
+ virtual int foo(int a) { return ++a; }
+ virtual int bar(int a) { return ++a; }
+};
+
+int main() {
+ TT T;
+ return T.foo(0) + T.bar(1);
+}
diff --git a/bolt/unittests/CMakeLists.txt b/bolt/unittests/CMakeLists.txt
index 64414b8..d47ddc4 100644
--- a/bolt/unittests/CMakeLists.txt
+++ b/bolt/unittests/CMakeLists.txt
@@ -7,3 +7,4 @@ endfunction()
add_subdirectory(Core)
add_subdirectory(Profile)
+add_subdirectory(Passes)
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index bc37ced..7b6f162 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -143,6 +143,61 @@ TEST_P(MCPlusBuilderTester, AArch64_CmpJE) {
ASSERT_EQ(Label, BB->getLabel());
}
+TEST_P(MCPlusBuilderTester, AArch64_BTI) {
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+ BinaryFunction *BF = BC->createInjectedBinaryFunction("BF", true);
+ std::unique_ptr<BinaryBasicBlock> BB = BF->createBasicBlock();
+
+ MCInst BTIjc;
+ BC->MIB->createBTI(BTIjc, true, true);
+ BB->addInstruction(BTIjc);
+ auto II = BB->begin();
+ ASSERT_EQ(II->getOpcode(), AArch64::HINT);
+ ASSERT_EQ(II->getOperand(0).getImm(), 38);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
+ BC->MIB->updateBTIVariant(*II, true, false);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+
+ MCInst BTIj;
+ BC->MIB->createBTI(BTIj, false, true);
+ II = BB->addInstruction(BTIj);
+ ASSERT_EQ(II->getOpcode(), AArch64::HINT);
+ ASSERT_EQ(II->getOperand(0).getImm(), 36);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
+ BC->MIB->updateBTIVariant(*II, true, true);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, true));
+
+ MCInst BTIc;
+ BC->MIB->createBTI(BTIc, true, false);
+ II = BB->addInstruction(BTIc);
+ ASSERT_EQ(II->getOpcode(), AArch64::HINT);
+ ASSERT_EQ(II->getOperand(0).getImm(), 34);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ BC->MIB->updateBTIVariant(*II, false, true);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, false, true));
+
+#ifndef NDEBUG
+ MCInst BTIinvalid;
+ ASSERT_DEATH(BC->MIB->createBTI(BTIinvalid, false, false),
+ "No target kinds!");
+#endif
+
+ MCInst Paciasp = MCInstBuilder(AArch64::PACIASP);
+ II = BB->addInstruction(Paciasp);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, true, true));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, false, true));
+ ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
+
+ MCInst Pacibsp = MCInstBuilder(AArch64::PACIBSP);
+ II = BB->addInstruction(Pacibsp);
+ ASSERT_TRUE(BC->MIB->isBTILandingPad(*II, true, false));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, true, true));
+ ASSERT_FALSE(BC->MIB->isBTILandingPad(*II, false, true));
+ ASSERT_TRUE(BC->MIB->isImplicitBTIC(*II));
+}
+
TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) {
if (GetParam() != Triple::aarch64)
GTEST_SKIP();
diff --git a/bolt/unittests/Passes/CMakeLists.txt b/bolt/unittests/Passes/CMakeLists.txt
new file mode 100644
index 0000000..17ae880
--- /dev/null
+++ b/bolt/unittests/Passes/CMakeLists.txt
@@ -0,0 +1,30 @@
+set(LLVM_LINK_COMPONENTS
+ DebugInfoDWARF
+ Object
+ MC
+ ${BOLT_TARGETS_TO_BUILD}
+ )
+
+add_bolt_unittest(PassTests
+ PointerAuthCFIFixup.cpp
+
+ DISABLE_LLVM_LINK_LLVM_DYLIB
+ )
+
+target_link_libraries(PassTests
+ PRIVATE
+ LLVMBOLTCore
+ LLVMBOLTRewrite
+ LLVMBOLTPasses
+ LLVMBOLTProfile
+ LLVMBOLTUtils
+ )
+
+foreach (tgt ${BOLT_TARGETS_TO_BUILD})
+ include_directories(
+ ${LLVM_MAIN_SRC_DIR}/lib/Target/${tgt}
+ ${LLVM_BINARY_DIR}/lib/Target/${tgt}
+ )
+ string(TOUPPER "${tgt}" upper)
+ target_compile_definitions(PassTests PRIVATE "${upper}_AVAILABLE")
+endforeach()
diff --git a/bolt/unittests/Passes/PointerAuthCFIFixup.cpp b/bolt/unittests/Passes/PointerAuthCFIFixup.cpp
new file mode 100644
index 0000000..0d54b31
--- /dev/null
+++ b/bolt/unittests/Passes/PointerAuthCFIFixup.cpp
@@ -0,0 +1,339 @@
+//===- bolt/unittests/Passes/PointerAuthCFIFixup.cpp ----------------------===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef AARCH64_AVAILABLE
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#endif // AARCH64_AVAILABLE
+
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Passes/PointerAuthCFIFixup.h"
+#include "bolt/Rewrite/BinaryPassManager.h"
+#include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace bolt;
+
+namespace opts {
+extern cl::opt<bool> PrintPAuthCFIAnalyzer;
+} // namespace opts
+
+namespace {
+struct PassTester : public testing::TestWithParam<Triple::ArchType> {
+ void SetUp() override {
+ initalizeLLVM();
+ prepareElf();
+ initializeBolt();
+ }
+
+protected:
+ void initalizeLLVM() {
+#define BOLT_TARGET(target) \
+ LLVMInitialize##target##TargetInfo(); \
+ LLVMInitialize##target##TargetMC(); \
+ LLVMInitialize##target##AsmParser(); \
+ LLVMInitialize##target##Disassembler(); \
+ LLVMInitialize##target##Target(); \
+ LLVMInitialize##target##AsmPrinter();
+
+#include "bolt/Core/TargetConfig.def"
+ }
+
+#define PREPARE_FUNC(name) \
+ constexpr uint64_t FunctionAddress = 0x1000; \
+ BinaryFunction *BF = BC->createBinaryFunction( \
+ name, *TextSection, FunctionAddress, /*Size=*/0, /*SymbolSize=*/0, \
+ /*Alignment=*/16); \
+ /* Make sure the pass runs on the BF.*/ \
+ BF->updateState(BinaryFunction::State::CFG); \
+ BF->setContainedNegateRAState(); \
+ /* All tests need at least one BB. */ \
+ BinaryBasicBlock *BB = BF->addBasicBlock(); \
+ BF->addEntryPoint(*BB); \
+ BB->setCFIState(0);
+
+ void prepareElf() {
+ memcpy(ElfBuf, "\177ELF", 4);
+ ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
+ EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
+ EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
+ EHdr->e_machine = GetParam() == Triple::aarch64 ? EM_AARCH64 : EM_X86_64;
+ MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
+ ObjFile = cantFail(ObjectFile::createObjectFile(Source));
+ }
+ void initializeBolt() {
+ Relocation::Arch = ObjFile->makeTriple().getArch();
+ BC = cantFail(BinaryContext::createBinaryContext(
+ ObjFile->makeTriple(), std::make_shared<orc::SymbolStringPool>(),
+ ObjFile->getFileName(), nullptr, true, DWARFContext::create(*ObjFile),
+ {llvm::outs(), llvm::errs()}));
+ ASSERT_FALSE(!BC);
+ BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(
+ createMCPlusBuilder(GetParam(), BC->MIA.get(), BC->MII.get(),
+ BC->MRI.get(), BC->STI.get())));
+
+ PassManager = std::make_unique<BinaryFunctionPassManager>(*BC);
+ PassManager->registerPass(
+ std::make_unique<PointerAuthCFIFixup>(opts::PrintPAuthCFIAnalyzer));
+
+ TextSection = &BC->registerOrUpdateSection(
+ ".text", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR,
+ /*Data=*/nullptr, /*Size=*/0,
+ /*Alignment=*/16);
+ }
+
+ std::vector<int> findCFIOffsets(BinaryFunction &BF) {
+ std::vector<int> Locations;
+ int Idx = 0;
+ int InstSize = 4; // AArch64
+ for (BinaryBasicBlock &BB : BF) {
+ for (MCInst &Inst : BB) {
+ if (BC->MIB->isCFI(Inst)) {
+ const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
+ if (CFI->getOperation() == MCCFIInstruction::OpNegateRAState)
+ Locations.push_back(Idx * InstSize);
+ }
+ Idx++;
+ }
+ }
+ return Locations;
+ }
+
+ char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
+ std::unique_ptr<ObjectFile> ObjFile;
+ std::unique_ptr<BinaryContext> BC;
+ std::unique_ptr<BinaryFunctionPassManager> PassManager;
+ BinarySection *TextSection;
+};
+} // namespace
+
+TEST_P(PassTester, ExampleTest) {
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+
+ ASSERT_NE(TextSection, nullptr);
+
+ PREPARE_FUNC("ExampleFunction");
+
+ MCInst UnsignedInst = MCInstBuilder(AArch64::ADDSXri)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addImm(0)
+ .addImm(0);
+ BC->MIB->setRAState(UnsignedInst, false);
+ BB->addInstruction(UnsignedInst);
+
+ MCInst SignedInst = MCInstBuilder(AArch64::ADDSXri)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addImm(1)
+ .addImm(0);
+ BC->MIB->setRAState(SignedInst, true);
+ BB->addInstruction(SignedInst);
+
+ Error E = PassManager->runPasses();
+ EXPECT_FALSE(E);
+
+ /* Expected layout of BF after the pass:
+
+ .LBB0 (3 instructions, align : 1)
+ Entry Point
+ CFI State : 0
+ 00000000: adds x0, x0, #0x0
+ 00000004: !CFI $0 ; OpNegateRAState
+ 00000004: adds x0, x0, #0x1
+ CFI State: 0
+ */
+ auto CFILoc = findCFIOffsets(*BF);
+ EXPECT_EQ(CFILoc.size(), 1u);
+ EXPECT_EQ(CFILoc[0], 4);
+}
+
+TEST_P(PassTester, fillUnknownStateInBBTest) {
+ /* Check that if a BB starts with unknown RAState, we can fill the unknown
+ states based on the following instructions with known RAStates.
+ *
+ * .LBB0 (1 instructions, align : 1)
+ Entry Point
+ CFI State : 0
+ 00000000: adds x0, x0, #0x0
+ CFI State: 0
+
+ .LBB1 (4 instructions, align : 1)
+ CFI State : 0
+ 00000004: !CFI $0 ; OpNegateRAState
+ 00000004: adds x0, x0, #0x1
+ 00000008: adds x0, x0, #0x2
+ 0000000c: adds x0, x0, #0x3
+ CFI State: 0
+ */
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+
+ ASSERT_NE(TextSection, nullptr);
+
+ PREPARE_FUNC("FuncWithUnknownStateInBB");
+ BinaryBasicBlock *BB2 = BF->addBasicBlock();
+ BB2->setCFIState(0);
+
+ MCInst Unsigned = MCInstBuilder(AArch64::ADDSXri)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addImm(0)
+ .addImm(0);
+ BC->MIB->setRAState(Unsigned, false);
+ BB->addInstruction(Unsigned);
+
+ MCInst Unknown = MCInstBuilder(AArch64::ADDSXri)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addImm(1)
+ .addImm(0);
+ MCInst Unknown1 = MCInstBuilder(AArch64::ADDSXri)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addImm(2)
+ .addImm(0);
+ MCInst Signed = MCInstBuilder(AArch64::ADDSXri)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X0)
+ .addImm(3)
+ .addImm(0);
+ BC->MIB->setRAState(Signed, true);
+ BB2->addInstruction(Unknown);
+ BB2->addInstruction(Unknown1);
+ BB2->addInstruction(Signed);
+
+ Error E = PassManager->runPasses();
+ EXPECT_FALSE(E);
+
+ auto CFILoc = findCFIOffsets(*BF);
+ EXPECT_EQ(CFILoc.size(), 1u);
+ EXPECT_EQ(CFILoc[0], 4);
+ // Check that the pass set Unknown and Unknown1 to signed.
+ // begin() is the CFI, begin() + 1 is Unknown, begin() + 2 is Unknown1.
+ std::optional<bool> RAState = BC->MIB->getRAState(*(BB2->begin() + 1));
+ EXPECT_TRUE(RAState.has_value());
+ EXPECT_TRUE(*RAState);
+ std::optional<bool> RAState1 = BC->MIB->getRAState(*(BB2->begin() + 2));
+ EXPECT_TRUE(RAState1.has_value());
+ EXPECT_TRUE(*RAState1);
+}
+
+TEST_P(PassTester, fillUnknownStubs) {
+ /*
+ * A stub that is not part of the function's CFG should inherit the RAState of
+ the BasicBlock before it.
+ *
+ * LBB1 is not part of the CFG: LBB0 jumps unconditionally to LBB2.
+ * LBB1 would be a stub inserted in LongJmp in real code.
+ * We do not add any NegateRAState CFIs, as other CFIs are not added either.
+ * See issue #160989 for more details.
+ *
+ * .LBB0 (1 instructions, align : 1)
+ Entry Point
+ 00000000: b .LBB2
+ Successors: .LBB2
+
+ .LBB1 (1 instructions, align : 1)
+ 00000004: ret
+
+ .LBB2 (1 instructions, align : 1)
+ Predecessors: .LBB0
+ 00000008: ret
+ */
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+
+ ASSERT_NE(TextSection, nullptr);
+
+ PREPARE_FUNC("FuncWithStub");
+ BinaryBasicBlock *BB2 = BF->addBasicBlock();
+ BB2->setCFIState(0);
+ BinaryBasicBlock *BB3 = BF->addBasicBlock();
+ BB3->setCFIState(0);
+
+ BB->addSuccessor(BB3);
+
+ // Jumping over BB2, to BB3.
+ MCInst Jump;
+ BC->MIB->createUncondBranch(Jump, BB3->getLabel(), BC->Ctx.get());
+ BB->addInstruction(Jump);
+ BC->MIB->setRAState(Jump, false);
+
+ // BB2, in real code it would be a ShortJmp.
+ // Unknown RAState.
+ MCInst StubInst;
+ BC->MIB->createReturn(StubInst);
+ BB2->addInstruction(StubInst);
+
+ // Can be any instruction.
+ MCInst Ret;
+ BC->MIB->createReturn(Ret);
+ BB3->addInstruction(Ret);
+ BC->MIB->setRAState(Ret, false);
+
+ Error E = PassManager->runPasses();
+ EXPECT_FALSE(E);
+
+ // Check that we did not generate any NegateRAState CFIs.
+ auto CFILoc = findCFIOffsets(*BF);
+ EXPECT_EQ(CFILoc.size(), 0u);
+}
+
+TEST_P(PassTester, fillUnknownStubsEmpty) {
+ /*
+ * This test checks that BOLT can set the RAState of unknown BBs,
+ * even if all previous BBs are empty, hence no PrevInst gets set.
+ *
+ * As this means that the current BB is the first one with non-pseudo
+ * instructions, the function's initialRAState should be used.
+ */
+ if (GetParam() != Triple::aarch64)
+ GTEST_SKIP();
+
+ ASSERT_NE(TextSection, nullptr);
+
+ PREPARE_FUNC("FuncWithStub");
+ BF->setInitialRAState(false);
+ BinaryBasicBlock *BB2 = BF->addBasicBlock();
+ BB2->setCFIState(0);
+
+ // BB is empty.
+ BB->addSuccessor(BB2);
+
+ // BB2, in real code it would be a ShortJmp.
+ // Unknown RAState.
+ MCInst StubInst;
+ BC->MIB->createReturn(StubInst);
+ BB2->addInstruction(StubInst);
+
+ Error E = PassManager->runPasses();
+ EXPECT_FALSE(E);
+
+ // Check that BOLT added an RAState to BB2.
+ std::optional<bool> RAState = BC->MIB->getRAState(*(BB2->begin()));
+ EXPECT_TRUE(RAState.has_value());
+ // BB2 should be set to BF.initialRAState (false).
+ EXPECT_FALSE(*RAState);
+}
+
+#ifdef AARCH64_AVAILABLE
+INSTANTIATE_TEST_SUITE_P(AArch64, PassTester,
+ ::testing::Values(Triple::aarch64));
+#endif
diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp
index 8d023cd..4f060cd 100644
--- a/bolt/unittests/Profile/PerfSpeEvents.cpp
+++ b/bolt/unittests/Profile/PerfSpeEvents.cpp
@@ -161,4 +161,92 @@ TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
parseAndCheckBrstackEvents(1234, ExpectedSamples);
}
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstackAndPbt) {
+ // Check perf input with SPE branch events in brstack format, combined
+ // with the previous branch target address (referred to as PBT).
+ // Example collection command:
+ // ```
+ // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+ // ```
+ // How BOLT extracts the branch events:
+ // ```
+ // perf script -F pid,brstack --itrace=bl
+ // ```
+
+ opts::ArmSPE = true;
+ opts::ReadPerfEvents =
+ // "<PID> <SRC>/<DEST>/PN/-/-/10/COND/- <NULL>/<PBT>/-/-/-/0//-\n"
+ " 4567 0xa002/0xa003/PN/-/-/10/COND/- 0x0/0xa001/-/-/-/0//-\n"
+ " 4567 0xb002/0xb003/P/-/-/4/RET/- 0x0/0xb001/-/-/-/0//-\n"
+ " 4567 0xc456/0xc789/P/-/-/13/-/- 0x0/0xc123/-/-/-/0//-\n"
+ " 4567 0xd456/0xd789/M/-/-/7/RET/- 0x0/0xd123/-/-/-/0//-\n"
+ " 4567 0xe005/0xe009/P/-/-/14/RET/- 0x0/0xe001/-/-/-/0//-\n"
+ " 4567 0xd456/0xd789/M/-/-/7/RET/- 0x0/0xd123/-/-/-/0//-\n"
+ " 4567 0xf002/0xf003/MN/-/-/8/COND/- 0x0/0xf001/-/-/-/0//-\n"
+ " 4567 0xc456/0xc789/P/-/-/13/-/- 0x0/0xc123/-/-/-/0//-\n";
+
+ // ExpectedSamples contains the aggregated information about
+ // a branch {{From, To, TraceTo}, {TakenCount, MispredCount}}.
+ // Where
+ // - From: is the source address of the sampled branch operation.
+ // - To: is the target address of the sampled branch operation.
+ // - TraceTo could be either
+ // - A 'Type = Trace::BR_ONLY', which means the trace only contains branch
+ // data.
+ // - Or an address, when the trace contains information about the previous
+ // branch.
+ //
+ // When FEAT_SPE_PBT is present, Arm SPE emits two records per sample:
+ // - the current branch (Spe.From/Spe.To), and
+ // - the previous taken branch target (PBT) (PBT.From, PBT.To).
+ //
+ // Together they behave like a depth-1 branch stack where:
+ // - the PBT entry is always taken
+ // - the current branch entry may represent a taken branch or a fall-through
+ // - the destination (Spe.To) is the architecturally executed target
+ //
+ // Fall-throughs may be inferred between the PBT entry and the current
+ // branch source (Spe.From), but never between the current branch's own
+ // endpoints (Spe.From and Spe.To).
+ //
+ // PBT records only the target address (PBT.To), so there is no
+ // information about the branch source (PBT.From=0x0), the branch type, or
+ // the prediction bit.
+ //
+ // Consider the trace pair:
+ // {{Spe.From, Spe.To, Type}, {TK, MP}},
+ // {{PBT.From, PBT.To, TraceTo}, {TK, MP}}
+ // {{0xd456, 0xd789, Trace::BR_ONLY}, {2, 2}}, {{0x0, 0xd123, 0xd456}, {2, 0}}
+ //
+ // The first entry is the Spe record, which represents a trace from 0xd456
+ // (Spe.From) to 0xd789 (Spe.To). Type = Trace::BR_ONLY, as BOLT processes the
+ // current branch event first. At this point we have no information about the
+ // previous trace (PBT). This entry has a TakenCount = 2, as we have two
+ // samples for (0xd456, 0xd789) in our input. It also has MispredCount = 2,
+ // as the 'M' misprediction flag appears in both cases.
+ //
+ // The second entry is the PBT record. TakenCount = 2 because the
+ // (PBT.From = 0x0, PBT.To = 0xd123) branch target appears twice in the input,
+ // and MispredCount = 0 because prediction data is absent. There is no branch
+ // source information, so the PBT.From field is zero (0x0). TraceTo = 0xd456
+ // connects the flow from the previous taken branch at 0xd123 (PBT.To) to the
+ // current source branch at 0xd456 (Spe.From), which then continues to 0xd789
+ // (Spe.To).
+ std::vector<std::pair<Trace, TakenBranchInfo>> ExpectedSamples = {
+ {{0xa002, 0xa003, Trace::BR_ONLY}, {1, 0}},
+ {{0x0, 0xa001, 0xa002}, {1, 0}},
+ {{0xb002, 0xb003, Trace::BR_ONLY}, {1, 0}},
+ {{0x0, 0xb001, 0xb002}, {1, 0}},
+ {{0xc456, 0xc789, Trace::BR_ONLY}, {2, 0}},
+ {{0x0, 0xc123, 0xc456}, {2, 0}},
+ {{0xd456, 0xd789, Trace::BR_ONLY}, {2, 2}},
+ {{0x0, 0xd123, 0xd456}, {2, 0}},
+ {{0xe005, 0xe009, Trace::BR_ONLY}, {1, 0}},
+ {{0x0, 0xe001, 0xe005}, {1, 0}},
+ {{0xf002, 0xf003, Trace::BR_ONLY}, {1, 1}},
+ {{0x0, 0xf001, 0xf002}, {1, 0}}};
+
+ parseAndCheckBrstackEvents(4567, ExpectedSamples);
+}
+
#endif