aboutsummaryrefslogtreecommitdiff
path: root/bolt
diff options
context:
space:
mode:
Diffstat (limited to 'bolt')
-rw-r--r--bolt/include/bolt/Core/BinaryBasicBlock.h1
-rw-r--r--bolt/include/bolt/Core/BinaryContext.h9
-rw-r--r--bolt/include/bolt/Core/BinaryData.h1
-rw-r--r--bolt/include/bolt/Core/BinaryFunction.h3
-rw-r--r--bolt/include/bolt/Profile/ProfileYAMLMapping.h81
-rw-r--r--bolt/include/bolt/Profile/YAMLProfileWriter.h52
-rw-r--r--bolt/include/bolt/Rewrite/RewriteInstance.h11
-rw-r--r--bolt/include/bolt/Utils/Utils.h5
-rw-r--r--bolt/lib/Core/BinaryContext.cpp3
-rw-r--r--bolt/lib/Core/BinaryFunction.cpp26
-rw-r--r--bolt/lib/Passes/ADRRelaxationPass.cpp9
-rw-r--r--bolt/lib/Passes/RetpolineInsertion.cpp2
-rw-r--r--bolt/lib/Passes/ValidateInternalCalls.cpp9
-rw-r--r--bolt/lib/Profile/DataAggregator.cpp54
-rw-r--r--bolt/lib/Profile/YAMLProfileReader.cpp6
-rw-r--r--bolt/lib/Profile/YAMLProfileWriter.cpp189
-rw-r--r--bolt/lib/Rewrite/PseudoProbeRewriter.cpp56
-rw-r--r--bolt/lib/Rewrite/RewriteInstance.cpp12
-rw-r--r--bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp1
-rw-r--r--bolt/lib/Utils/Utils.cpp12
-rw-r--r--bolt/test/AArch64/constant_island_pie_update.s6
-rw-r--r--bolt/test/AArch64/ifunc.test (renamed from bolt/test/AArch64/ifunc.c)23
-rw-r--r--bolt/test/AArch64/internal-call.s65
-rw-r--r--bolt/test/AArch64/update-weak-reference-symbol.s2
-rw-r--r--bolt/test/Inputs/ifunc.c12
-rw-r--r--bolt/test/Inputs/iplt.ld (renamed from bolt/test/AArch64/Inputs/iplt.ld)0
-rw-r--r--bolt/test/X86/ifunc.test47
-rw-r--r--bolt/test/X86/log.test4
-rw-r--r--bolt/test/X86/print-only-section.s29
-rw-r--r--bolt/test/X86/pseudoprobe-decoding-inline.test36
-rw-r--r--bolt/test/X86/pseudoprobe-decoding-noinline.test41
-rw-r--r--bolt/test/X86/yaml-unknown-keys.test50
-rw-r--r--bolt/test/lit.local.cfg4
-rw-r--r--bolt/test/merge-fdata-uninitialized-header.test45
-rw-r--r--bolt/test/perf2bolt/lit.local.cfg5
-rw-r--r--bolt/test/perf2bolt/perf_test.test9
-rw-r--r--bolt/tools/merge-fdata/merge-fdata.cpp13
-rw-r--r--bolt/unittests/Core/BinaryContext.cpp36
38 files changed, 793 insertions, 176 deletions
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h
index 9a9d7b8..b4f31cf 100644
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -19,6 +19,7 @@
#include "bolt/Core/MCPlus.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorOr.h"
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 5fb32a1..08ce892 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -71,14 +71,15 @@ struct SegmentInfo {
uint64_t FileOffset; /// Offset in the file.
uint64_t FileSize; /// Size in file.
uint64_t Alignment; /// Alignment of the segment.
+ bool IsExecutable; /// Is the executable bit set on the Segment?
void print(raw_ostream &OS) const {
- OS << "SegmentInfo { Address: 0x"
- << Twine::utohexstr(Address) << ", Size: 0x"
- << Twine::utohexstr(Size) << ", FileOffset: 0x"
+ OS << "SegmentInfo { Address: 0x" << Twine::utohexstr(Address)
+ << ", Size: 0x" << Twine::utohexstr(Size) << ", FileOffset: 0x"
<< Twine::utohexstr(FileOffset) << ", FileSize: 0x"
<< Twine::utohexstr(FileSize) << ", Alignment: 0x"
- << Twine::utohexstr(Alignment) << "}";
+ << Twine::utohexstr(Alignment) << ", " << (IsExecutable ? "x" : " ")
+ << "}";
};
};
diff --git a/bolt/include/bolt/Core/BinaryData.h b/bolt/include/bolt/Core/BinaryData.h
index 8a67b3e..6a773c4 100644
--- a/bolt/include/bolt/Core/BinaryData.h
+++ b/bolt/include/bolt/Core/BinaryData.h
@@ -226,7 +226,6 @@ inline raw_ostream &operator<<(raw_ostream &OS,
Sep = ",\n ";
TotalCount += AccessInfo.Count;
}
- SS.flush();
OS << TotalCount << " total counts : " << TempString;
return OS;
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 24c7db2..fc0375b 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -117,7 +117,6 @@ inline raw_ostream &operator<<(raw_ostream &OS,
TotalCount += CSP.Count;
TotalMispreds += CSP.Mispreds;
}
- SS.flush();
OS << TotalCount << " (" << TotalMispreds << " misses) :" << TempString;
return OS;
@@ -1692,6 +1691,8 @@ public:
void setPseudo(bool Pseudo) { IsPseudo = Pseudo; }
+ void setPreserveNops(bool Value) { PreserveNops = Value; }
+
BinaryFunction &setUsesGnuArgsSize(bool Uses = true) {
UsesGnuArgsSize = Uses;
return *this;
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 2a0514d..9865118 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -95,24 +95,29 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
namespace bolt {
struct PseudoProbeInfo {
- llvm::yaml::Hex64 GUID;
- uint64_t Index;
- uint8_t Type;
+ uint32_t InlineTreeIndex = 0;
+ uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
+ std::vector<uint64_t> BlockProbes; // block probes with indices above 64
+ std::vector<uint64_t> CallProbes;
+ std::vector<uint64_t> IndCallProbes;
+ std::vector<uint32_t> InlineTreeNodes;
bool operator==(const PseudoProbeInfo &Other) const {
- return GUID == Other.GUID && Index == Other.Index;
- }
- bool operator!=(const PseudoProbeInfo &Other) const {
- return !(*this == Other);
+ return InlineTreeIndex == Other.InlineTreeIndex &&
+ BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
+ IndCallProbes == Other.IndCallProbes;
}
};
} // end namespace bolt
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
- YamlIO.mapRequired("guid", PI.GUID);
- YamlIO.mapRequired("id", PI.Index);
- YamlIO.mapRequired("type", PI.Type);
+ YamlIO.mapOptional("blx", PI.BlockMask, 0);
+ YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
+ YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
+ YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
+ YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
+ YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
}
static const bool flow = true;
@@ -158,15 +163,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
std::vector<bolt::CallSiteInfo>());
YamlIO.mapOptional("succ", BBP.Successors,
std::vector<bolt::SuccessorInfo>());
- YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes,
+ YamlIO.mapOptional("probes", BBP.PseudoProbes,
std::vector<bolt::PseudoProbeInfo>());
}
};
+namespace bolt {
+struct InlineTreeNode { // One entry of a function's serialized inline tree.
+ uint32_t ParentIndexDelta; // The writer stores the parent index minus the previous node's parent index.
+ uint32_t CallSiteProbe; // The writer fills this from the node's inline site.
+ // Index in PseudoProbeDesc.GUID, UINT32_MAX for same as previous (omitted)
+ uint32_t GUIDIndex;
+ bool operator==(const InlineTreeNode &) const { return false; } // Always reports inequality.
+};
+} // end namespace bolt
+
+template <> struct MappingTraits<bolt::InlineTreeNode> { // YAML mapping for one inline tree node.
+ static void mapping(IO &YamlIO, bolt::InlineTreeNode &ITI) {
+ YamlIO.mapOptional("g", ITI.GUIDIndex, UINT32_MAX); // Default UINT32_MAX is the "same as previous" marker.
+ YamlIO.mapOptional("p", ITI.ParentIndexDelta, 0); // Parent index delta; default 0.
+ YamlIO.mapOptional("cs", ITI.CallSiteProbe, 0); // Call site probe; default 0.
+ }
+
+ static const bool flow = true; // Request YAML flow style for this mapping.
+};
} // end namespace yaml
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeNode)
namespace llvm {
namespace yaml {
@@ -179,8 +204,7 @@ struct BinaryFunctionProfile {
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
- llvm::yaml::Hex64 GUID{0};
- llvm::yaml::Hex64 PseudoProbeDescHash{0};
+ std::vector<InlineTreeNode> InlineTree;
bool Used{false};
};
} // end namespace bolt
@@ -194,9 +218,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
- YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
- YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
- (uint64_t)0);
+ YamlIO.mapOptional("inline_tree", BFP.InlineTree,
+ std::vector<bolt::InlineTreeNode>());
}
};
@@ -246,10 +269,33 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> {
}
};
+namespace bolt {
+struct ProfilePseudoProbeDesc { // Profile-wide tables of GUIDs and hashes.
+ std::vector<Hex64> GUID; // GUID table; inline tree nodes refer to it by index.
+ std::vector<Hex64> Hash; // Hash table indexed by the entries of GUIDHashIdx.
+ std::vector<uint32_t> GUIDHashIdx; // Index of hash for that GUID in Hash
+
+ bool operator==(const ProfilePseudoProbeDesc &Other) const {
+ // Two descriptors compare equal only when all three vectors are empty on both sides.
+ return GUID.empty() && Other.GUID.empty() && Hash.empty() &&
+ Other.Hash.empty() && GUIDHashIdx.empty() &&
+ Other.GUIDHashIdx.empty();
+ }
+};
+} // end namespace bolt
+
+template <> struct MappingTraits<bolt::ProfilePseudoProbeDesc> { // YAML mapping for the descriptor tables.
+ static void mapping(IO &YamlIO, bolt::ProfilePseudoProbeDesc &PD) {
+ YamlIO.mapRequired("gs", PD.GUID); // GUID table.
+ YamlIO.mapRequired("gh", PD.GUIDHashIdx); // Per-GUID index into the hash table.
+ YamlIO.mapRequired("hs", PD.Hash); // Hash table.
+ }
+};
} // end namespace yaml
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::ProfilePseudoProbeDesc)
namespace llvm {
namespace yaml {
@@ -258,6 +304,7 @@ namespace bolt {
struct BinaryProfile {
BinaryProfileHeader Header;
std::vector<BinaryFunctionProfile> Functions;
+ ProfilePseudoProbeDesc PseudoProbeDesc;
};
} // namespace bolt
@@ -265,6 +312,8 @@ template <> struct MappingTraits<bolt::BinaryProfile> {
static void mapping(IO &YamlIO, bolt::BinaryProfile &BP) {
YamlIO.mapRequired("header", BP.Header);
YamlIO.mapRequired("functions", BP.Functions);
+ YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc,
+ bolt::ProfilePseudoProbeDesc());
}
};
diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h
index 4a9355d..d4d7217 100644
--- a/bolt/include/bolt/Profile/YAMLProfileWriter.h
+++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h
@@ -32,8 +32,27 @@ public:
/// Save execution profile for that instance.
std::error_code writeProfile(const RewriteInstance &RI);
+ using InlineTreeMapTy =
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>;
+ struct InlineTreeDesc {
+ template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>;
+ using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>;
+ using GUIDNumMap = GUIDMapTy<uint32_t>;
+ GUIDNodeMap TopLevelGUIDToInlineTree;
+ GUIDNumMap GUIDIdxMap;
+ GUIDNumMap HashIdxMap;
+ };
+
+ static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy>
+ convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
+ const InlineTreeDesc &InlineTree, uint64_t GUID);
+
+ static std::tuple<yaml::bolt::ProfilePseudoProbeDesc, InlineTreeDesc>
+ convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
+
static yaml::bolt::BinaryFunctionProfile
convert(const BinaryFunction &BF, bool UseDFS,
+ const InlineTreeDesc &InlineTree,
const BoltAddressTranslation *BAT = nullptr);
/// Set CallSiteInfo destination fields from \p Symbol and return a target
@@ -42,8 +61,39 @@ public:
setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
uint32_t Offset = 0);
-};
+private:
+ struct InlineTreeNode {
+ const MCDecodedPseudoProbeInlineTree *InlineTree;
+ uint64_t GUID;
+ uint64_t Hash;
+ uint32_t ParentId;
+ uint32_t InlineSite;
+ };
+ static std::vector<InlineTreeNode>
+ collectInlineTree(const MCPseudoProbeDecoder &Decoder,
+ const MCDecodedPseudoProbeInlineTree &Root);
+
+ // 0 - block probe, 1 - indirect call, 2 - direct call
+ using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
+ using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
+ static std::vector<yaml::bolt::PseudoProbeInfo>
+ convertNodeProbes(NodeIdToProbes &NodeProbes);
+
+public:
+ template <typename T>
+ static std::vector<yaml::bolt::PseudoProbeInfo>
+ writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) { // Group probe indices by inline tree node id and probe type, then convert to YAML records.
+ NodeIdToProbes NodeProbes; // Node id -> per-type lists of probe indices.
+ for (const MCDecodedPseudoProbe &Probe : Probes) {
+ auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
+ if (It == InlineTreeNodeId.end()) // Probes whose inline tree node has no assigned id are skipped.
+ continue;
+ NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex()); // The probe type selects the list within ProbeList.
+ }
+ return convertNodeProbes(NodeProbes);
+ }
+};
} // namespace bolt
} // namespace llvm
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 16a82d5..e5b7ad6 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -510,12 +510,11 @@ private:
};
/// Different types of X86-64 PLT sections.
- const PLTSectionInfo X86_64_PLTSections[4] = {
- { ".plt", 16 },
- { ".plt.got", 8 },
- { ".plt.sec", 8 },
- { nullptr, 0 }
- };
+ const PLTSectionInfo X86_64_PLTSections[5] = {{".plt", 16},
+ {".plt.got", 8},
+ {".plt.sec", 8},
+ {".iplt", 16},
+ {nullptr, 0}};
/// AArch64 PLT sections.
const PLTSectionInfo AArch64_PLTSections[4] = {
diff --git a/bolt/include/bolt/Utils/Utils.h b/bolt/include/bolt/Utils/Utils.h
index 3886c5f..9baee7d 100644
--- a/bolt/include/bolt/Utils/Utils.h
+++ b/bolt/include/bolt/Utils/Utils.h
@@ -41,6 +41,11 @@ std::string getEscapedName(const StringRef &Name);
/// Return the unescaped name
std::string getUnescapedName(const StringRef &Name);
+/// Return a common part for a given \p Name wrt a given \p Suffixes list.
+/// Preserve the suffix if \p KeepSuffix is set, only dropping characters
+/// following it, otherwise drop the suffix as well.
+std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
+ ArrayRef<StringRef> Suffixes);
/// LTO-generated function names take a form:
///
/// <function_name>.lto_priv.<decimal_number>/...
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index cd137f4..1347047 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2021,6 +2021,9 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
// Find a segment with a matching file offset.
for (auto &KV : SegmentMapInfo) {
const SegmentInfo &SegInfo = KV.second;
+ // Only consider executable segments.
+ if (!SegInfo.IsExecutable)
+ continue;
// FileOffset is got from perf event,
// and it is equal to alignDown(SegInfo.FileOffset, pagesize).
// If the pagesize is not equal to SegInfo.Alignment.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index af982fd..36c42fc 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -165,6 +165,12 @@ bool shouldPrint(const BinaryFunction &Function) {
}
}
+ std::optional<StringRef> Origin = Function.getOriginSectionName();
+ if (Origin && llvm::any_of(opts::PrintOnly, [&](const std::string &Name) {
+ return Name == *Origin;
+ }))
+ return true;
+
return false;
}
@@ -1339,22 +1345,10 @@ Error BinaryFunction::disassemble() {
BC.getBinaryFunctionContainingAddress(TargetAddress))
TargetFunc->setIgnored();
- if (IsCall && containsAddress(TargetAddress)) {
- if (TargetAddress == getAddress()) {
- // Recursive call.
- TargetSymbol = getSymbol();
- } else {
- if (BC.isX86()) {
- // Dangerous old-style x86 PIC code. We may need to freeze this
- // function, so preserve the function as is for now.
- PreserveNops = true;
- } else {
- BC.errs() << "BOLT-WARNING: internal call detected at 0x"
- << Twine::utohexstr(AbsoluteInstrAddr)
- << " in function " << *this << ". Skipping.\n";
- IsSimple = false;
- }
- }
+ if (IsCall && TargetAddress == getAddress()) {
+ // A recursive call. Calls to internal blocks are handled by
+ // ValidateInternalCalls pass.
+ TargetSymbol = getSymbol();
}
if (!TargetSymbol) {
diff --git a/bolt/lib/Passes/ADRRelaxationPass.cpp b/bolt/lib/Passes/ADRRelaxationPass.cpp
index 24fddbc..256034a 100644
--- a/bolt/lib/Passes/ADRRelaxationPass.cpp
+++ b/bolt/lib/Passes/ADRRelaxationPass.cpp
@@ -59,10 +59,15 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
// Don't relax adr if it points to the same function and it is not split
// and BF initial size is < 1MB.
const unsigned OneMB = 0x100000;
- if (!BF.isSplit() && BF.getSize() < OneMB) {
+ if (BF.getSize() < OneMB) {
BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol);
- if (TargetBF && TargetBF == &BF)
+ if (TargetBF == &BF && !BF.isSplit())
continue;
+ // No relaxation needed if ADR references a basic block in the same
+ // fragment.
+ if (BinaryBasicBlock *TargetBB = BF.getBasicBlockForLabel(Symbol))
+ if (BB.getFragmentNum() == TargetBB->getFragmentNum())
+ continue;
}
MCPhysReg Reg;
diff --git a/bolt/lib/Passes/RetpolineInsertion.cpp b/bolt/lib/Passes/RetpolineInsertion.cpp
index 2808575..171177d 100644
--- a/bolt/lib/Passes/RetpolineInsertion.cpp
+++ b/bolt/lib/Passes/RetpolineInsertion.cpp
@@ -181,7 +181,6 @@ std::string createRetpolineFunctionTag(BinaryContext &BC,
if (BrInfo.isReg()) {
BC.InstPrinter->printRegName(TagOS, BrInfo.BranchReg);
TagOS << "_";
- TagOS.flush();
return Tag;
}
@@ -212,7 +211,6 @@ std::string createRetpolineFunctionTag(BinaryContext &BC,
BC.InstPrinter->printRegName(TagOS, MemRef.SegRegNum);
}
- TagOS.flush();
return Tag;
}
diff --git a/bolt/lib/Passes/ValidateInternalCalls.cpp b/bolt/lib/Passes/ValidateInternalCalls.cpp
index 88df2e5..bdab895 100644
--- a/bolt/lib/Passes/ValidateInternalCalls.cpp
+++ b/bolt/lib/Passes/ValidateInternalCalls.cpp
@@ -302,9 +302,6 @@ bool ValidateInternalCalls::analyzeFunction(BinaryFunction &Function) const {
}
Error ValidateInternalCalls::runOnFunctions(BinaryContext &BC) {
- if (!BC.isX86())
- return Error::success();
-
// Look for functions that need validation. This should be pretty rare.
std::set<BinaryFunction *> NeedsValidation;
for (auto &BFI : BC.getBinaryFunctions()) {
@@ -312,14 +309,20 @@ Error ValidateInternalCalls::runOnFunctions(BinaryContext &BC) {
for (BinaryBasicBlock &BB : Function) {
for (MCInst &Inst : BB) {
if (getInternalCallTarget(Function, Inst)) {
+ BC.errs() << "BOLT-WARNING: internal call detected in function "
+ << Function << '\n';
NeedsValidation.insert(&Function);
Function.setSimple(false);
+ Function.setPreserveNops(true);
break;
}
}
}
}
+ if (!BC.isX86())
+ return Error::success();
+
// Skip validation for non-relocation mode
if (!BC.HasRelocations)
return Error::success();
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 813d825..0a63148 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -88,7 +88,7 @@ MaxSamples("max-samples",
cl::cat(AggregatorCategory));
extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
extern cl::opt<std::string> SaveProfile;
cl::opt<bool> ReadPreAggregated(
@@ -2043,7 +2043,8 @@ std::error_code DataAggregator::parseMMapEvents() {
// size of the mapping, but we know it should not exceed the segment
// alignment value. Hence we are performing an approximate check.
return SegInfo.Address >= MMapInfo.MMapAddress &&
- SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment;
+ SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment &&
+ SegInfo.IsExecutable;
});
if (!MatchFound) {
errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse
@@ -2300,7 +2301,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
yaml::bolt::BinaryProfile BP;
const MCPseudoProbeDecoder *PseudoProbeDecoder =
- opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
// Fill out the header info.
BP.Header.Version = 1;
@@ -2321,6 +2322,12 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;
+ // Add probe inline tree nodes.
+ YAMLProfileWriter::InlineTreeDesc InlineTree;
+ if (PseudoProbeDecoder)
+ std::tie(BP.PseudoProbeDesc, InlineTree) =
+ YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder);
+
if (!opts::BasicAggregation) {
// Convert profile for functions not covered by BAT
for (auto &BFI : BC.getBinaryFunctions()) {
@@ -2329,8 +2336,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
continue;
if (BAT->isBATFunction(Function.getAddress()))
continue;
- BP.Functions.emplace_back(
- YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
+ BP.Functions.emplace_back(YAMLProfileWriter::convert(
+ Function, /*UseDFS=*/false, InlineTree, BAT));
}
for (const auto &KV : NamesToBranches) {
@@ -2403,16 +2410,22 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
}
if (PseudoProbeDecoder) {
- if ((YamlBF.GUID = BF->getGUID())) {
- const MCPseudoProbeFuncDesc *FuncDesc =
- PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
- YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
+ InlineTreeNodeId;
+ if (BF->getGUID()) {
+ std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
+ YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
+ InlineTree, BF->getGUID());
}
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
Fragments.insert(BF);
+ DenseMap<
+ uint32_t,
+ std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
+ BlockProbes;
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
for (const MCDecodedPseudoProbe &Probe :
@@ -2421,17 +2434,24 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
- YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
- yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
- Probe.getType()});
+ BlockProbes[BlockIndex].emplace_back(Probe);
}
}
+
+ for (auto &[Block, Probes] : BlockProbes) {
+ YamlBF.Blocks[Block].PseudoProbes =
+ YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
+ }
}
- // Drop blocks without a hash, won't be useful for stale matching.
- llvm::erase_if(YamlBF.Blocks,
- [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
- return YamlBB.Hash == (yaml::Hex64)0;
- });
+ // Skip printing if there's no profile data
+ llvm::erase_if(
+ YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
+ auto HasCount = [](const auto &SI) { return SI.Count; };
+ bool HasAnyCount = YamlBB.ExecCount ||
+ llvm::any_of(YamlBB.Successors, HasCount) ||
+ llvm::any_of(YamlBB.CallSites, HasCount);
+ return !HasAnyCount;
+ });
BP.Functions.emplace_back(YamlBF);
}
}
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 3eca5e9..67ed320 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -49,11 +49,6 @@ llvm::cl::opt<bool>
llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
cl::desc("use DFS order for YAML profile"),
cl::Hidden, cl::cat(BoltOptCategory));
-
-llvm::cl::opt<bool> ProfileUsePseudoProbes(
- "profile-use-pseudo-probes",
- cl::desc("Use pseudo probes for profile generation and matching"),
- cl::Hidden, cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
@@ -373,6 +368,7 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
return errorCodeToError(EC);
}
yaml::Input YamlInput(MB.get()->getBuffer());
+ YamlInput.setAllowUnknownKeys(true);
// Consume YAML file.
YamlInput >> YamlBP;
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f74cf60..4437be4 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -13,6 +13,8 @@
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -21,8 +23,12 @@
#define DEBUG_TYPE "bolt-prof"
namespace opts {
-extern llvm::cl::opt<bool> ProfileUseDFS;
-extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
+using namespace llvm;
+extern cl::opt<bool> ProfileUseDFS;
+cl::opt<bool> ProfileWritePseudoProbes(
+ "profile-write-pseudo-probes",
+ cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
+ cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
@@ -53,13 +59,164 @@ const BinaryFunction *YAMLProfileWriter::setCSIDestination(
return nullptr;
}
+std::vector<YAMLProfileWriter::InlineTreeNode>
+YAMLProfileWriter::collectInlineTree(
+ const MCPseudoProbeDecoder &Decoder,
+ const MCDecodedPseudoProbeInlineTree &Root) { // Flatten the inline tree under Root into a vector in breadth-first order.
+ auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
+ return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash; // NOTE(review): the lookup result is dereferenced without a null check - confirm it cannot be null.
+ };
+ std::vector<InlineTreeNode> InlineTree(
+ {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}}); // Root sits at index 0 with ParentId 0 and InlineSite 0.
+ uint32_t ParentId = 0; // Index of the node whose children are appended next.
+ while (ParentId != InlineTree.size()) { // The vector doubles as the work queue; stop once every node was visited.
+ const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
+ for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
+ InlineTree.emplace_back(
+ InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
+ std::get<1>(Child.getInlineSite())}); // Element 1 of the inline site tuple is stored as InlineSite.
+ ++ParentId;
+ }
+
+ return InlineTree;
+}
+
+std::tuple<yaml::bolt::ProfilePseudoProbeDesc,
+ YAMLProfileWriter::InlineTreeDesc>
+YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
+ yaml::bolt::ProfilePseudoProbeDesc Desc; // GUID/hash tables for the YAML profile, built below.
+ InlineTreeDesc InlineTree; // Lookup maps returned together with Desc.
+
+ for (const MCDecodedPseudoProbeInlineTree &TopLev :
+ Decoder.getDummyInlineRoot().getChildren())
+ InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev; // Map each top-level GUID to its tree node.
+
+ for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
+ ++InlineTree.HashIdxMap[FuncDesc.FuncHash]; // Holds an occurrence count for now; overwritten with an index below.
+
+ InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
+ for (const auto &Node : Decoder.getInlineTreeVec())
+ ++InlineTree.GUIDIdxMap[Node.Guid]; // Holds an occurrence count for now; overwritten with an index below.
+
+ std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
+ GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
+ for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
+ GUIDFreqVec.emplace_back(Cnt, GUID);
+ llvm::sort(GUIDFreqVec); // Ascending by (count, GUID).
+
+ std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
+ HashFreqVec.reserve(InlineTree.HashIdxMap.size());
+ for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
+ HashFreqVec.emplace_back(Cnt, Hash);
+ llvm::sort(HashFreqVec); // Ascending by (count, hash).
+
+ uint32_t Index = 0; // Indices are handed out while walking the sorted vectors in reverse, i.e. highest count first.
+ Desc.Hash.reserve(HashFreqVec.size());
+ for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
+ Desc.Hash.emplace_back(Hash);
+ InlineTree.HashIdxMap[Hash] = Index++; // Replace the count with the hash's position in Desc.Hash.
+ }
+
+ Index = 0;
+ Desc.GUID.reserve(GUIDFreqVec.size());
+ for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
+ Desc.GUID.emplace_back(GUID);
+ InlineTree.GUIDIdxMap[GUID] = Index++; // Replace the count with the GUID's position in Desc.GUID.
+ uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash; // NOTE(review): the lookup result is dereferenced without a null check - confirm it cannot be null.
+ Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]); // Kept parallel to Desc.GUID.
+ }
+
+ return {Desc, InlineTree};
+}
+
+std::vector<yaml::bolt::PseudoProbeInfo>
+YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
+ struct BlockProbeInfoHasher { // Hashes only the three probe index lists.
+ size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
+ auto HashCombine = [](auto &Range) {
+ return llvm::hash_combine_range(Range.begin(), Range.end());
+ };
+ return llvm::hash_combine(HashCombine(BPI.BlockProbes),
+ HashCombine(BPI.CallProbes),
+ HashCombine(BPI.IndCallProbes));
+ }
+ };
+
+ // Group inline tree node ids by their probe lists so nodes with identical lists share one entry.
+ std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
+ BlockProbeInfoHasher>
+ BPIToNodes;
+ for (auto &[NodeId, Probes] : NodeProbes) {
+ yaml::bolt::PseudoProbeInfo BPI;
+ BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end()); // Slot 0 holds block probes.
+ BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end()); // Slot 1 holds indirect call probes.
+ BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end()); // Slot 2 holds direct call probes.
+ BPIToNodes[BPI].push_back(NodeId);
+ }
+
+ auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) { // Split ids between the bit mask and the overflow vector.
+ for (auto Id : Ids)
+ if (Id > 64)
+ Vec.emplace_back(Id); // Ids above 64 do not fit in the mask and are listed explicitly.
+ else
+ Mask |= 1ull << (Id - 1); // Id N sets bit N-1. NOTE(review): an Id of 0 would give an out-of-range shift - confirm ids start at 1.
+ };
+
+ // Emit one YAML record per distinct probe set, using the node list and block mask encodings.
+ std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
+ YamlProbes.reserve(BPIToNodes.size());
+ for (const auto &[BPI, Nodes] : BPIToNodes) { // Record order follows the unordered_map's iteration order.
+ auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
+ YamlBPI.CallProbes = BPI.CallProbes;
+ YamlBPI.IndCallProbes = BPI.IndCallProbes;
+ if (Nodes.size() == 1)
+ YamlBPI.InlineTreeIndex = Nodes.front(); // A single owner is stored as a scalar index.
+ else
+ YamlBPI.InlineTreeNodes = Nodes; // Several owners are stored as a list.
+ handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
+ }
+ return YamlProbes;
+}
+
+std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
+ YAMLProfileWriter::InlineTreeMapTy>
+YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
+ const InlineTreeDesc &InlineTree,
+ uint64_t GUID) { // Build the YAML inline tree for the function with this GUID, plus a node-to-index map.
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+ std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
+ auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
+ if (It == InlineTree.TopLevelGUIDToInlineTree.end())
+ return {YamlInlineTree, InlineTreeNodeId}; // No top-level tree for this GUID: both results stay empty.
+ const MCDecodedPseudoProbeInlineTree *Root = It->second;
+ assert(Root && "Malformed TopLevelGUIDToInlineTree");
+ uint32_t Index = 0; // Sequential id given to each node in traversal order.
+ uint32_t PrevParent = 0; // Parent index of the previously emitted node.
+ uint32_t PrevGUIDIdx = 0; // Starts at 0, so a first node with GUID index 0 is emitted as "same as previous".
+ for (const auto &Node : collectInlineTree(Decoder, *Root)) {
+ InlineTreeNodeId[Node.InlineTree] = Index++;
+ auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
+ assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap");
+ uint32_t GUIDIdx = GUIDIdxIt->second;
+ if (GUIDIdx == PrevGUIDIdx)
+ GUIDIdx = UINT32_MAX; // Repeat of the previous GUID index: emit the "omitted" marker instead.
+ else
+ PrevGUIDIdx = GUIDIdx;
+ YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{
+ Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx}); // The parent is stored as a delta from the previous node's parent.
+ PrevParent = Node.ParentId;
+ }
+ return {YamlInlineTree, InlineTreeNodeId};
+}
+
yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
+ const InlineTreeDesc &InlineTree,
const BoltAddressTranslation *BAT) {
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();
const MCPseudoProbeDecoder *PseudoProbeDecoder =
- opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
@@ -72,12 +229,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
- if (PseudoProbeDecoder) {
- if ((YamlBF.GUID = BF.getGUID())) {
- const MCPseudoProbeFuncDesc *FuncDesc =
- PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
- YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
- }
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+ if (PseudoProbeDecoder && BF.getGUID()) {
+ std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
+ convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
}
BinaryFunction::BasicBlockOrderType Order;
@@ -193,10 +348,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
- for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
- FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
- YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
- Probe.getGuid(), Probe.getIndex(), Probe.getType()});
+ const std::pair<uint64_t, uint64_t> BlockAddrRange = {
+ FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
+ auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
+ YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
}
YamlBF.Blocks.emplace_back(YamlBB);
@@ -251,6 +406,12 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
}
BP.Header.Flags = ProfileFlags;
+ // Add probe inline tree nodes.
+ InlineTreeDesc InlineTree;
+ if (const MCPseudoProbeDecoder *Decoder =
+ opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
+ std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);
+
// Add all function objects.
for (const auto &BFI : Functions) {
const BinaryFunction &BF = BFI.second;
@@ -258,7 +419,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
continue;
- BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS));
+ BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
}
}
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 4925b4b..8647df4 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -14,6 +14,7 @@
#include "bolt/Rewrite/MetadataRewriter.h"
#include "bolt/Rewrite/MetadataRewriters.h"
#include "bolt/Utils/CommandLineOpts.h"
+#include "bolt/Utils/Utils.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
@@ -49,7 +50,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
clEnumValN(PPP_All, "all", "enable all debugging printout")),
cl::Hidden, cl::cat(BoltCategory));
-extern cl::opt<bool> ProfileUsePseudoProbes;
+extern cl::opt<bool> ProfileWritePseudoProbes;
} // namespace opts
namespace {
@@ -71,7 +72,8 @@ class PseudoProbeRewriter final : public MetadataRewriter {
/// Parse .pseudo_probe_desc section and .pseudo_probe section
/// Setup Pseudo probe decoder
- void parsePseudoProbe();
+ /// If \p ProfiledOnly is set, only parse records for functions with profile.
+ void parsePseudoProbe(bool ProfiledOnly = false);
/// PseudoProbe decoder
std::shared_ptr<MCPseudoProbeDecoder> ProbeDecoderPtr;
@@ -90,21 +92,21 @@ public:
};
Error PseudoProbeRewriter::preCFGInitializer() {
- if (opts::ProfileUsePseudoProbes)
- parsePseudoProbe();
+ if (opts::ProfileWritePseudoProbes)
+ parsePseudoProbe(true);
return Error::success();
}
Error PseudoProbeRewriter::postEmitFinalizer() {
- if (!opts::ProfileUsePseudoProbes)
+ if (!opts::ProfileWritePseudoProbes)
parsePseudoProbe();
updatePseudoProbes();
return Error::success();
}
-void PseudoProbeRewriter::parsePseudoProbe() {
+void PseudoProbeRewriter::parsePseudoProbe(bool ProfiledOnly) {
MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
PseudoProbeDescSection = BC.getUniqueSectionByName(".pseudo_probe_desc");
PseudoProbeSection = BC.getUniqueSectionByName(".pseudo_probe");
@@ -133,10 +135,22 @@ void PseudoProbeRewriter::parsePseudoProbe() {
MCPseudoProbeDecoder::Uint64Set GuidFilter;
MCPseudoProbeDecoder::Uint64Map FuncStartAddrs;
+ SmallVector<StringRef, 0> Suffixes(
+ {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
for (const BinaryFunction *F : BC.getAllBinaryFunctions()) {
+ bool HasProfile = F->hasProfileAvailable();
for (const MCSymbol *Sym : F->getSymbols()) {
- FuncStartAddrs[Function::getGUID(NameResolver::restore(Sym->getName()))] =
- F->getAddress();
+ StringRef SymName = Sym->getName();
+ for (auto Name : {std::optional(NameResolver::restore(SymName)),
+ getCommonName(SymName, false, Suffixes)}) {
+ if (!Name)
+ continue;
+ SymName = *Name;
+ uint64_t GUID = Function::getGUID(SymName);
+ FuncStartAddrs[GUID] = F->getAddress();
+ if (ProfiledOnly && HasProfile)
+ GuidFilter.insert(GUID);
+ }
}
}
Contents = PseudoProbeSection->getContents();
@@ -155,13 +169,25 @@ void PseudoProbeRewriter::parsePseudoProbe() {
ProbeDecoder.printProbesForAllAddresses(outs());
}
- for (const auto &FuncDesc : ProbeDecoder.getGUID2FuncDescMap()) {
- uint64_t GUID = FuncDesc.FuncGUID;
- if (!FuncStartAddrs.contains(GUID))
- continue;
- BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]);
- assert(BF);
- BF->setGUID(GUID);
+ const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap();
+ // Checks GUID in GUID2Func and returns it if it's present or 0 otherwise.
+ auto checkGUID = [&](StringRef SymName) -> uint64_t {
+ uint64_t GUID = Function::getGUID(SymName);
+ if (GUID2Func.find(GUID) == GUID2Func.end())
+ return 0;
+ return GUID;
+ };
+ for (BinaryFunction *F : BC.getAllBinaryFunctions()) {
+ for (const MCSymbol *Sym : F->getSymbols()) {
+ StringRef SymName = NameResolver::restore(Sym->getName());
+ uint64_t GUID = checkGUID(SymName);
+ std::optional<StringRef> CommonName =
+ getCommonName(SymName, false, Suffixes);
+ if (!GUID && CommonName)
+ GUID = checkGUID(*CommonName);
+ if (GUID)
+ F->setGUID(GUID);
+ }
}
}
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index adacb50d..32ec7ab 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -526,11 +526,9 @@ Error RewriteInstance::discoverStorage() {
NextAvailableOffset = std::max(NextAvailableOffset,
Phdr.p_offset + Phdr.p_filesz);
- BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr,
- Phdr.p_memsz,
- Phdr.p_offset,
- Phdr.p_filesz,
- Phdr.p_align};
+ BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{
+ Phdr.p_vaddr, Phdr.p_memsz, Phdr.p_offset,
+ Phdr.p_filesz, Phdr.p_align, ((Phdr.p_flags & ELF::PF_X) != 0)};
if (BC->TheTriple->getArch() == llvm::Triple::x86_64 &&
Phdr.p_vaddr >= BinaryContext::KernelStartX86_64)
BC->IsLinuxKernel = true;
@@ -1533,7 +1531,7 @@ void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
MCSymbol *Symbol = Rel->Symbol;
if (!Symbol) {
- if (!BC->isAArch64() || !Rel->Addend || !Rel->isIRelative())
+ if (BC->isRISCV() || !Rel->Addend || !Rel->isIRelative())
return;
// IFUNC trampoline without symbol
@@ -4247,7 +4245,6 @@ void RewriteInstance::addBoltInfoSection() {
<< "command line:";
for (int I = 0; I < Argc; ++I)
DescOS << " " << Argv[I];
- DescOS.flush();
// Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n'
const std::string BoltInfo =
@@ -4270,7 +4267,6 @@ void RewriteInstance::encodeBATSection() {
raw_string_ostream DescOS(DescStr);
BAT->write(*BC, DescOS);
- DescOS.flush();
const std::string BoltInfo =
BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT);
diff --git a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
index 53a0c81..f3199eb 100644
--- a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
+++ b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
@@ -314,7 +314,6 @@ std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) {
}
// Our string table lives immediately after descriptions vector
OS << Summary->StringTable;
- OS.flush();
return TablesStr;
}
diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp
index 718e975..ecc2f10 100644
--- a/bolt/lib/Utils/Utils.cpp
+++ b/bolt/lib/Utils/Utils.cpp
@@ -66,15 +66,21 @@ std::string getUnescapedName(const StringRef &Name) {
return Output;
}
-std::optional<StringRef> getLTOCommonName(const StringRef Name) {
- for (StringRef Suffix : {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."}) {
+std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
+ ArrayRef<StringRef> Suffixes) {
+ for (StringRef Suffix : Suffixes) {
size_t LTOSuffixPos = Name.find(Suffix);
if (LTOSuffixPos != StringRef::npos)
- return Name.substr(0, LTOSuffixPos + Suffix.size());
+ return Name.substr(0, LTOSuffixPos + (KeepSuffix ? Suffix.size() : 0));
}
return std::nullopt;
}
+std::optional<StringRef> getLTOCommonName(const StringRef Name) {
+ return getCommonName(Name, true,
+ {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."});
+}
+
std::optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes) {
uint8_t Opcode = ExprBytes[0];
if (Opcode == dwarf::DW_CFA_def_cfa_expression)
diff --git a/bolt/test/AArch64/constant_island_pie_update.s b/bolt/test/AArch64/constant_island_pie_update.s
index 313e103..889f6b6 100644
--- a/bolt/test/AArch64/constant_island_pie_update.s
+++ b/bolt/test/AArch64/constant_island_pie_update.s
@@ -8,15 +8,15 @@
# RUN: %clang %cflags -fPIC -pie %t.o -o %t.rela.exe -nostdlib \
# RUN: -Wl,-q -Wl,-z,notext
# RUN: llvm-bolt %t.rela.exe -o %t.rela.bolt --use-old-text=0 --lite=0
-# RUN: llvm-objdump -j .text -d --show-all-symbols %t.rela.bolt | FileCheck %s
+# RUN: llvm-objdump -j .text -d -z --show-all-symbols %t.rela.bolt | FileCheck %s
# RUN: llvm-readelf -rsW %t.rela.bolt | FileCheck --check-prefix=ELFCHECK %s
// .relr.dyn
# RUN: %clang %cflags -fPIC -pie %t.o -o %t.relr.exe -nostdlib \
# RUN: -Wl,-q -Wl,-z,notext -Wl,--pack-dyn-relocs=relr
# RUN: llvm-objcopy --remove-section .rela.mytext %t.relr.exe
# RUN: llvm-bolt %t.relr.exe -o %t.relr.bolt --use-old-text=0 --lite=0
-# RUN: llvm-objdump -j .text -d --show-all-symbols %t.relr.bolt | FileCheck %s
-# RUN: llvm-objdump -j .text -d %t.relr.bolt | \
+# RUN: llvm-objdump -j .text -d -z --show-all-symbols %t.relr.bolt | FileCheck %s
+# RUN: llvm-objdump -j .text -d -z %t.relr.bolt | \
# RUN: FileCheck %s --check-prefix=ADDENDCHECK
# RUN: llvm-readelf -rsW %t.relr.bolt | FileCheck --check-prefix=RELRELFCHECK %s
# RUN: llvm-readelf -SW %t.relr.bolt | FileCheck --check-prefix=RELRSZCHECK %s
diff --git a/bolt/test/AArch64/ifunc.c b/bolt/test/AArch64/ifunc.test
index 1744976..3da42c6 100644
--- a/bolt/test/AArch64/ifunc.c
+++ b/bolt/test/AArch64/ifunc.test
@@ -1,8 +1,6 @@
-// This test checks that IFUNC trampoline is properly recognised by BOLT
-
// With -O0 indirect call is performed on IPLT trampoline. IPLT trampoline
// has IFUNC symbol.
-// RUN: %clang %cflags -nostdlib -O0 -no-pie %s -fuse-ld=lld \
+// RUN: %clang %cflags -nostdlib -O0 -no-pie %p/../Inputs/ifunc.c -fuse-ld=lld \
// RUN: -o %t.O0.exe -Wl,-q
// RUN: llvm-bolt %t.O0.exe -o %t.O0.bolt.exe \
// RUN: --print-disasm --print-only=_start | \
@@ -12,7 +10,7 @@
// Non-pie static executable doesn't generate PT_DYNAMIC, check relocation
// is readed successfully and IPLT trampoline has been identified by bolt.
-// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -no-pie \
+// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \
// RUN: -o %t.O3_nopie.exe -Wl,-q
// RUN: llvm-readelf -l %t.O3_nopie.exe | \
// RUN: FileCheck --check-prefix=NON_DYN_CHECK %s
@@ -25,7 +23,7 @@
// With -O3 direct call is performed on IPLT trampoline. IPLT trampoline
// doesn't have associated symbol. The ifunc symbol has the same address as
// IFUNC resolver function.
-// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -fPIC -pie \
+// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \
// RUN: -o %t.O3_pie.exe -Wl,-q
// RUN: llvm-bolt %t.O3_pie.exe -o %t.O3_pie.bolt.exe \
// RUN: --print-disasm --print-only=_start | \
@@ -35,8 +33,8 @@
// Check that IPLT trampoline located in .plt section are normally handled by
// BOLT. The gnu-ld linker doesn't use separate .iplt section.
-// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -fPIC -pie \
-// RUN: -T %p/Inputs/iplt.ld -o %t.iplt_O3_pie.exe -Wl,-q
+// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \
+// RUN: -T %p/../Inputs/iplt.ld -o %t.iplt_O3_pie.exe -Wl,-q
// RUN: llvm-bolt %t.iplt_O3_pie.exe -o %t.iplt_O3_pie.bolt.exe \
// RUN: --print-disasm --print-only=_start | \
// RUN: FileCheck --check-prefix=CHECK %s
@@ -49,14 +47,3 @@
// REL_CHECK: R_AARCH64_IRELATIVE [[#%x,REL_SYMB_ADDR:]]
// REL_CHECK: [[#REL_SYMB_ADDR]] {{.*}} FUNC {{.*}} resolver_foo
-
-static void foo() {}
-static void bar() {}
-
-extern int use_foo;
-
-static void *resolver_foo(void) { return use_foo ? foo : bar; }
-
-__attribute__((ifunc("resolver_foo"))) void ifoo();
-
-void _start() { ifoo(); }
diff --git a/bolt/test/AArch64/internal-call.s b/bolt/test/AArch64/internal-call.s
new file mode 100644
index 0000000..43b3a64
--- /dev/null
+++ b/bolt/test/AArch64/internal-call.s
@@ -0,0 +1,65 @@
+## Test that llvm-bolt detects internal calls and marks the containing function
+## as non-simple.
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static
+# RUN: llvm-bolt %t.exe -o %t.null --print-all 2>&1 | FileCheck %s
+
+# CHECK: Binary Function "_start" after building cfg
+# CHECK: internal call detected in function _start
+# CHECK-NOT: Binary Function "_start" after validate-internal-calls
+
+ .text
+ .globl _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+.LBB00:
+ mov x11, #0x1fff
+ cmp x1, x11
+ b.hi .Ltmp1
+
+.entry1:
+ movi v4.16b, #0x0
+ movi v5.16b, #0x0
+ subs x1, x1, #0x8
+ b.lo .Ltmp2
+
+.entry2:
+ ld1 { v2.2d, v3.2d }, [x0], #32
+ ld1 { v0.2d, v1.2d }, [x0], #32
+
+.Ltmp2:
+ uaddlp v4.4s, v4.8h
+ uaddlp v4.2d, v4.4s
+ mov x0, v4.d[0]
+ mov x1, v4.d[1]
+ add x0, x0, x1
+ ret x30
+
+.Ltmp1:
+ mov x8, x30
+
+.Lloop:
+ add x5, x0, x9
+ mov x1, #0xface
+ movi v4.16b, #0x0
+ movi v5.16b, #0x0
+ bl .entry2
+ add x4, x4, x0
+ mov x0, x5
+ sub x7, x7, x10
+ cmp x7, x11
+ b.hi .Lloop
+
+ mov x1, x7
+ bl .entry1
+ add x0, x4, x0
+ mov x30, x8
+ ret x30
+
+ .cfi_endproc
+.size _start, .-_start
+
+## Force relocation mode.
+ .reloc 0, R_AARCH64_NONE
diff --git a/bolt/test/AArch64/update-weak-reference-symbol.s b/bolt/test/AArch64/update-weak-reference-symbol.s
index 600a06b8..46819e8 100644
--- a/bolt/test/AArch64/update-weak-reference-symbol.s
+++ b/bolt/test/AArch64/update-weak-reference-symbol.s
@@ -3,7 +3,7 @@
// RUN: %clang %cflags -Wl,-z,notext -shared -Wl,-q %s -o %t.so
// RUN: llvm-bolt %t.so -o %t.so.bolt
// RUN: llvm-nm -n %t.so.bolt > %t.out.txt
-// RUN: llvm-objdump -dj .rodata %t.so.bolt >> %t.out.txt
+// RUN: llvm-objdump -z -dj .rodata %t.so.bolt >> %t.out.txt
// RUN: FileCheck %s --input-file=%t.out.txt
# CHECK: w func_1
diff --git a/bolt/test/Inputs/ifunc.c b/bolt/test/Inputs/ifunc.c
new file mode 100644
index 0000000..3fa62be
--- /dev/null
+++ b/bolt/test/Inputs/ifunc.c
@@ -0,0 +1,12 @@
+// This test checks that IFUNC trampoline is properly recognised by BOLT
+
+static void foo() {}
+static void bar() {}
+
+extern int use_foo;
+
+static void *resolver_foo(void) { return use_foo ? foo : bar; }
+
+__attribute__((ifunc("resolver_foo"))) void ifoo();
+
+void _start() { ifoo(); }
diff --git a/bolt/test/AArch64/Inputs/iplt.ld b/bolt/test/Inputs/iplt.ld
index 1e54a24..1e54a24 100644
--- a/bolt/test/AArch64/Inputs/iplt.ld
+++ b/bolt/test/Inputs/iplt.ld
diff --git a/bolt/test/X86/ifunc.test b/bolt/test/X86/ifunc.test
new file mode 100644
index 0000000..befefbe
--- /dev/null
+++ b/bolt/test/X86/ifunc.test
@@ -0,0 +1,47 @@
+// Check if BOLT can process ifunc symbols from .plt section
+// RUN: %clang %cflags -nostdlib -no-pie %p/../Inputs/ifunc.c -fuse-ld=lld \
+// RUN: -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt.exe \
+// RUN: --print-disasm --print-only=_start | \
+// RUN: FileCheck --check-prefix=CHECK %s
+// RUN: llvm-readelf -aW %t.bolt.exe | \
+// RUN: FileCheck --check-prefix=REL_CHECK %s
+
+// Check if BOLT can process ifunc symbols from .plt section in non-pie static
+// executable case.
+// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \
+// RUN: -o %t.nopie.exe -Wl,-q
+// RUN: llvm-readelf -l %t.nopie.exe | \
+// RUN: FileCheck --check-prefix=NON_DYN_CHECK %s
+// RUN: llvm-bolt %t.nopie.exe -o %t.nopie.bolt.exe \
+// RUN: --print-disasm --print-only=_start | \
+// RUN: FileCheck --check-prefix=CHECK %s
+// RUN: llvm-readelf -aW %t.nopie.bolt.exe | \
+// RUN: FileCheck --check-prefix=REL_CHECK %s
+
+// Check if BOLT can process ifunc symbols from .plt section in pie executable
+// case.
+// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \
+// RUN: -o %t.pie.exe -Wl,-q
+// RUN: llvm-bolt %t.pie.exe -o %t.pie.bolt.exe \
+// RUN: --print-disasm --print-only=_start | \
+// RUN: FileCheck --check-prefix=CHECK %s
+// RUN: llvm-readelf -aW %t.pie.bolt.exe | \
+// RUN: FileCheck --check-prefix=REL_CHECK %s
+
+// Check that IPLT trampolines located in the .plt section are handled normally
+// by BOLT. The gnu-ld linker doesn't use a separate .iplt section.
+// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \
+// RUN: -T %p/../Inputs/iplt.ld -o %t.iplt_pie.exe -Wl,-q
+// RUN: llvm-bolt %t.iplt_pie.exe -o %t.iplt_pie.bolt.exe \
+// RUN: --print-disasm --print-only=_start | \
+// RUN: FileCheck --check-prefix=CHECK %s
+// RUN: llvm-readelf -aW %t.iplt_pie.bolt.exe | \
+// RUN: FileCheck --check-prefix=REL_CHECK %s
+
+// NON_DYN_CHECK-NOT: DYNAMIC
+
+// CHECK: callq "resolver_foo/1@PLT"
+
+// REL_CHECK: R_X86_64_IRELATIVE [[#%x,REL_SYMB_ADDR:]]
+// REL_CHECK: [[#REL_SYMB_ADDR]] {{.*}} FUNC {{.*}} resolver_foo
diff --git a/bolt/test/X86/log.test b/bolt/test/X86/log.test
index 42109db..2c006e9 100644
--- a/bolt/test/X86/log.test
+++ b/bolt/test/X86/log.test
@@ -6,7 +6,7 @@ RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
RUN: llvm-bolt %t.exe -o %t.null --data %p/Inputs/blarge.fdata -v=2 \
RUN: --reorder-blocks=normal --print-finalized --log-file=%t.log 2>&1 \
RUN: | FileCheck --check-prefix=CHECK --allow-empty %s
-RUN: cat %t.log | FileCheck %s --check-prefix=CHECK-LOG
+RUN: FileCheck %s --check-prefix=CHECK-LOG --input-file %t.log
CHECK-NOT: BOLT-INFO
CHECK-NOT: BOLT-WARNING
@@ -16,4 +16,4 @@ CHECK-NOT: BOLT-ERROR
CHECK-LOG: BOLT-INFO: Target architecture
CHECK-LOG: BOLT-INFO: BOLT version
CHECK-LOG: BOLT-INFO: basic block reordering modified layout
-CHECK-LOG: Binary Function "usqrt"
+CHECK-LOG: Binary Function "main"
diff --git a/bolt/test/X86/print-only-section.s b/bolt/test/X86/print-only-section.s
new file mode 100644
index 0000000..d580818
--- /dev/null
+++ b/bolt/test/X86/print-only-section.s
@@ -0,0 +1,29 @@
+## Check that --print-only flag works with sections.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t.exe
+# RUN: llvm-bolt %t.exe -o %t.out --print-cfg --print-only=unused_code 2>&1 \
+# RUN: | FileCheck %s
+
+# CHECK: Binary Function "foo"
+# CHECK-NOT: Binary Function "_start"
+
+ .text
+ .globl _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+ ret
+ .cfi_endproc
+ .size _start, .-_start
+
+ .section unused_code,"ax",@progbits
+ .globl foo
+ .type foo, %function
+foo:
+ .cfi_startproc
+ ret
+ .cfi_endproc
+ .size foo, .-foo
diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test
index b361551..e5e8aad 100644
--- a/bolt/test/X86/pseudoprobe-decoding-inline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-inline.test
@@ -6,37 +6,39 @@
# PREAGG: B X:0 #main# 1 0
## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
## Check pseudo-probes in BAT YAML profile (BOLTed binary)
# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
-# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
-# CHECK-YAML: pseudo_probes: [ { guid: 0xE413754A191DB537, id: 1, type: 0 }, { guid: 0xE413754A191DB537, id: 4, type: 0 } ]
-# CHECK-YAML: guid: 0xE413754A191DB537
-# CHECK-YAML: pseudo_probe_desc_hash: 0x10E852DA94
+# CHECK-YAML: probes: [ { blx: 9 } ]
+# CHECK-YAML: inline_tree: [ { } ]
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
-# CHECK-YAML: pseudo_probes: [ { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
-# CHECK-YAML: guid: 0x5CF8C24CDB18BDAC
-# CHECK-YAML: pseudo_probe_desc_hash: 0x200205A19C5B4
+# CHECK-YAML: probes: [ { blx: 3 } ]
+# CHECK-YAML: inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ]
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
-# CHECK-YAML: pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
-# CHECK-YAML: guid: 0xDB956436E78DD5FA
-# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
+# CHECK-YAML: probes: [ { blx: 3, id: 1 }, { blx: 1 } ]
+# CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ]
#
-## Check that without --profile-use-pseudo-probes option, no pseudo probes are
+# CHECK-YAML: pseudo_probe_desc:
+# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ]
+# CHECK-YAML-NEXT: gh: [ 2, 0, 1 ]
+# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ]
+#
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
## generated
-# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
-# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
-# CHECK-NO-OPT-NOT: pseudo_probes
-# CHECK-NO-OPT-NOT: guid
-# CHECK-NO-OPT-NOT: pseudo_probe_desc_hash
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata
+# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT
+# CHECK-NO-OPT-NOT: probes:
+# CHECK-NO-OPT-NOT: inline_tree:
+# CHECK-NO-OPT-NOT: pseudo_probe_desc:
CHECK: Report of decoding input pseudo probe binaries
diff --git a/bolt/test/X86/pseudoprobe-decoding-noinline.test b/bolt/test/X86/pseudoprobe-decoding-noinline.test
index 5dd6c2e..36a2fab 100644
--- a/bolt/test/X86/pseudoprobe-decoding-noinline.test
+++ b/bolt/test/X86/pseudoprobe-decoding-noinline.test
@@ -1,6 +1,45 @@
# REQUIRES: system-linux
-# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt 2>&1 | FileCheck %s
+# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt --lite=0 --enable-bat 2>&1 | FileCheck %s
+# PREAGG: B X:0 #foo# 1 0
+# PREAGG: B X:0 #bar# 1 0
+# PREAGG: B X:0 #main# 1 0
+
+## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
+# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin %t.preagg PREAGG
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes
+# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
+## Check pseudo-probes in BAT YAML profile (BOLTed binary)
+# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
+# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes
+# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
+# CHECK-YAML: name: bar
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 9 } ]
+# CHECK-YAML: inline_tree: [ { } ]
+#
+# CHECK-YAML: name: foo
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 3 } ]
+# CHECK-YAML: inline_tree: [ { g: 2 } ]
+#
+# CHECK-YAML: name: main
+# CHECK-YAML: - bid: 0
+# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ]
+# CHECK-YAML: inline_tree: [ { g: 1 } ]
+#
+# CHECK-YAML: pseudo_probe_desc:
+# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0xDB956436E78DD5FA, 0x5CF8C24CDB18BDAC ]
+# CHECK-YAML-NEXT: gh: [ 2, 1, 0 ]
+# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ]
+#
+## Check that without --profile-write-pseudo-probes option, no pseudo probes are
+## generated
+# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata
+# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT
+# CHECK-NO-OPT-NOT: probes:
+# CHECK-NO-OPT-NOT: inline_tree:
+# CHECK-NO-OPT-NOT: pseudo_probe_desc:
;; Report of decoding input pseudo probe binaries
; CHECK: GUID: 6699318081062747564 Name: foo
diff --git a/bolt/test/X86/yaml-unknown-keys.test b/bolt/test/X86/yaml-unknown-keys.test
new file mode 100644
index 0000000..cbcf9a4
--- /dev/null
+++ b/bolt/test/X86/yaml-unknown-keys.test
@@ -0,0 +1,50 @@
+## Test that BOLT gracefully handles a YAML profile with unknown keys.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.null --data %t/profile.yaml \
+# RUN: --profile-ignore-hash -v=1 2>&1 | FileCheck %s
+# CHECK: warning: unknown key 'unknown-header-key'
+# CHECK: warning: unknown key 'unknown_succ_key'
+# CHECK: warning: unknown key 'unknown_block_key'
+# CHECK: warning: unknown key 'unknown_function_key'
+# CHECK: warning: unknown key 'unknown_toplev_key'
+#--- main.s
+ .globl main
+ .type main, %function
+main:
+ .cfi_startproc
+ cmpl $0x0, %eax
+ jne .LBB0
+.LBB0:
+ retq
+ .cfi_endproc
+.size main, .-main
+#--- profile.yaml
+---
+header:
+ profile-version: 1
+ binary-name: 'yaml-multiple-profiles.test.tmp.exe'
+ binary-build-id: '<unknown>'
+ profile-flags: [ lbr ]
+ profile-origin: branch profile reader
+ profile-events: ''
+ dfs-order: false
+ unknown-header-key: true
+functions:
+ - name: 'main'
+ fid: 1
+ hash: 0x50BBA3441D436491
+ exec: 1
+ nblocks: 1
+ blocks:
+ - bid: 0
+ insns: 2
+ hash: 0x4D4D8FAF7D4C0000
+ succ: [ { bid: 1, cnt: 0, unknown_succ_key: 0x10 } ]
+ unknown_block_key: [ ]
+ unknown_function_key: 1
+unknown_toplev_key: ''
+...
diff --git a/bolt/test/lit.local.cfg b/bolt/test/lit.local.cfg
index 8aa5f15..e2fa0a4 100644
--- a/bolt/test/lit.local.cfg
+++ b/bolt/test/lit.local.cfg
@@ -1,6 +1,6 @@
host_linux_triple = config.target_triple.split("-")[0] + "-unknown-linux-gnu"
-common_linker_flags = "-fuse-ld=lld -Wl,--unresolved-symbols=ignore-all"
-flags = f"--target={host_linux_triple} {common_linker_flags}"
+common_linker_flags = "-fuse-ld=lld -Wl,--unresolved-symbols=ignore-all -pie"
+flags = f"--target={host_linux_triple} -fPIE {common_linker_flags}"
config.substitutions.insert(0, ("%cflags", f"%cflags {flags}"))
config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}"))
diff --git a/bolt/test/merge-fdata-uninitialized-header.test b/bolt/test/merge-fdata-uninitialized-header.test
new file mode 100644
index 0000000..5336961
--- /dev/null
+++ b/bolt/test/merge-fdata-uninitialized-header.test
@@ -0,0 +1,45 @@
+## Test that merge-fdata correctly handles a YAML header with uninitialized
+## fields. a.yaml does not have hash-func set and it used to crash merge-fdata.
+
+# REQUIRES: system-linux
+
+# RUN: split-file %s %t
+# RUN: not merge-fdata %t/a.yaml %t/b.yaml 2>&1 | FileCheck %s
+
+# CHECK: cannot merge profiles with different hash functions
+
+#--- a.yaml
+---
+header:
+ profile-version: 1
+ binary-name: 'a.out'
+ binary-build-id: '<unknown>'
+ profile-flags: [ lbr ]
+ profile-origin: branch profile reader
+ profile-events: ''
+ dfs-order: false
+functions:
+ - name: 'main'
+ fid: 1
+ hash: 0x50BBA3441D436491
+ exec: 1
+ nblocks: 0
+...
+#--- b.yaml
+---
+header:
+ profile-version: 1
+ binary-name: 'a.out'
+ binary-build-id: '<unknown>'
+ profile-flags: [ lbr ]
+ profile-origin: branch profile reader
+ profile-events: ''
+ dfs-order: false
+ hash-func: xxh3
+functions:
+ - name: 'main'
+ fid: 1
+ hash: 0x50BBA3441D436491
+ exec: 1
+ nblocks: 0
+...
diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg
index 4ee9ad0..0fecf91 100644
--- a/bolt/test/perf2bolt/lit.local.cfg
+++ b/bolt/test/perf2bolt/lit.local.cfg
@@ -1,4 +1,5 @@
import shutil
+import subprocess
-if shutil.which("perf") is not None:
- config.available_features.add("perf") \ No newline at end of file
+if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0:
+ config.available_features.add("perf")
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 44db899..7bec442 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -3,15 +3,12 @@
REQUIRES: system-linux, perf
RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t
-RUN: perf record -e cycles:u -o %t2 -- %t
+RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
CHECK-NOT: PERF2BOLT-ERROR
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
-RUN: perf record -e cycles:u -o %t5 -- %t4
-RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE
-
-CHECK-NO-PIE-NOT: PERF2BOLT-ERROR
-CHECK-NO-PIE-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. \ No newline at end of file
+RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
+RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s
diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp
index f2ac5ad..89ca46c 100644
--- a/bolt/tools/merge-fdata/merge-fdata.cpp
+++ b/bolt/tools/merge-fdata/merge-fdata.cpp
@@ -145,6 +145,10 @@ void mergeProfileHeaders(BinaryProfileHeader &MergedHeader,
errs() << "WARNING: merging profiles with different sampling events\n";
MergedHeader.EventNames += "," + Header.EventNames;
}
+
+ if (MergedHeader.HashFunction != Header.HashFunction)
+ report_error("merge conflict",
+ "cannot merge profiles with different hash functions");
}
void mergeBasicBlockProfile(BinaryBasicBlockProfile &MergedBB,
@@ -386,12 +390,14 @@ int main(int argc, char **argv) {
// Merged information for all functions.
StringMap<BinaryFunctionProfile> MergedBFs;
+ bool FirstHeader = true;
for (std::string &InputDataFilename : Inputs) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(InputDataFilename);
if (std::error_code EC = MB.getError())
report_error(InputDataFilename, EC);
yaml::Input YamlInput(MB.get()->getBuffer());
+ YamlInput.setAllowUnknownKeys(true);
errs() << "Merging data from " << InputDataFilename << "...\n";
@@ -408,7 +414,12 @@ int main(int argc, char **argv) {
}
// Merge the header.
- mergeProfileHeaders(MergedHeader, BP.Header);
+ if (FirstHeader) {
+ MergedHeader = BP.Header;
+ FirstHeader = false;
+ } else {
+ mergeProfileHeaders(MergedHeader, BP.Header);
+ }
// Do the function merge.
for (BinaryFunctionProfile &BF : BP.Functions) {
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
index 6c32881..05b898d 100644
--- a/bolt/unittests/Core/BinaryContext.cpp
+++ b/bolt/unittests/Core/BinaryContext.cpp
@@ -160,13 +160,14 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
TEST_P(BinaryContextTester, BaseAddress) {
// Check that base address calculation is correct for a binary with the
// following segment layout:
- BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000};
+ BC->SegmentMapInfo[0] =
+ SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000, true};
BC->SegmentMapInfo[0x10e8d2b4] =
- SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000};
+ SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000, true};
BC->SegmentMapInfo[0x4a3bddc0] =
- SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000};
+ SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000, true};
BC->SegmentMapInfo[0x4b84d5e8] =
- SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000};
+ SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000, true};
std::optional<uint64_t> BaseAddress =
BC->getBaseAddressForMapping(0x7f13f5556000, 0x10e8c000);
@@ -181,13 +182,13 @@ TEST_P(BinaryContextTester, BaseAddress2) {
// Check that base address calculation is correct for a binary if the
// alignment in ELF file are different from pagesize.
// The segment layout is as follows:
- BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000};
+ BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000, true};
BC->SegmentMapInfo[0x31860] =
- SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000};
+ SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000, true};
BC->SegmentMapInfo[0x41c20] =
- SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000};
+ SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000, true};
BC->SegmentMapInfo[0x54e18] =
- SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000};
+ SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000, true};
std::optional<uint64_t> BaseAddress =
BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000);
@@ -197,3 +198,22 @@ TEST_P(BinaryContextTester, BaseAddress2) {
BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000);
ASSERT_FALSE(BaseAddress.has_value());
}
+
+TEST_P(BinaryContextTester, BaseAddressSegmentsSmallerThanAlignment) {
+ // Check that the correct segment is used to compute the base address
+ // when multiple segments are close together in the ELF file (closer
+ // than the required alignment in the process space).
+ // See https://github.com/llvm/llvm-project/issues/109384
+ BC->SegmentMapInfo[0] = SegmentInfo{0, 0x1d1c, 0, 0x1d1c, 0x10000, false};
+ BC->SegmentMapInfo[0x11d40] =
+ SegmentInfo{0x11d40, 0x11e0, 0x1d40, 0x11e0, 0x10000, true};
+ BC->SegmentMapInfo[0x22f20] =
+ SegmentInfo{0x22f20, 0x10e0, 0x2f20, 0x1f0, 0x10000, false};
+ BC->SegmentMapInfo[0x33110] =
+ SegmentInfo{0x33110, 0x89, 0x3110, 0x88, 0x10000, false};
+
+ std::optional<uint64_t> BaseAddress =
+ BC->getBaseAddressForMapping(0xaaaaaaab1000, 0x1000);
+ ASSERT_TRUE(BaseAddress.has_value());
+ ASSERT_EQ(*BaseAddress, 0xaaaaaaaa0000ULL);
+} \ No newline at end of file